Skip to content

Commit

Permalink
pasted new function with similarity array
Browse files Browse the repository at this point in the history
  • Loading branch information
Jebthesheep101 committed May 16, 2024
1 parent 94e20e7 commit 74de398
Showing 1 changed file with 71 additions and 33 deletions.
104 changes: 71 additions & 33 deletions CIAlign/consensusSeq.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import utilityFunctions
import os
import scipy.stats
import copy
import operator
matplotlib.use('Agg')


Expand Down Expand Up @@ -692,43 +694,79 @@ def calcConservationAli(alignment, typ):
return (heights, ents)


def compareAlignmentConsensus(arr, typ=None, booleanOrSimilarity="Boolean",
                              MatrixName="B"):
    '''
    Compares every position of the alignment to the consensus of that
    alignment and returns either a boolean (match / mismatch) array or an
    array of substitution matrix scores.

    Parameters
    ----------
    arr: np.array
        The alignment stored as a 2D numpy array (rows are sequences,
        columns are alignment positions)
    typ: str
        Sequence type, 'aa' or 'nt'. Only used when a similarity array is
        requested, so it may be omitted in boolean mode.
    booleanOrSimilarity: str
        'Boolean' (default) to return a boolean array, any other value to
        return a similarity score array.
    MatrixName: str
        Name of the substitution matrix file in the similarity_matrices
        directory. 'B' (default) selects BLOSUM62 for 'aa' and NUC.4.4 for
        'nt'.

    Returns
    -------
    np.array
        Array with the same shape as arr. In boolean mode each cell is True
        where the residue equals the consensus residue of its column. In
        similarity mode each cell is the integer matrix score of the residue
        against the consensus residue of its column, with gaps ('-')
        scored as 0.

    Raises
    ------
    ValueError
        If a similarity array is requested and typ is not 'aa' or 'nt'.
    RuntimeError
        If the custom matrix named by MatrixName is not registered for typ.
    '''
    consensus, _ = findConsensus(arr, '', consensus_type='majority_nongap')
    consensus = np.asarray(consensus)

    if booleanOrSimilarity == "Boolean":
        # broadcasting compares each row of the alignment to the consensus
        return np.asarray(arr == consensus, dtype=bool)

    mat = _loadSimilarityMatrix(typ, MatrixName)
    # gaps keep the initial score of 0, only non-gap cells are looked up
    scores = np.zeros(arr.shape, dtype=int)
    for row, col in zip(*np.nonzero(arr != "-")):
        scores[row, col] = int(mat.loc[arr[row, col], consensus[col]])
    return scores


def _loadSimilarityMatrix(typ, MatrixName="B"):
    '''
    Reads the substitution matrix used to score residues against the
    consensus.

    Parameters
    ----------
    typ: str
        Sequence type, 'aa' or 'nt'
    MatrixName: str
        Matrix file name, or 'B' for the default matrix for typ

    Returns
    -------
    pd.DataFrame
        The matrix as parsed by pd.read_csv from the whitespace delimited
        matrix file, with lines starting with '#' skipped.
    '''
    default_matrices = {"aa": "BLOSUM62", "nt": "NUC.4.4"}
    if typ not in default_matrices:
        raise ValueError("typ must be 'aa' or 'nt', not %r" % (typ,))

    if MatrixName == "B":
        matrix_file = default_matrices[typ]
    else:
        # NOTE(review): this table is read from a path relative to the
        # current working directory - confirm this is the intended location
        tab = pd.read_csv("roman_work_experience/matrices.txt",
                          sep="\t", index_col=0)
        # the first column of the table holds the type the matrix is for
        if tab.loc[MatrixName].iloc[0] != typ:
            raise RuntimeError("This matrix is not valid")
        matrix_file = MatrixName

    return pd.read_csv(
        os.path.join(mydir, "similarity_matrices", matrix_file),
        comment="#", sep=r"\s+")

0 comments on commit 74de398

Please sign in to comment.