Skip to content

Commit

Permalink
incorporating new similarity matrix plotting function
Browse files Browse the repository at this point in the history
  • Loading branch information
KatyBrown committed May 16, 2024
1 parent 67c139c commit 9dca5d9
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 39 deletions.
25 changes: 22 additions & 3 deletions CIAlign/argP.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,11 +516,30 @@ def getParser():
action="store_true",
help="""Plot a mini alignment showing positions which are \
identical to or differ from the consensus. \
Also has the option to plot mini alignment showing \
positions based on their numerical values when \
when compared via a matrix to the consensus. \
Default: %(default)s""")

optional.add("--plot_consensus_similarity",
dest='plot_consensus_similarity', action='store_true',
help="""Plot mini alignment showing \
positions based on their score when \
when compared via a substitution matrix to the consensus. \
Default: %(default)s""")

optional.add("--plot_substitution_matrix", dest='plot_substitution_matrix',
type=str, default='B',
help="""Substitution matrix to use for similarity plots. \
Default: %(default)s""")

optional.add("--plot_identity_palette", dest='plot_identity_palette',
type=str, default='terrain_r',
help="""Matplotlib palette name for identity mini alignments. \
Default: %(default)s""")

optional.add("--plot_similarity_palette", dest='plot_similarity_palette',
type=str, default='summer_r',
help="""Matplotlib palette name for similarity mini \
alignments. Default: %(default)s""")

optional.add("--plot_dpi", dest="plot_dpi",
type=int, default=300, metavar="(int)",
help="DPI for mini alignments. Default: %(default)s")
Expand Down
77 changes: 52 additions & 25 deletions CIAlign/consensusSeq.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import scipy.stats
import copy
import operator
import pandas as pd
matplotlib.use('Agg')


Expand Down Expand Up @@ -694,79 +695,105 @@ def calcConservationAli(alignment, typ):
return (heights, ents)


def compareAlignmentConsensus(arr, typ, booleanOrSimilarity="Boolean", MatrixName="B"):
consensus, _ = np.array(findConsensus(arr, '', consensus_type='majority_nongap') )
def compareAlignmentConsensus(arr, typ, booleanOrSimilarity="Boolean",
MatrixName="B"):
consensus, _ = np.array(findConsensus(arr, '',
consensus_type='majority_nongap') )
ci_dir = os.path.dirname(utilityFunctions.__file__)
matrix_dir = "%s/similarity_matrices" % (ci_dir)

if booleanOrSimilarity == "Boolean":
'''
Compares the alignment of the inputted array to the consensus of that array, and outputs a boolean array.
Compares the alignment of the inputted array to the consensus of that
array, and outputs a boolean array.
alignment: arr
The alignment stored as a numpy array
return:
a numpy array stored as new_arr, which is a boolean array comparing the arr to the consensus of it.
a numpy array stored as new_arr, which is a boolean array comparing the
arr to the consensus of it.
'''
bool_array = np.array([])
bool_arrL = np.empty(dtype=bool, shape=(0, len(consensus)))
# declares the numpy arrays
for e in range(1, (len(arr[:,0])+1)):
# iterates over the rows of the sequences
z = e-1
for i in range(1, (len(arr[0,:])+1)):
# iterates over the columns of the sequences
x = i-1
if arr[z,x] == consensus[x]:
# verifies if the current value being iterated is equal to the equivalent value inline with the consensus
bool_array = np.append(bool_array, [True], axis=None)
else:
bool_array = np.append(bool_array, [False], axis=None)
bool_arrL = np.vstack([bool_arrL, bool_array])
bool_array = np.array([])
for i in range(1, (len(arr[0,:])+1)):
# iterates over the columns of the sequences
x = i-1
if arr[z,x] == consensus[x]:
# verifies if the current value being iterated is equal to the
#equivalent value inline with the consensus
bool_array = np.append(bool_array, [True], axis=None)
else:
bool_array = np.append(bool_array, [False], axis=None)
bool_arrL = np.vstack([bool_arrL, bool_array])
bool_array = np.array([])

new_arr = copy.deepcopy(bool_arrL)
new_arr = bool_arrL.astype(bool)
# returns the new boolean array containing the verified alignment to the consensus
# returns the new boolean array containing the verified alignment to
#the consensus
return new_arr
else:
# generates the consensus
Sarray = np.array([])
SarrL = np.empty(dtype=int, shape=(0, len(consensus)))
# declares the numpy arrays
tab = pd.read_csv("roman_work_experience/matrices.txt", sep="\t", index_col=0)
tab = pd.read_csv("%s/matrices.txt" % ci_dir, sep="\t", index_col=0)
if typ == "aa":
# verifies if the typ is amino acid or nucleotide
if MatrixName != "B":
if tab.loc[MatrixName][0] != typ:
raise RuntimeError("This matrix is not valid")
# verifies if the matrix is valid
else:
# verifies if the user would like to use the default matrix or their own
mat = pd.read_csv(("%s/similarity_matrices/"+MatrixName) % mydir, comment="#", sep="\s+")
# verifies if the user would like to use the default matrix or
#their own
mat = pd.read_csv("%s/%s" % (matrix_dir, MatrixName),
comment="#", sep="\s+")
elif MatrixName == "B":
mat = pd.read_csv("%s/similarity_matrices/BLOSUM62" % mydir, comment="#", sep="\s+")
mat = pd.read_csv("%s/BLOSUM62" % (matrix_dir),
comment="#", sep="\s+")
elif typ == "nt":
if MatrixName != "B":
if tab.loc[MatrixName][0] != typ:
raise RuntimeError("This matrix is not valid")
# verifies if the matrix is valid
else:
# verifies if the user would like to use the default matrix or their own
mat = pd.read_csv(("%s/similarity_matrices/"+MatrixName) % mydir, comment="#", sep="\s+")
# verifies if the user would like to use the default
# matrix or their own
mat = pd.read_csv("%s/%s" % (matrix_dir, MatrixName),
comment="#", sep="\s+")
elif MatrixName == "B":
mat = pd.read_csv("%s/similarity_matrices/NUC.4.4" % mydir, comment="#", sep="\s+")
mat = pd.read_csv("%s/NUC.4.4" % (matrix_dir), comment="#",
sep="\s+")
for e in range(1, (len(arr[:,0])+1)):
# iterates over the rows of the sequences
z = e-1
for i in range(1, (len(arr[0,:])+1)):
# iterates over the columns of the sequences
x = i-1
if not arr[z,x] == "-":
Sarray = np.append(Sarray,[int(mat.loc[arr[z,x],consensus[x]])])
if not arr[z, x] == "-":
if arr[z, x] == "U" and typ == 'nt':
thischar = "T"
else:
thischar = arr[z, x]
if consensus[x] == "U" and typ == 'nt':
conschar = "T"
else:
conschar = consensus[x]
score = mat.loc[thischar, conschar]
Sarray = np.append(Sarray, [score])
elif arr[z,x] == "-":
# sets the value of '-' as 0
Sarray = np.append(Sarray, 0)
SarrL = np.vstack([SarrL, Sarray])
Sarray = np.array([])
new_Sarr = copy.deepcopy(SarrL)
new_Sarr = SarrL.astype(int)
# returns the new similarity array containing the verified alignment to the consensus
# returns the new similarity array containing the verified alignment
# to the consensus
return new_Sarr
16 changes: 13 additions & 3 deletions CIAlign/miniAlignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,10 @@ def drawMarkUpLegend(outfile, palette="CBS"):
def drawMiniAlignment(arr, nams, log, outfile, typ, plot_type='standard',
dpi=300, title=None, width=5, height=3, markup=False,
markupdict=None, ret=False, orig_nams=[],
keep_numbers=False, force_numbers=False, palette="CBS"):
keep_numbers=False, force_numbers=False, palette="CBS",
sub_matrix_name='B', plot_identity_palette='terrain_r',
plot_similarity_palette='summer_r',
plot_substitution_matrix='B'):
'''
Draws a "mini alignment" image showing a small representation of the
whole alignment so that gaps and poorly aligned regions are visible.
Expand Down Expand Up @@ -322,8 +325,15 @@ def drawMiniAlignment(arr, nams, log, outfile, typ, plot_type='standard',
if plot_type == 'standard':
arr2, cm = arrNumeric(arr, typ, palette)
elif plot_type == 'boolean':
arr2 = consensusSeq.compareAlignmentConsensus(arr)
cm = matplotlib.colormaps['jet']
arr2 = consensusSeq.compareAlignmentConsensus(
arr, typ=typ, booleanOrSimilarity="Boolean")
cm = matplotlib.colormaps[plot_identity_palette]
elif plot_type == 'similarity':
arr2 = consensusSeq.compareAlignmentConsensus(
arr, typ=typ, booleanOrSimilarity="similarity",
MatrixName=sub_matrix_name)
cm = matplotlib.colormaps[plot_similarity_palette]

# display it on the axis
a.imshow(arr2, cmap=cm, aspect='auto', interpolation='nearest')

Expand Down
36 changes: 28 additions & 8 deletions CIAlign/runCIAlign.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def whichFunctions(args):
args.plot_output,
args.plot_markup,
args.plot_consensus_identity,
args.plot_consensus_similarity,
args.visualise,
args.all_options]):
which_functions.append("mini_alignments")
Expand Down Expand Up @@ -892,14 +893,33 @@ def runMiniAlignments(args, log, orig_arr, orig_nams, arr, nams,
print("Plotting identity to consensus")
outf = "%s_consensus_identity.%s" % (args.outfile_stem,
args.plot_format)
miniAlignments.drawMiniAlignment(arr, nams, log,
outf, typ,
plot_type='boolean',
dpi=args.plot_dpi,
width=args.plot_width,
height=args.plot_height,
force_numbers=fn,
palette=args.palette)
miniAlignments.drawMiniAlignment(
arr, nams, log,
outf, typ,
plot_type='boolean',
dpi=args.plot_dpi,
width=args.plot_width,
height=args.plot_height,
force_numbers=fn,
palette=args.palette,
plot_identity_palette=args.plot_identity_palette)
if args.plot_consensus_similarity:
log.info("Plotting similarity to consensus")
if not args.silent:
print("Plotting similarity to consensus")
outf = "%s_consensus_similarity.%s" % (args.outfile_stem,
args.plot_format)
miniAlignments.drawMiniAlignment(
arr, nams, log,
outf, typ,
plot_type='similarity',
dpi=args.plot_dpi,
width=args.plot_width,
height=args.plot_height,
force_numbers=fn,
palette=args.palette,
plot_similarity_palette=args.plot_similarity_palette,
plot_substitution_matrix=args.plot_substitution_matrix)


def runConsensus(args, log, orig_arr, orig_nams, arr, nams, removed_seqs):
Expand Down

0 comments on commit 9dca5d9

Please sign in to comment.