In [39]:
import numpy as np
import pandas

import MI_analyzer as mia # Contains mutual information calculations and the blue-red heat map plotter.
import NMI_analyzer as nmia # Contains normalized mutual information calculations and the binary heat map plotter.
import fasta_to_numpy as ftn # Turns an alignment into a numpy array.

# Step one: load the multiple sequence alignment. The helper parses the FASTA file, reverse complements it, and
# hands the result back as a numpy array.
alignment_fasta = (
    'Segment 5 - First-Last 200/'
    'Segment 5 NP Aligned and Trimmed First and Last 200 nt.fasta'
)
np_alignment = ftn.Parse_and_reverse_complement(alignment_fasta)

In [40]:
# Sanity check: show the parsed alignment. The call form of print produces the same text under Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(np_alignment)

[['T' 'C' 'A' ..., 'C' 'A' 'T']
 ['T' 'T' 'A' ..., 'C' 'A' 'T']
 ['T' 'T' 'A' ..., 'C' 'A' 'T']
 ..., 
 ['T' 'C' 'A' ..., 'C' 'A' 'T']
 ['T' 'C' 'A' ..., 'C' 'A' 'T']
 ['T' 'C' 'A' ..., 'C' 'A' 'T']]


In [41]:
# Score the alignment twice: once with normalized mutual information and once with plain mutual information.
# Each result is kept in its own array so the two can be combined later.
nmi_vector = nmia.Calculate_NMI(np_alignment)
mi_vector = mia.Calculate_MI(np_alignment)

In [43]:
# The MI_analyzer and NMI_analyzer scripts both create a square array with diagonal symmetry. This is important in
# case I ever run the scripts individually, in which case I'd want a square matrix to plot. For our figure, though,
# I combine half of each array into a single array that contains both MI and NMI. I do this by turning the arrays
# into pandas dataframes, which are easier to work with, and then turning the result back into a numpy array.

# Turn both arrays into Pandas dataframes.
mi_pandas = pandas.DataFrame(mi_vector)
nmi_pandas = pandas.DataFrame(nmi_vector)

# The combination below only makes sense for two square matrices of the same size.
assert mi_pandas.shape == nmi_pandas.shape and mi_pandas.shape[0] == mi_pandas.shape[1], \
    "MI and NMI arrays must be square and have the same size"

# Boolean masks selecting the cells strictly below the diagonal (row index > column index) and the diagonal itself.
size = len(mi_pandas)
below_diagonal = np.tril(np.ones((size, size), dtype=bool), k=-1)
on_diagonal = np.eye(size, dtype=bool)

# Take the MI value wherever row index > column index, keep the NMI value everywhere else, and set the diagonal
# to 1 (it had been artificially set to 0 in the NMI_analyzer). These are exactly the cells the earlier
# element-by-element loop wrote to; because both inputs are symmetric, which half ends up drawn on top depends
# only on the orientation used by the plot function.
# DataFrame.mask returns a new frame, so nmi_vector is never modified in place, and no chained-indexing
# assignment (which pandas may silently apply to a temporary copy) is involved.
nmi_pandas = nmi_pandas.mask(below_diagonal, mi_pandas).mask(on_diagonal, 1.0)

# Turn the finalized MI-NMI dataframe back into a numpy array, which is the input for the plot function.
# (.values is used because DataFrame.as_matrix() has been removed from current pandas releases.)
both_info = nmi_pandas.values

In [None]:
# Draw the combined MI-NMI heat map with the 'PuBu' colour map.
# NOTE(review): the import comments attribute the binary heat map plotter to NMI_analyzer, yet Plot_Binary is
# called on MI_analyzer (mia) here -- confirm this is the intended module.
heat_map_pdf = "Seg5_FL200 Both Info Scores.pdf"  # presumably the output file name; verify against Plot_Binary
fig = mia.Plot_Binary(both_info, heat_map_pdf, cmap='PuBu')