In [None]:
import glob
import pandas as pd
import numpy as np
from scipy import linalg as LA
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.cm as cm

In [None]:
def svd_function(b,n):
    """Return the first ``n`` singular-value-scaled right singular vectors of ``b``.

    Every row of ``b`` is centred by subtracting its own mean (mean taken
    along axis 1). The centred matrix is factorised with a thin SVD, the
    reconstruction is checked and the result of that check is printed, and
    the leading right singular vectors are scaled by their singular values.

    Parameters
    ----------
    b : 2-D numpy array
        Input matrix. The result has one row per column of ``b``.
    n : int
        Number of leading modes to keep.

    Returns
    -------
    numpy.ndarray
        ``V[:, :n] . S[:n, :n]`` where ``b_centred = U . S . V^T``.
    """
    row_means = np.mean(b, axis=1)
    centred = b - row_means[:, None]

    left, sing_vals, right_t = LA.svd(centred, full_matrices=False)
    n_sing = len(sing_vals)
    sigma = LA.diagsvd(sing_vals, n_sing, n_sing)

    # Report (but do not enforce) that U . S . V^T reproduces the centred matrix.
    print('Checking If the SVD went well...')
    rebuilt = np.dot(left, np.dot(sigma, right_t))
    print(np.allclose(centred, rebuilt))

    right = right_t.T
    return np.dot(right[:, :n], sigma[:n, :n])

def plot_scatter_2d(V0,labels,q1,q2,savefig):
    """Scatter two columns of ``V0`` against each other and save the figure.

    Each row of ``V0`` is drawn as one coloured, annotated point with its
    own legend entry.

    Parameters
    ----------
    V0 : 2-D numpy array
        One row per point, one column per component (e.g. the output of
        ``svd_function``).
    labels : iterable of str
        Label for each row of ``V0``, in row order. Any iterable is
        accepted (including a ``dict.keys()`` view); it is copied to a list
        so it can be indexed.
    q1, q2 : int
        1-based component numbers shown on the x and y axes.
    savefig : str
        Output path handed to ``plt.savefig``.
    """
    matplotlib.rcParams.update({'font.size': 10})

    # dict views are not subscriptable on Python 3; a list always is.
    labels = list(labels)
    # Diagnostic: number of points vs. number of labels supplied.
    print(len(V0), len(labels))

    # Convert the 1-based component numbers to 0-based column indices.
    col_x = q1 - 1
    col_y = q2 - 1

    fig, ax = plt.subplots()
    colors = cm.rainbow(np.linspace(0, 1, len(V0)))
    for i in range(len(V0)):
        ax.scatter(V0[i, col_x], V0[i, col_y], label=labels[i], s=100, color=colors[i])

    for i in range(len(V0)):
        px, py = V0[i, col_x], V0[i, col_y]
        # Nudge the text by 2% of the coordinate magnitude so it clears the marker.
        ax.annotate(labels[i], (px + abs(px / 50), py + abs(py / 50)))

    # Braces around the number keep multi-digit indices fully subscripted.
    ax.set_xlabel(r'$PC_{{{0}}}$'.format(q1))
    ax.set_ylabel(r'$PC_{{{0}}}$'.format(q2))

    ax.legend(bbox_to_anchor=(0., 1.01, 1., 0.1),loc=3, ncol=6, mode="expand", borderaxespad=0.)
    plt.savefig(savefig, bbox_inches='tight')

def plot_scatter_3d(V0,labels,q1,q2,q3,savefig):
    """Scatter three columns of ``V0`` in 3-D and save the figure.

    Each row of ``V0`` is drawn as one coloured point with a text label and
    its own legend entry.

    Parameters
    ----------
    V0 : 2-D numpy array
        One row per point, one column per component (e.g. the output of
        ``svd_function``).
    labels : iterable of str
        Label for each row of ``V0``, in row order. Any iterable is
        accepted (including a ``dict.keys()`` view); it is copied to a list
        so it can be indexed.
    q1, q2, q3 : int
        1-based component numbers shown on the x, y and z axes.
    savefig : str
        Output path handed to ``plt.savefig``.
    """
    # Importing mplot3d registers the '3d' projection on older matplotlib releases.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    matplotlib.rcParams.update({'font.size': 10})

    # dict views are not subscriptable on Python 3; a list always is.
    labels = list(labels)
    # Diagnostic: number of points vs. number of labels supplied.
    print(len(V0), len(labels))

    # Convert the 1-based component numbers to 0-based column indices.
    col_x = q1 - 1
    col_y = q2 - 1
    col_z = q3 - 1

    fig = plt.figure()
    # add_subplot attaches the 3-D axes to the figure; a bare Axes3D(fig) is
    # no longer added automatically by recent matplotlib, leaving the figure empty.
    ax = fig.add_subplot(111, projection='3d')
    colors = cm.rainbow(np.linspace(0, 1, len(V0)))
    for i in range(len(V0)):
        ax.scatter(V0[i, col_x], V0[i, col_y], V0[i, col_z], label=labels[i], color=colors[i])

    for i in range(len(V0)):
        ax.text(V0[i, col_x], V0[i, col_y], V0[i, col_z], '%s' % (labels[i]))

    # Braces around the number keep multi-digit indices fully subscripted.
    ax.set_xlabel(r'$PC_{{{0}}}$'.format(q1))
    ax.set_ylabel(r'$PC_{{{0}}}$'.format(q2))
    # The third component belongs on the z axis (it previously overwrote the y label).
    ax.set_zlabel(r'$PC_{{{0}}}$'.format(q3))

    ax.legend(bbox_to_anchor=(0., 1.01, 1., 0.1),loc=3, ncol=6, mode="expand", borderaxespad=0.)
    plt.savefig(savefig, bbox_inches='tight')

In [None]:
# Gather every '*_dfi.csv' table in the working directory, in sorted filename order.
data_files = sorted(glob.glob('*_dfi.csv'))
print(data_files)
if not data_files:
    # Fail early with a clear message instead of an indexing error further down.
    raise FileNotFoundError("No '*_dfi.csv' files found in the working directory")

# Key each table by the part of its filename before the first underscore.
# NOTE(review): two files sharing that prefix would silently overwrite each other.
datadic = {}
for data_file in data_files:
    file_id = str(data_file.split('_')[0])
    print(file_id)
    datadic[file_id]=pd.read_csv(data_file,index_col='ResI')

# A real list (not a dict view) so labels can be indexed below and by the plot helpers.
labels = list(datadic)
print(len(labels))

num_muts=len(labels)
# One column per input file; the row count is taken from the first table.
x = np.zeros((datadic[labels[0]].shape[0],num_muts))

param='DFI_CA_pct'
for i in range(num_muts):
    label = labels[i]
    print(label)
    # Raises if a table's row count differs from the first table's.
    x[:,i] = datadic[label][param]


n=3 #Number of Modes
V0 = svd_function(x,n)

In [None]:
# PC1 vs PC2, written to 'sub1_2d_1_2.jpg'.
plot_scatter_2d(V0, labels, q1=1, q2=2, savefig='sub1_2d_1_2.jpg')

In [None]:
# PC1 vs PC3, written to 'sub1_2d_1_3.jpg'.
plot_scatter_2d(V0, labels, q1=1, q2=3, savefig='sub1_2d_1_3.jpg')

In [None]:
# PC2 vs PC3, written to 'sub1_2d_2_3.jpg'.
plot_scatter_2d(V0, labels, q1=2, q2=3, savefig='sub1_2d_2_3.jpg')

In [None]:
# PC1, PC2 and PC3 together in 3-D, written to 'sub1_3d_1_2_3.jpg'.
plot_scatter_3d(V0, labels, q1=1, q2=2, q3=3, savefig='sub1_3d_1_2_3.jpg')