# LIBRARIES

In [None]:
import os
import sys 
import numpy as np 
import pandas as pd
import yellowbrick as yb 
import matplotlib as mpl 
import matplotlib.pyplot as plt 
from yellowbrick.features import PCADecomposition
from yellowbrick.features import RadViz 
from sklearn.preprocessing import LabelEncoder
from yellowbrick.features import Manifold
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) in inches for every figure that does not set its own.
mpl.rcParams["figure.figsize"] = (9,6)

# FUNCTIONS

In [None]:
def load_data_complete_s1(path):
    """Load the Scenario 1 data set: Tumor_Core & Tumor_Periphery.

    Reads the CSV at ``path`` and discards every row whose ``classes`` value
    is ``"NP"`` (the N_Periphery samples).

    Parameters
    ----------
    path : str or path-like
        CSV file to read; it must contain a ``classes`` column.

    Returns
    -------
    features : pandas.DataFrame
        The remaining rows without the ``classes`` column. The row index is
        the one produced by ``read_csv``; it is not renumbered after filtering.
    labels : array-like
        ``classes`` values of the remaining rows (``Series.values``), in the
        same row order as ``features``.
    """
    table = pd.read_csv(path)

    # Keep every row except the "NP" ones.
    kept_rows = table[table["classes"] != "NP"]

    # Split the label column from the feature columns.
    features = kept_rows.drop(columns="classes")
    labels = kept_rows["classes"].values

    return features, labels

def load_data_complete_s2(path):
    """Load the Scenario 2 data set: Normal_Periphery & Tumor_Periphery.

    Reads the CSV at ``path`` and discards every row whose ``classes`` value
    is ``"TC"`` (the T_Core samples).

    Parameters
    ----------
    path : str or path-like
        CSV file to read; it must contain a ``classes`` column.

    Returns
    -------
    features : pandas.DataFrame
        The remaining rows without the ``classes`` column. The row index is
        the one produced by ``read_csv``; it is not renumbered after filtering.
    labels : array-like
        ``classes`` values of the remaining rows (``Series.values``), in the
        same row order as ``features``.
    """
    table = pd.read_csv(path)

    # Keep every row except the "TC" ones.
    kept_rows = table[table["classes"] != "TC"]

    # Split the label column from the feature columns.
    features = kept_rows.drop(columns="classes")
    labels = kept_rows["classes"].values

    return features, labels

def load_data_complete_s3(path):
    """Load the Scenario 3 data set: Tumor_Periphery&Core & Normal_Periphery.

    The result is built from two parts of the CSV at ``path``:

    1. the rows whose ``classes`` value is neither ``"TP"`` nor ``"TC"``,
       with their labels untouched (the N_Periphery part);
    2. the rows whose ``classes`` value is not ``"NP"``, all relabelled
       ``"TPC"`` (the merged tumour part).

    Part 1 is placed before part 2 and the row index is renumbered from 0.
    Note that a row carrying any label other than NP/TP/TC satisfies both
    conditions and therefore appears in both parts.

    Parameters
    ----------
    path : str or path-like
        CSV file to read; it must contain a ``classes`` column.

    Returns
    -------
    features : pandas.DataFrame
        The combined rows without the ``classes`` column.
    labels : array-like
        ``classes`` values of the combined rows (``Series.values``), in the
        same row order as ``features``.
    """
    table = pd.read_csv(path)
    class_column = table["classes"]

    # Part 1: isolate N_Periphery by removing the TP and TC rows.
    normal_part = table[(class_column != "TP") & (class_column != "TC")]

    # Part 2: remove N_Periphery and give what is left the single label "TPC".
    tumor_part = table[class_column != "NP"].assign(classes="TPC")

    # Stack both parts (normal first) and renumber the rows.
    combined = pd.concat([normal_part, tumor_part]).reset_index(drop=True)

    # Split the label column from the feature columns.
    features = combined.drop(columns="classes")
    labels = combined["classes"].values

    return features, labels

def load_data_complete_s4(path):
    """Load the Scenario 4 data set: Tumor_Core & Tumor_Periphery & N_Periphery.

    Reads the CSV at ``path`` and keeps every row; no class is filtered out.

    Parameters
    ----------
    path : str or path-like
        CSV file to read; it must contain a ``classes`` column.

    Returns
    -------
    features : pandas.DataFrame
        All rows without the ``classes`` column.
    labels : array-like
        ``classes`` values of all rows (``Series.values``), in the same row
        order as ``features``.
    """
    table = pd.read_csv(path)

    # Split the label column from the feature columns.
    labels = table["classes"].values
    features = table.drop(columns="classes")

    return features, labels

# LOADING DATA

In [None]:
# CSV with the complete GBM table (relative path, resolved against the working directory).
path = '../Data/DATA_Complete_GBM.csv'

# Run the four scenario loaders in order; each call reads the CSV again and
# yields one (features, labels) pair.
scenario_loaders = (
    load_data_complete_s1,
    load_data_complete_s2,
    load_data_complete_s3,
    load_data_complete_s4,
)
(
    (featuress1, labelss1),
    (featuress2, labelss2),
    (featuress3, labelss3),
    (featuress4, labelss4),
) = [loader(path) for loader in scenario_loaders]


# PREPARING DATA

In [None]:
# Gene panels used to subset the Scenario 4 feature table.
features65 = [
    'ATP1A2', 'NMB', 'SPARCL1', 'USMG5', 'PTN',
    'PCSK1N', 'ANAPC11', 'TMSB10', 'TMEM144', 'PSMB4',
    'NRBP2', 'FTL', 'MIR3682', 'S1PR1', 'PRODH',
    'SRP9', 'GAP43', 'RPL30', 'LAMA5', 'ECHDC2',
    'EGFR', 'CALM1', 'APOD', 'SPOCK1', 'ANXA1',
    'PTGDS', 'EIF1', 'VIM', 'MGLL', 'ITM2C',
    'PLLP', 'ITGB8', 'HES6', 'RPS27L', 'GFAP',
    'TRIM2', 'APOE', 'ANXA5', 'NAV1', 'TMSB4X',
    'HSPB1', 'SEC61G', 'IGSF6', 'IGFBP2', 'RPLP1',
    'CSF1R', 'NACA', 'HTRA1', 'CSF3R', 'CREG1',
    'FAM107B', 'SLAMF9', 'GLDN', 'EMP3', 'COMMD6',
    'ANXA2', 'RPL38', 'CEBPD', 'APBB1IP', 'HLA-DRB6',
    'TUBGCP2', 'LCP2', 'LOC100505854', 'IFI44', 'GNG11',
]

features12 = [
    'ANXA1', 'APOD', 'ATP1A2', 'CSF1R', 'EGFR', 'FTL',
    'GFAP', 'ITGB8', 'PTGDS', 'SRP9', 'TMSB10', 'TMSB4X',
]

features8 = [
    "ATP1A2", "SPARCL1", "FTL", "EGFR",
    "SPOCK1", "ANXA1", "APOD", "TMSB4X",
]

# Targets: the Scenario 4 labels (no class filtered out).
y = labelss4

# Full feature table (23,368 genes according to the original note).
X23368 = featuress4

# Column subsets for the 65-, 12- and 8-gene panels.
X65, X12, X8 = (
    featuress4[gene_panel]
    for gene_panel in (features65, features12, features8)
)

# Sorted unique class names, handed to the visualizers as `classes`.
classes = list(np.unique(labelss4))

# t-SNE

In [None]:
# Display the class names that are passed as `classes` to the visualizers below.
classes

In [None]:
# Single t-SNE embedding of the 8-gene panel, coloured by class.
X=X8

# t-SNE is stochastic: without a fixed seed the embedding (and the figure)
# changes on every run, so pin `random_state` to make the plot reproducible.
viz = Manifold(
    manifold="tsne",
    classes=classes,
    colormap='tab20c',
    projection=2,
    alpha=1,
    random_state=42,
)
# The visualizer is given integer-encoded targets; LabelEncoder sorts the
# labels, which matches the sorted order of `classes`.
viz.fit_transform(X, LabelEncoder().fit_transform(y))
viz.show()

In [None]:
# Four-panel t-SNE figure, one panel per gene set:
#   A = all genes, B = 65 genes, C = 12 genes, D = 8 genes.
tsne_panels = [("A", X23368), ("B", X65), ("C", X12), ("D", X8)]

nrows=1
ncols=len(tsne_panels)

fig = plt.figure(figsize=(5*ncols,5*nrows))
fig.subplots_adjust(hspace=0.45, wspace=0.02)

# Encode the string labels once; the same targets are used in every panel.
y_encoded = LabelEncoder().fit_transform(y)

for i, (title, X) in enumerate(tsne_panels, start=1):
    ax = fig.add_subplot(nrows, ncols, i)

    # Pass the axes explicitly instead of relying on the "current axes"
    # pyplot state, and fix the seed so the stochastic t-SNE embedding is
    # reproducible across runs.
    viz = Manifold(
        ax=ax,
        manifold="tsne",
        classes=classes,
        colormap='tab20c',
        projection=2,
        alpha=1,
        random_state=42,
    )
    viz.fit_transform(X, y_encoded)
    viz.finalize()

    # Strip tick labels and grid, then replace the visualizer's own title
    # with the panel letter.
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.grid(False)
    ax.set_title(title,fontsize=35,y=1.05)

model_name = "t-SNE"
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./images", exist_ok=True)
fig.savefig("./images"+"/"+model_name+".pdf", bbox_inches = "tight", format='pdf')

# RadViz and PCA 2D

In [None]:
# Display the class names that are passed as `classes` to the visualizers below.
classes

In [None]:
# RadViz of the 8-gene panel, coloured by class.
X = X8

# Integer-encode the string labels before handing them to the visualizer.
encoded_targets = LabelEncoder().fit_transform(y)

visualizer = RadViz(
    classes=classes,
    colormap='tab20c',
    alpha=1,
)
visualizer.fit(X, encoded_targets)  # fit the data to the visualizer
visualizer.transform(X)             # transform the data
visualizer.show()                   # finalize and render the figure

In [None]:
# 2D PCA projection of the 8-gene panel, with the feature vectors drawn on top
# (proj_features=True) and points coloured by class.
X = X8

# Integer-encode the string labels before handing them to the visualizer.
encoded_targets = LabelEncoder().fit_transform(y)

visualizer = PCADecomposition(
    classes=classes,
    proj_features=True,
    projection=2,
    colormap='tab20c',
    alpha=1,
)
visualizer.fit_transform(X, encoded_targets)
visualizer.show()

# RadViz - 8 AND 12 GENES OR FEATURES

In [None]:
# Two-panel RadViz figure: A = 12-gene panel, B = 8-gene panel.
radviz_panels = [("A", X12), ("B", X8)]

nrows=1
ncols=len(radviz_panels)

fig = plt.figure(figsize=(5*ncols,5*nrows))
fig.subplots_adjust(hspace=0.45, wspace=0.2)

# Encode the string labels once; the same targets are used in both panels.
y_encoded = LabelEncoder().fit_transform(y)

for i, (title, X) in enumerate(radviz_panels, start=1):
    ax = fig.add_subplot(nrows, ncols, i)

    # Pass the axes explicitly instead of relying on the "current axes"
    # pyplot state.
    visualizer = RadViz(ax=ax, classes=classes, colormap='tab20c', alpha=1)
    visualizer.fit(X, y_encoded)
    visualizer.transform(X)
    visualizer.finalize()

    # Strip tick labels, grid and frame so only the RadViz circle remains.
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.grid(False)
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Replace the visualizer's own title with the panel letter and move the
    # legend to the upper-right corner of the panel.
    ax.set_title(title,fontsize=25,y=1.01)
    ax.legend(loc=[0.7,0.95])

model_name = "RadViz_12_8"
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./images", exist_ok=True)
fig.savefig("./images"+"/"+model_name+".pdf", bbox_inches = "tight", format='pdf')

# PCA 3D - 8 AND 12 GENES OR FEATURES

In [None]:
# 3D PCA projection of the 12-gene panel, saved as ./images/PCA_12.pdf.
X=X12
plt.figure(figsize=(12,12))
visualizer = PCADecomposition(classes=classes, proj_features=True, projection=3, colormap='tab20c', alpha=1)
visualizer.fit_transform(X,LabelEncoder().fit_transform(y))
model_name="PCA_12"
# Saving fails if the output directory is missing, so create it first.
os.makedirs("./images", exist_ok=True)
visualizer.show(outpath="./images"+"/"+model_name+".pdf",bbox_inches = "tight", format='pdf')


In [None]:
# 3D PCA projection of the 8-gene panel, saved as ./images/PCA_8.pdf.
X=X8
plt.figure(figsize=(12,12))
visualizer = PCADecomposition(classes=classes, proj_features=True, projection=3, colormap='tab20c', alpha=1)
visualizer.fit_transform(X,LabelEncoder().fit_transform(y))
model_name="PCA_8"
# Saving fails if the output directory is missing, so create it first.
os.makedirs("./images", exist_ok=True)
visualizer.show(outpath="./images"+"/"+model_name+".pdf",bbox_inches = "tight", format='pdf')
