In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import LeavePOut
from sklearn.model_selection import StratifiedShuffleSplit

# Importing Data

Below the Dataset 7 and Dataset 8 are downloaded. The information for the chemicals that were used to treat the 3D Neural constructs are also downloaded. Dataset 7 represents Normalized gene expression (TPM) for day 16 neural constructs after dosing with toxic or non-toxic chemicals (two days of exposure). Dataset 8 represents Normalized gene expression (TPM) for day 21 neural constructs after dosing with toxic or non-toxic chemicals (seven days of exposure). 3D Neural constructs were grown for 14 days before chemical exposure.

In [4]:
day_21_df = pd.read_csv('/Users/danielaquijano/Documents/GitHub/Machine-Learning-Course-Projects/Final_Project_Neurotoxicity_Prediction/Final_Project_Files/Day_21_RNA_seq.csv')
day_21_df

Unnamed: 0,Gene.ID,Transcript_ID,day 21 b1a,day 21 b1b,day 21 b2a,day 21 b2b,day 21 b3a,day 21 b3b,day 21 b4a,day 21 b4b,...,day 21 t30a,day 21 t30b,day 21 t31a,day 21 t31b,day 21 t32a,day 21 t32b,day 21 t33a,day 21 t33b,day 21 t34a,day 21 t34b
0,A1BG,NM_130786.1,8.21,7.16,19.99,20.77,14.20,9.60,12.10,12.71,...,13.54,11.81,7.19,11.88,9.75,7.73,17.22,15.17,20.44,10.83
1,A1CF,"NM_001198818.1,NM_001198819.1,NM_001198820.1,N...",0.00,0.00,0.00,0.00,0.16,0.14,0.00,0.00,...,0.04,0.09,0.32,0.07,0.00,0.03,0.05,0.09,0.00,0.05
2,A2LD1,"NM_001195087.1,NM_033110.1",1.65,3.56,3.17,1.49,3.30,6.32,5.98,4.55,...,3.72,2.39,3.06,4.17,2.59,3.65,3.53,5.18,3.04,3.00
3,A2M,NM_000014.1,314.44,321.66,77.28,80.94,140.52,146.63,169.19,93.49,...,147.16,152.41,191.36,213.36,127.01,169.79,160.04,153.30,262.29,288.09
4,A2ML1,NM_144670.1,0.55,0.24,0.25,0.00,0.15,0.15,0.10,0.43,...,0.00,0.18,0.14,0.00,0.14,0.12,0.38,0.38,0.33,0.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19079,ZYG11A,NM_001004339.1,1.12,0.48,0.15,0.31,0.22,1.49,1.60,1.71,...,0.93,0.31,1.22,0.64,0.74,0.65,0.76,0.64,1.76,1.46
19080,ZYG11B,NM_024646.1,42.05,42.98,42.33,42.06,38.06,41.21,34.66,38.96,...,44.10,44.67,49.90,45.38,25.03,50.70,26.39,37.47,25.52,34.48
19081,ZYX,"NM_001010972.1,NM_003461.1",79.69,55.43,87.20,74.56,81.82,74.56,78.07,72.44,...,89.29,80.73,87.67,65.17,99.46,72.56,107.69,76.56,96.67,82.75
19082,ZZEF1,NM_015113.1,17.13,19.52,20.60,17.02,16.28,17.34,21.52,18.34,...,19.85,20.14,23.31,19.60,11.73,17.27,12.38,15.62,17.18,19.80


In [5]:
day_16_df = pd.read_csv('/Users/danielaquijano/Documents/GitHub/Machine-Learning-Course-Projects/Final_Project_Neurotoxicity_Prediction/Final_Project_Files/Day_16_RNA_seq.csv')
day_16_df

Unnamed: 0,Gene.ID,Transcript_ID,day 16 b1a,day 16 b1b,day 16 b2a,day 16 b2b,day 16 b3a,day 16 b3b,day 16 b4a,day 16 b4b,...,day 16 t30a,day 16 t30b,day 16 t31a,day 16 t31b,day 16 t32a,day 16 t32b,day 16 t33a,day 16 t33b,day 16 t34a,day 16 t34b
0,A1BG,NM_130786.1,9.57,9.83,15.34,14.42,10.43,8.29,11.39,13.67,...,13.81,12.15,14.24,11.22,14.95,10.58,9.75,12.41,16.38,10.78
1,A1CF,"NM_001198818.1,NM_001198819.1,NM_001198820.1,N...",0.00,0.04,0.13,0.00,0.27,0.00,0.08,0.00,...,0.27,0.00,0.00,0.22,0.00,0.19,0.00,0.19,0.00,0.19
2,A2LD1,"NM_001195087.1,NM_033110.1",4.68,4.46,1.61,3.24,2.70,2.98,3.06,1.39,...,3.37,2.12,2.20,2.96,3.54,1.85,1.95,3.50,3.85,3.55
3,A2M,NM_000014.1,368.93,350.46,171.77,119.16,235.31,292.92,184.61,172.11,...,277.85,212.12,242.60,271.34,221.18,227.61,204.09,173.85,230.21,323.13
4,A2ML1,NM_144670.1,0.00,0.00,0.17,0.00,0.22,0.00,0.25,0.13,...,0.00,0.00,0.00,0.25,0.15,0.31,0.00,0.00,0.39,0.09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19079,ZYG11A,NM_001004339.1,1.55,0.79,0.92,0.00,0.61,0.02,1.99,0.94,...,0.60,1.75,1.88,0.33,2.18,1.04,0.52,1.82,0.63,1.12
19080,ZYG11B,NM_024646.1,42.04,45.03,48.64,46.11,40.49,40.76,42.74,54.46,...,37.64,45.79,41.93,42.36,49.19,49.05,49.76,47.96,48.74,39.63
19081,ZYX,"NM_001010972.1,NM_003461.1",73.61,80.59,86.60,82.82,81.22,73.89,67.85,77.99,...,57.65,73.23,83.54,81.58,77.96,88.30,72.33,85.84,76.73,106.11
19082,ZZEF1,NM_015113.1,17.65,18.70,23.36,21.27,19.93,20.91,16.70,21.25,...,17.11,22.49,20.04,20.57,20.57,19.30,19.94,19.31,28.66,21.23


In [51]:
#Filter dataframes so that only rows of expresison values remain
day_16_filtered_df=day_16_df.iloc[0:19084,2:142]
day_21_filtered_df=day_21_df.iloc[0:19084,2:142]

In [52]:
day_16_filtered_df

Unnamed: 0,day 16 b1a,day 16 b1b,day 16 b2a,day 16 b2b,day 16 b3a,day 16 b3b,day 16 b4a,day 16 b4b,day 16 b5a,day 16 b5b,...,day 16 t30a,day 16 t30b,day 16 t31a,day 16 t31b,day 16 t32a,day 16 t32b,day 16 t33a,day 16 t33b,day 16 t34a,day 16 t34b
0,9.57,9.83,15.34,14.42,10.43,8.29,11.39,13.67,5.60,1.34,...,13.81,12.15,14.24,11.22,14.95,10.58,9.75,12.41,16.38,10.78
1,0.00,0.04,0.13,0.00,0.27,0.00,0.08,0.00,0.45,0.25,...,0.27,0.00,0.00,0.22,0.00,0.19,0.00,0.19,0.00,0.19
2,4.68,4.46,1.61,3.24,2.70,2.98,3.06,1.39,0.00,0.00,...,3.37,2.12,2.20,2.96,3.54,1.85,1.95,3.50,3.85,3.55
3,368.93,350.46,171.77,119.16,235.31,292.92,184.61,172.11,27.86,26.44,...,277.85,212.12,242.60,271.34,221.18,227.61,204.09,173.85,230.21,323.13
4,0.00,0.00,0.17,0.00,0.22,0.00,0.25,0.13,0.00,0.46,...,0.00,0.00,0.00,0.25,0.15,0.31,0.00,0.00,0.39,0.09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19079,1.55,0.79,0.92,0.00,0.61,0.02,1.99,0.94,2.17,2.14,...,0.60,1.75,1.88,0.33,2.18,1.04,0.52,1.82,0.63,1.12
19080,42.04,45.03,48.64,46.11,40.49,40.76,42.74,54.46,7.34,6.89,...,37.64,45.79,41.93,42.36,49.19,49.05,49.76,47.96,48.74,39.63
19081,73.61,80.59,86.60,82.82,81.22,73.89,67.85,77.99,62.48,55.84,...,57.65,73.23,83.54,81.58,77.96,88.30,72.33,85.84,76.73,106.11
19082,17.65,18.70,23.36,21.27,19.93,20.91,16.70,21.25,61.02,48.66,...,17.11,22.49,20.04,20.57,20.57,19.30,19.94,19.31,28.66,21.23


In [55]:
day_21_filtered_df

Unnamed: 0,day 21 b1a,day 21 b1b,day 21 b2a,day 21 b2b,day 21 b3a,day 21 b3b,day 21 b4a,day 21 b4b,day 21 b5a,day 21 b5b,...,day 21 t30a,day 21 t30b,day 21 t31a,day 21 t31b,day 21 t32a,day 21 t32b,day 21 t33a,day 21 t33b,day 21 t34a,day 21 t34b
0,8.21,7.16,19.99,20.77,14.20,9.60,12.10,12.71,3.99,3.90,...,13.54,11.81,7.19,11.88,9.75,7.73,17.22,15.17,20.44,10.83
1,0.00,0.00,0.00,0.00,0.16,0.14,0.00,0.00,0.37,1.05,...,0.04,0.09,0.32,0.07,0.00,0.03,0.05,0.09,0.00,0.05
2,1.65,3.56,3.17,1.49,3.30,6.32,5.98,4.55,0.27,0.00,...,3.72,2.39,3.06,4.17,2.59,3.65,3.53,5.18,3.04,3.00
3,314.44,321.66,77.28,80.94,140.52,146.63,169.19,93.49,2.39,2.43,...,147.16,152.41,191.36,213.36,127.01,169.79,160.04,153.30,262.29,288.09
4,0.55,0.24,0.25,0.00,0.15,0.15,0.10,0.43,0.13,0.31,...,0.00,0.18,0.14,0.00,0.14,0.12,0.38,0.38,0.33,0.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19079,1.12,0.48,0.15,0.31,0.22,1.49,1.60,1.71,2.19,2.80,...,0.93,0.31,1.22,0.64,0.74,0.65,0.76,0.64,1.76,1.46
19080,42.05,42.98,42.33,42.06,38.06,41.21,34.66,38.96,8.63,11.17,...,44.10,44.67,49.90,45.38,25.03,50.70,26.39,37.47,25.52,34.48
19081,79.69,55.43,87.20,74.56,81.82,74.56,78.07,72.44,35.02,25.76,...,89.29,80.73,87.67,65.17,99.46,72.56,107.69,76.56,96.67,82.75
19082,17.13,19.52,20.60,17.02,16.28,17.34,21.52,18.34,35.41,30.35,...,19.85,20.14,23.31,19.60,11.73,17.27,12.38,15.62,17.18,19.80


### Obtain Average of Day 16 and Day 21 gene expression

In [71]:
for i in range(len(day_21_filtered_df.columns)):
    df3.iloc[:,i] = (day_21_filtered_df.iloc[:,i]+day_16_filtered_df.iloc[:,i])/2

In [57]:
pd.set_option('display.max_columns', None)
df3.head()

Unnamed: 0,day 16 b1a,day 16 b1b,day 16 b2a,day 16 b2b,day 16 b3a,day 16 b3b,day 16 b4a,day 16 b4b,day 16 b5a,day 16 b5b,day 16 b6a,day 16 b6b,day 16 b7a,day 16 b7b,day 16 b8a,day 16 b8b,day 16 b9a,day 16 b9b,day 16 b10a,day 16 b10b,day 16 c1a,day 16 c1b,day 16 c2a,day 16 c2b,day 16 c3a,day 16 c3b,day 16 c4a,day 16 c4b,day 16 c5a,day 16 c5b,day 16 c6a,day 16 c6b,day 16 c7a,day 16 c7b,day 16 c8a,day 16 c8b,day 16 c9a,day 16 c9b,day 16 c10a,day 16 c10b,day 16 c11a,day 16 c11b,day 16 c12a,day 16 c12b,day 16 c13a,day 16 c13b,day 16 c14a,day 16 c14b,day 16 c15a,day 16 c15b,day 16 c16a,day 16 c16b,day 16 c17a,day 16 c17b,day 16 c18a,day 16 c18b,day 16 c19a,day 16 c19b,day 16 c20a,day 16 c20b,day 16 c21a,day 16 c21b,day 16 c22a,day 16 c22b,day 16 c23a,day 16 c23b,day 16 c24a,day 16 c24b,day 16 c25a,day 16 c25b,day 16 c26a,day 16 c26b,day 16 t1a,day 16 t1b,day 16 t2a,day 16 t2b,day 16 t3a,day 16 t3b,day 16 t4a,day 16 t4b,day 16 t5a,day 16 t5b,day 16 t6a,day 16 t6b,day 16 t7a,day 16 t7b,day 16 t8a,day 16 t8b,day 16 t9a,day 16 t9b,day 16 t10a,day 16 t10b,day 16 t11a,day 16 t11b,day 16 t12a,day 16 t12b,day 16 t13a,day 16 t13b,day 16 t14a,day 16 t14b,day 16 t15a,day 16 t15b,day 16 t16a,day 16 t16b,day 16 t17a,day 16 t17b,day 16 t18a,day 16 t18b,day 16 t19a,day 16 t19b,day 16 t20a,day 16 t20b,day 16 t21a,day 16 t21b,day 16 t22a,day 16 t22b,day 16 t23a,day 16 t23b,day 16 t24a,day 16 t24b,day 16 t25a,day 16 t25b,day 16 t26a,day 16 t26b,day 16 t27a,day 16 t27b,day 16 t28a,day 16 t28b,day 16 t29a,day 16 t29b,day 16 t30a,day 16 t30b,day 16 t31a,day 16 t31b,day 16 t32a,day 16 t32b,day 16 t33a,day 16 t33b,day 16 t34a,day 16 t34b,day 21 b1a,day 21 b1b,day 21 b2a,day 21 b2b,day 21 b3a,day 21 b3b,day 21 b4a,day 21 b4b,day 21 b5a,day 21 b5b,day 21 b6a,day 21 b6b,day 21 b7a,day 21 b7b,day 21 b8a,day 21 b8b,day 21 b9a,day 21 b9b,day 21 b10a,day 21 b10b,day 21 c1a,day 21 c1b,day 21 c2a,day 21 c2b,day 21 c3a,day 21 c3b,day 21 c4a,day 21 c4b,day 21 c5a,day 21 c5b,day 21 c6a,day 21 c6b,day 21 c7a,day 21 c7b,day 21 c8a,day 21 c8b,day 21 c9a,day 21 c9b,day 21 c10a,day 21 c10b,day 21 c11a,day 21 c11b,day 21 c12a,day 21 c12b,day 21 c13a,day 21 c13b,day 21 c14a,day 21 c14b,day 21 c15a,day 21 c15b,day 21 c16a,day 21 c16b,day 21 c17a,day 21 c17b,day 21 c18a,day 21 c18b,day 21 c19a,day 21 c19b,day 21 c20a,day 21 c20b,day 21 c21a,day 21 c21b,day 21 c22a,day 21 c22b,day 21 c23a,day 21 c23b,day 21 c24a,day 21 c24b,day 21 c25a,day 21 c25b,day 21 c26a,day 21 c26b,day 21 t1a,day 21 t1b,day 21 t2a,day 21 t2b,day 21 t3a,day 21 t3b,day 21 t4a,day 21 t4b,day 21 t5a,day 21 t5b,day 21 t6a,day 21 t6b,day 21 t7a,day 21 t7b,day 21 t8a,day 21 t8b,day 21 t9a,day 21 t9b,day 21 t10a,day 21 t10b,day 21 t11a,day 21 t11b,day 21 t12a,day 21 t12b,day 21 t13a,day 21 t13b,day 21 t14a,day 21 t14b,day 21 t15a,day 21 t15b,day 21 t16a,day 21 t16b,day 21 t17a,day 21 t17b,day 21 t18a,day 21 t18b,day 21 t19a,day 21 t19b,day 21 t20a,day 21 t20b,day 21 t21a,day 21 t21b,day 21 t22a,day 21 t22b,day 21 t23a,day 21 t23b,day 21 t24a,day 21 t24b,day 21 t25a,day 21 t25b,day 21 t26a,day 21 t26b,day 21 t27a,day 21 t27b,day 21 t28a,day 21 t28b,day 21 t29a,day 21 t29b,day 21 t30a,day 21 t30b,day 21 t31a,day 21 t31b,day 21 t32a,day 21 t32b,day 21 t33a,day 21 t33b,day 21 t34a,day 21 t34b
0,9.57,9.83,15.34,14.42,10.43,8.29,11.39,13.67,5.6,1.34,14.59,7.77,9.73,8.88,13.52,7.58,7.13,16.86,12.62,7.38,14.0,14.36,13.97,11.78,10.46,14.78,11.67,10.99,8.56,14.78,8.32,13.8,10.56,9.53,11.95,13.14,14.84,7.95,13.78,13.75,9.51,13.29,14.18,14.65,10.83,10.58,14.84,11.72,10.69,15.02,12.63,13.41,15.35,8.1,10.22,10.37,13.97,9.21,11.81,17.18,14.02,14.37,16.7,11.42,14.02,14.8,15.64,14.61,10.72,9.79,10.46,10.73,14.03,5.79,19.31,8.88,13.22,14.85,10.8,7.75,13.63,8.69,13.24,10.06,8.64,16.48,10.38,15.98,8.95,9.89,13.19,10.33,10.59,13.16,11.17,19.06,11.61,18.95,12.82,13.04,8.93,10.13,8.65,11.18,13.58,15.34,13.15,14.4,12.68,11.23,12.42,11.09,10.76,12.67,12.7,6.85,11.81,8.41,6.78,12.17,14.75,16.71,16.74,10.01,8.63,18.48,12.1,8.99,12.44,10.09,13.81,12.15,14.24,11.22,14.95,10.58,9.75,12.41,16.38,10.78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0.0,0.04,0.13,0.0,0.27,0.0,0.08,0.0,0.45,0.25,0.0,0.0,0.4,0.1,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.21,0.06,0.26,0.0,0.02,0.0,0.0,0.0,0.05,0.0,0.0,0.15,0.0,0.0,0.0,0.22,0.0,0.0,0.0,0.06,0.06,0.0,0.0,0.17,0.05,0.12,0.0,0.0,0.0,0.18,0.0,0.0,0.17,0.0,0.0,0.15,0.0,0.05,0.0,0.16,0.06,0.0,0.0,0.25,0.0,0.24,0.0,0.0,0.13,0.05,0.0,0.0,0.0,0.14,0.08,0.08,0.23,0.12,0.07,0.0,0.0,0.0,0.15,0.06,0.0,0.0,0.06,0.2,0.0,0.0,0.0,0.06,0.0,0.08,0.08,0.23,0.12,0.15,0.1,0.2,0.0,0.07,0.0,0.0,0.22,0.04,0.0,0.0,0.0,0.04,0.0,0.09,0.06,0.17,0.0,0.0,0.0,0.07,0.17,0.24,0.0,0.1,0.0,0.0,0.22,0.0,0.27,0.0,0.0,0.22,0.0,0.19,0.0,0.19,0.0,0.19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,4.68,4.46,1.61,3.24,2.7,2.98,3.06,1.39,0.0,0.0,3.84,4.99,2.32,2.05,1.91,4.99,5.69,1.89,1.24,5.28,2.92,3.82,4.79,2.89,3.16,2.92,3.2,3.04,4.37,1.99,3.29,2.63,4.14,2.21,4.43,1.31,5.68,4.76,2.47,1.93,5.37,3.48,2.86,2.83,3.34,3.19,4.28,3.62,3.31,3.5,3.36,3.28,5.8,5.06,1.17,3.97,2.63,2.35,8.36,4.27,3.79,2.42,2.69,3.17,3.3,5.7,3.98,5.0,1.93,3.65,3.74,1.6,4.96,3.24,3.62,2.91,5.69,2.49,2.73,2.66,3.19,3.39,3.76,2.08,3.93,1.78,2.78,3.44,2.78,2.44,3.48,2.13,7.85,1.77,5.21,4.04,1.27,0.83,0.65,0.2,3.05,3.24,4.7,2.61,2.98,2.46,2.31,4.42,2.47,1.48,4.39,2.97,2.67,2.97,4.28,2.87,4.0,6.53,2.2,5.11,1.62,2.5,3.39,3.62,3.85,5.22,2.45,3.41,4.21,3.35,3.37,2.12,2.2,2.96,3.54,1.85,1.95,3.5,3.85,3.55,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,368.93,350.46,171.77,119.16,235.31,292.92,184.61,172.11,27.86,26.44,208.13,203.13,238.37,196.38,249.4,192.84,257.61,245.86,255.38,219.06,211.75,238.54,167.05,182.5,216.99,198.09,231.27,213.97,230.51,195.99,256.69,240.51,265.94,279.54,316.27,191.34,347.59,244.82,216.15,234.06,297.64,322.89,302.2,264.73,164.5,256.21,189.87,209.08,146.09,248.96,222.82,199.47,193.27,162.52,229.92,230.62,236.05,243.56,247.66,222.09,236.42,265.55,175.89,220.2,203.99,215.51,296.04,323.86,239.42,259.67,220.6,221.86,230.39,225.82,238.62,198.01,102.05,157.47,251.8,174.06,224.07,174.31,262.88,280.7,245.01,236.19,228.6,214.41,178.01,175.98,90.73,75.89,292.17,267.25,250.74,292.48,115.78,108.79,167.62,179.54,177.0,280.49,214.62,228.59,284.08,415.4,252.9,297.57,230.48,287.76,235.31,277.54,267.92,390.25,199.75,238.36,191.14,181.37,228.56,235.58,269.39,270.32,142.14,122.48,236.06,234.31,199.5,208.57,177.17,155.63,277.85,212.12,242.6,271.34,221.18,227.61,204.09,173.85,230.21,323.13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.0,0.0,0.17,0.0,0.22,0.0,0.25,0.13,0.0,0.46,0.03,0.63,0.23,0.04,0.0,0.11,0.34,0.48,0.76,0.0,0.11,0.26,0.0,0.18,0.76,0.0,0.0,0.1,0.0,0.0,0.35,0.34,0.17,0.19,0.0,0.0,0.28,0.49,0.54,0.87,0.16,0.39,0.1,0.37,0.0,0.49,0.31,0.51,0.0,0.11,0.25,0.0,0.0,0.22,0.47,0.0,0.31,0.12,0.0,0.0,0.12,0.34,0.49,0.0,0.03,0.64,0.12,0.1,0.12,0.52,0.42,0.0,0.0,0.28,0.04,0.0,0.06,0.0,0.0,0.0,0.54,0.0,0.5,0.18,0.76,0.3,0.13,0.07,0.2,0.41,0.44,0.42,0.19,0.2,0.12,0.0,1.42,0.42,1.11,1.45,0.09,0.42,0.13,0.0,0.0,0.3,0.48,0.57,0.3,0.22,0.19,0.4,0.05,0.0,0.65,0.7,0.11,0.46,0.0,0.0,0.14,0.28,0.11,0.26,0.26,0.3,0.21,0.1,0.22,0.0,0.0,0.0,0.0,0.25,0.15,0.31,0.0,0.0,0.39,0.09,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [81]:
df3 = df.drop(df3.iloc[:,139:280], axis=1)

Unnamed: 0,day 16 b1a,day 16 b1b,day 16 b2a,day 16 b2b,day 16 b3a,day 16 b3b,day 16 b4a,day 16 b4b,day 16 b5a,day 16 b5b,day 16 b6a,day 16 b6b,day 16 b7a,day 16 b7b,day 16 b8a,day 16 b8b,day 16 b9a,day 16 b9b,day 16 b10a,day 16 b10b,day 16 c1a,day 16 c1b,day 16 c2a,day 16 c2b,day 16 c3a,day 16 c3b,day 16 c4a,day 16 c4b,day 16 c5a,day 16 c5b,day 16 c6a,day 16 c6b,day 16 c7a,day 16 c7b,day 16 c8a,day 16 c8b,day 16 c9a,day 16 c9b,day 16 c10a,day 16 c10b,day 16 c11a,day 16 c11b,day 16 c12a,day 16 c12b,day 16 c13a,day 16 c13b,day 16 c14a,day 16 c14b,day 16 c15a,day 16 c15b,day 16 c16a,day 16 c16b,day 16 c17a,day 16 c17b,day 16 c18a,day 16 c18b,day 16 c19a,day 16 c19b,day 16 c20a,day 16 c20b,day 16 c21a,day 16 c21b,day 16 c22a,day 16 c22b,day 16 c23a,day 16 c23b,day 16 c24a,day 16 c24b,day 16 c25a,day 16 c25b,day 16 c26a,day 16 c26b,day 16 t1a,day 16 t1b,day 16 t2a,day 16 t2b,day 16 t3a,day 16 t3b,day 16 t4a,day 16 t4b,day 16 t5a,day 16 t5b,day 16 t6a,day 16 t6b,day 16 t7a,day 16 t7b,day 16 t8a,day 16 t8b,day 16 t9a,day 16 t9b,day 16 t10a,day 16 t10b,day 16 t11a,day 16 t11b,day 16 t12a,day 16 t12b,day 16 t13a,day 16 t13b,day 16 t14a,day 16 t14b,day 16 t15a,day 16 t15b,day 16 t16a,day 16 t16b,day 16 t17a,day 16 t17b,day 16 t18a,day 16 t18b,day 16 t19a,day 16 t19b,day 16 t20a,day 16 t20b,day 16 t21a,day 16 t21b,day 16 t22a,day 16 t22b,day 16 t23a,day 16 t23b,day 16 t24a,day 16 t24b,day 16 t25a,day 16 t25b,day 16 t26a,day 16 t26b,day 16 t27a,day 16 t27b,day 16 t28a,day 16 t28b,day 16 t29a,day 16 t29b,day 16 t30a,day 16 t30b,day 16 t31a,day 16 t31b,day 16 t32a,day 16 t32b,day 16 t33a,day 16 t33b,day 16 t34a,day 21 t34b
1,0.00,0.04,0.13,0.00,0.27,0.00,0.08,0.00,0.45,0.25,0.00,0.00,0.40,0.10,0.00,0.00,0.06,0.00,0.00,0.00,0.00,0.00,0.00,0.07,0.21,0.06,0.26,0.00,0.02,0.00,0.00,0.00,0.05,0.00,0.00,0.15,0.00,0.00,0.00,0.22,0.00,0.00,0.00,0.06,0.06,0.00,0.00,0.17,0.05,0.12,0.00,0.00,0.00,0.18,0.00,0.00,0.17,0.00,0.00,0.15,0.00,0.05,0.00,0.16,0.06,0.00,0.00,0.25,0.00,0.24,0.00,0.00,0.13,0.05,0.00,0.00,0.00,0.14,0.08,0.08,0.23,0.12,0.07,0.00,0.00,0.00,0.15,0.06,0.00,0.00,0.06,0.20,0.00,0.00,0.00,0.06,0.00,0.08,0.08,0.23,0.12,0.15,0.10,0.20,0.00,0.07,0.00,0.00,0.22,0.04,0.00,0.00,0.00,0.04,0.00,0.09,0.06,0.17,0.00,0.00,0.00,0.07,0.17,0.24,0.00,0.10,0.00,0.00,0.22,0.00,0.27,0.00,0.00,0.22,0.00,0.19,0.00,0.19,0.00,
2,4.68,4.46,1.61,3.24,2.70,2.98,3.06,1.39,0.00,0.00,3.84,4.99,2.32,2.05,1.91,4.99,5.69,1.89,1.24,5.28,2.92,3.82,4.79,2.89,3.16,2.92,3.20,3.04,4.37,1.99,3.29,2.63,4.14,2.21,4.43,1.31,5.68,4.76,2.47,1.93,5.37,3.48,2.86,2.83,3.34,3.19,4.28,3.62,3.31,3.50,3.36,3.28,5.80,5.06,1.17,3.97,2.63,2.35,8.36,4.27,3.79,2.42,2.69,3.17,3.30,5.70,3.98,5.00,1.93,3.65,3.74,1.60,4.96,3.24,3.62,2.91,5.69,2.49,2.73,2.66,3.19,3.39,3.76,2.08,3.93,1.78,2.78,3.44,2.78,2.44,3.48,2.13,7.85,1.77,5.21,4.04,1.27,0.83,0.65,0.20,3.05,3.24,4.70,2.61,2.98,2.46,2.31,4.42,2.47,1.48,4.39,2.97,2.67,2.97,4.28,2.87,4.00,6.53,2.20,5.11,1.62,2.50,3.39,3.62,3.85,5.22,2.45,3.41,4.21,3.35,3.37,2.12,2.20,2.96,3.54,1.85,1.95,3.50,3.85,
3,368.93,350.46,171.77,119.16,235.31,292.92,184.61,172.11,27.86,26.44,208.13,203.13,238.37,196.38,249.40,192.84,257.61,245.86,255.38,219.06,211.75,238.54,167.05,182.50,216.99,198.09,231.27,213.97,230.51,195.99,256.69,240.51,265.94,279.54,316.27,191.34,347.59,244.82,216.15,234.06,297.64,322.89,302.20,264.73,164.50,256.21,189.87,209.08,146.09,248.96,222.82,199.47,193.27,162.52,229.92,230.62,236.05,243.56,247.66,222.09,236.42,265.55,175.89,220.20,203.99,215.51,296.04,323.86,239.42,259.67,220.60,221.86,230.39,225.82,238.62,198.01,102.05,157.47,251.80,174.06,224.07,174.31,262.88,280.70,245.01,236.19,228.60,214.41,178.01,175.98,90.73,75.89,292.17,267.25,250.74,292.48,115.78,108.79,167.62,179.54,177.00,280.49,214.62,228.59,284.08,415.40,252.90,297.57,230.48,287.76,235.31,277.54,267.92,390.25,199.75,238.36,191.14,181.37,228.56,235.58,269.39,270.32,142.14,122.48,236.06,234.31,199.50,208.57,177.17,155.63,277.85,212.12,242.60,271.34,221.18,227.61,204.09,173.85,230.21,
4,0.00,0.00,0.17,0.00,0.22,0.00,0.25,0.13,0.00,0.46,0.03,0.63,0.23,0.04,0.00,0.11,0.34,0.48,0.76,0.00,0.11,0.26,0.00,0.18,0.76,0.00,0.00,0.10,0.00,0.00,0.35,0.34,0.17,0.19,0.00,0.00,0.28,0.49,0.54,0.87,0.16,0.39,0.10,0.37,0.00,0.49,0.31,0.51,0.00,0.11,0.25,0.00,0.00,0.22,0.47,0.00,0.31,0.12,0.00,0.00,0.12,0.34,0.49,0.00,0.03,0.64,0.12,0.10,0.12,0.52,0.42,0.00,0.00,0.28,0.04,0.00,0.06,0.00,0.00,0.00,0.54,0.00,0.50,0.18,0.76,0.30,0.13,0.07,0.20,0.41,0.44,0.42,0.19,0.20,0.12,0.00,1.42,0.42,1.11,1.45,0.09,0.42,0.13,0.00,0.00,0.30,0.48,0.57,0.30,0.22,0.19,0.40,0.05,0.00,0.65,0.70,0.11,0.46,0.00,0.00,0.14,0.28,0.11,0.26,0.26,0.30,0.21,0.10,0.22,0.00,0.00,0.00,0.00,0.25,0.15,0.31,0.00,0.00,0.39,
5,1.15,0.00,0.59,0.00,0.00,0.32,0.00,0.00,1.35,0.00,0.31,0.00,0.00,0.00,0.00,0.32,0.63,1.21,1.87,0.00,0.86,0.65,0.49,0.00,1.66,0.00,0.00,0.26,0.55,0.25,0.00,0.78,0.24,1.18,0.94,0.34,0.00,0.91,0.79,0.99,1.14,0.78,0.97,0.55,0.00,0.29,0.52,0.51,0.00,0.26,0.61,0.64,0.30,0.00,0.30,0.28,0.56,0.75,1.84,0.00,0.00,0.84,0.00,0.00,0.28,0.71,0.30,0.28,0.00,0.00,0.00,0.00,0.00,0.49,0.29,0.00,0.00,0.00,1.40,0.28,0.00,1.08,0.56,0.24,0.75,0.00,0.00,0.00,0.00,0.52,0.00,0.25,1.95,1.76,1.52,0.54,0.00,0.00,0.00,0.26,0.00,0.00,0.32,0.91,1.27,4.41,0.00,0.00,0.24,0.27,0.24,0.00,3.62,3.79,0.52,0.00,0.00,0.00,0.00,0.57,1.38,4.15,0.27,0.54,0.47,0.00,0.00,0.27,0.28,0.28,0.92,0.00,1.02,0.42,0.00,0.00,0.00,0.25,0.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19079,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.46
19080,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,34.48
19081,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,82.75
19082,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,19.80


Next, combine day_16_df and day_21_df into a single dataframe that reflects the averge values of gene expression between the the two dataframes

In [None]:
for i in range(len(raw_data)):
    for j in range(1, len(raw_data.columns)):
        raw_data_normalized.iat[i,j]=(int(raw_data.iat[i,j])*1000000)/int(raw_cols_sums[j])


Log-2 fold change 

Visualize Normalization with boxplot

Visualize Log-2fold change

Add labels of toxic/nontoxic

### Exploratory Data Analysis

Heat map of genes with log2 fold change above a specific thershold

### PCA

### Unsupervised Clustering 

In [None]:
Multidimensional Scaling


### GO term enrichment

### Differentially Expressed Genes

### Obtain Top 100 Differentially Expressed Genes

In [None]:
Visualizing Top 100 Differentially Expressed Gnes

# Implementation of Classification Algorithms

In [None]:
Support Vector Machines