In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import numpy as np

In [None]:
# Load the RNA-seq spreadsheet and simplify its column names: the digits that
# sit between a "w_" / "m_" prefix and "_O" are dropped ("w_<digits>_O" -> "w_O").
def _drop_sample_number(col_name):
    """Collapse "w_<digits>_O" and "m_<digits>_O" inside col_name to "w_O" / "m_O"."""
    without_w_number = re.sub("w_[0-9]+_O", "w_O", col_name)
    return re.sub("m_[0-9]+_O", "m_O", without_w_number)

data_df = pd.read_excel("RNASeqData.xlsx").rename(columns=_drop_sample_number)
# Plain list of the renamed column labels, used by later cells to pick columns.
columns = [*data_df.columns]

In [None]:
# Row IDs of the genes flagged as significant by the previous analysis.
significant_genes = [8384, 2066, 13503, 7204, 188, 10516, 5750, 3290]
# Keep only the rows whose RowID is in that list; the result keeps data_df's
# row order, not the order of significant_genes.
is_significant = data_df['RowID'].isin(significant_genes)
significant_data_df = data_df.loc[is_significant]

In [None]:
def _select(names, token, present=True):
    """Return the names that contain token (or that lack it when present is False)."""
    return [name for name in names if (token in name) == present]

# Successively narrow the column names by the substrings they contain.
norm_cols = _select(columns, "Norm")
# "HC" presumably marks healthy-control samples; confirm against the data dictionary.
norm_cols_healthy = _select(norm_cols, "HC")
norm_cols_patient = _select(norm_cols, "HC", present=False)
norm_cols_base = _select(norm_cols_patient, "Base")

In [None]:
# Preparing data: select the normalised columns for each group and transpose,
# so that each selected column becomes a row and each significant gene a column.

# Healthy group.
df_norm_healthy = significant_data_df[norm_cols_healthy]
df_norm_healthy_T = df_norm_healthy.T
# One label per transposed row, sized from the data instead of a hard-coded 20.
labels_healthy = [1] * len(df_norm_healthy_T)

# Patient baseline group.
df_norm_base = significant_data_df[norm_cols_base]
df_norm_base_T = df_norm_base.T
labels_patient = [0] * len(df_norm_base_T)

# NOTE(review): these lists encode healthy=1 / patient=0, while the Y1 / Y2
# arrays built further down use healthy=0 / patient=1. Only Y1 / Y2 feed the
# classifiers in this notebook; confirm which convention is intended.

In [None]:
# Feature matrices as NumPy arrays: one row per transposed sample row.
X1 = df_norm_healthy_T.values
X2 = df_norm_base_T.values
# Class labels as (1, n) row vectors: 0 for the healthy rows, 1 for the patient
# baseline rows. n is taken from the matching feature matrix so the label count
# always equals the sample count (previously hard-coded to 20).
Y1 = np.zeros((1, X1.shape[0]))
Y2 = np.ones((1, X2.shape[0]))

In [None]:
# Stack the healthy rows (X1) on top of the patient baseline rows (X2).
X = np.vstack((X1, X2))
X.shape

(40, 8)

In [None]:
# Join the two (1, n) label rows side by side and flatten to a 1-D vector.
# ravel() replaces the hard-coded reshape to 40 so any sample count works.
Y = np.concatenate((Y1, Y2), axis=1).ravel()
# Guard against a features/labels mismatch before anything is trained.
assert Y.shape[0] == X.shape[0], "X and Y must contain the same number of samples"
Y.shape

(40,)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation.
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the
# same split (0 matches the seed already used for the decision tree below).
# stratify=Y keeps the class proportions of Y in both splits, which matters
# because the held-out set is very small.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.20, random_state=0, stratify=Y
)

**SVM**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

# Support vector classifier with a linear kernel, fitted on the training split
# and evaluated on the held-out split.
svclassifier = SVC(kernel='linear').fit(X_train, Y_train)
Y_pred = svclassifier.predict(X_test)

# Confusion matrix followed by the per-class precision / recall / F1 report.
print(
    confusion_matrix(Y_test, Y_pred),
    classification_report(Y_test, Y_pred),
    sep="\n",
)
# Raw predictions next to the true labels for a quick eyeball comparison.
print("Predictions: ", Y_pred)
print("Expected:    ", Y_test)

[[3 1]
 [0 4]]
              precision    recall  f1-score   support

         0.0       1.00      0.75      0.86         4
         1.0       0.80      1.00      0.89         4

    accuracy                           0.88         8
   macro avg       0.90      0.88      0.87         8
weighted avg       0.90      0.88      0.87         8

Predictions:  [0. 1. 0. 0. 1. 1. 1. 1.]
Expected:     [0. 1. 0. 0. 1. 0. 1. 1.]


**Decision Trees**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with a fixed seed, fitted on the training split.
# confusion_matrix / classification_report come from the import in the SVM cell.
dtclassifier = DecisionTreeClassifier(random_state=0).fit(X_train, Y_train)
Y_pred_dt = dtclassifier.predict(X_test)

# Confusion matrix followed by the per-class precision / recall / F1 report.
print(
    confusion_matrix(Y_test, Y_pred_dt),
    classification_report(Y_test, Y_pred_dt),
    sep="\n",
)
# Raw predictions next to the true labels for a quick eyeball comparison.
print("Predictions: ", Y_pred_dt)
print("Expected:    ", Y_test)

[[3 1]
 [0 4]]
              precision    recall  f1-score   support

         0.0       1.00      0.75      0.86         4
         1.0       0.80      1.00      0.89         4

    accuracy                           0.88         8
   macro avg       0.90      0.88      0.87         8
weighted avg       0.90      0.88      0.87         8

Predictions:  [0. 1. 0. 0. 1. 1. 1. 1.]
Expected:     [0. 1. 0. 0. 1. 0. 1. 1.]


**Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees, fitted on the training split.
# random_state fixes the bootstrap sampling and feature selection so repeated
# runs give the same model (0 matches the seed used for the decision tree).
# confusion_matrix / classification_report come from the import in the SVM cell.
rfclassifier = RandomForestClassifier(n_estimators=100, random_state=0)
rfclassifier.fit(X_train, Y_train)
Y_pred_rf = rfclassifier.predict(X_test)

# Confusion matrix followed by the per-class precision / recall / F1 report.
print(confusion_matrix(Y_test,Y_pred_rf))
print(classification_report(Y_test,Y_pred_rf))
# Raw predictions next to the true labels for a quick eyeball comparison.
print("Predictions: ", Y_pred_rf)
print("Expected:    ", Y_test)

[[3 1]
 [0 4]]
              precision    recall  f1-score   support

         0.0       1.00      0.75      0.86         4
         1.0       0.80      1.00      0.89         4

    accuracy                           0.88         8
   macro avg       0.90      0.88      0.87         8
weighted avg       0.90      0.88      0.87         8

Predictions:  [0. 1. 0. 0. 1. 1. 1. 1.]
Expected:     [0. 1. 0. 0. 1. 0. 1. 1.]
