In [1]:
import os
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import balanced_accuracy_score

## Coef analysis on data from tuning
## Week 1
### Enet

In [2]:
# Week 1: load the tuning table, prepare the output folder, fit the elastic-net model.

# Remember the directory the notebook started in the first time this cell runs.
# The cell ends with os.chdir("Tables"), so without this guard a re-run would
# resolve '../Week1_...' from inside Tables/ and create a nested Tables/Tables.
if "_NOTEBOOK_DIR" not in globals():
    _NOTEBOOK_DIR = os.getcwd()
os.chdir(_NOTEBOOK_DIR)

# Rows with any missing value are dropped before modelling.
df = pd.read_csv('../Week1_IgGM_PosNeg_grouped.txt', sep='\t', index_col=0).dropna()
Y = df.Status.copy()
# Columns from position 7 onward are the model inputs.
# NOTE(review): presumably the first 7 columns are sample metadata -- confirm.
X = df.iloc[:, 7:].copy()

# Later cells read '../../Week*_...' and write their workbooks relative to
# Tables/, so the working directory must end up there.
os.makedirs("Tables", exist_ok=True)
os.chdir("Tables")

# random_state is pinned because the saga solver shuffles the data; without a
# seed the fitted coefficients can differ slightly between runs.
enet = LogisticRegression(
    penalty='elasticnet',
    C=100,
    l1_ratio=0.9,
    solver='saga',
    max_iter=int(1e6),
    random_state=0,
).fit(X, Y)

# One row per feature, sorted from the most positive to the most negative coefficient.
EnetCOEFS = pd.DataFrame(
    np.transpose(enet.coef_), index=X.columns, columns=['Week1_Coefficients']
).sort_values('Week1_Coefficients', ascending=False)

### Lasso and Ridge logistic regression

In [3]:
# Week 1: Lasso (L1) and Ridge (L2) logistic regressions, each fitted with a
# small C ("STRONG" regularisation) and with C=100 ("WEAK" regularisation).

# Options shared by all four fits. random_state is pinned because the saga
# solver shuffles the data; without a seed the coefficients can differ
# slightly between runs.
saga_opts = dict(solver='saga', max_iter=int(1e6), random_state=0)
coef_col = 'Week1_Coefficients'

LassoSTRONG = LogisticRegression(penalty='l1', C=0.01291549665014884, **saga_opts).fit(X, Y)
LassoSTRONG_coefs = pd.DataFrame(
    np.transpose(LassoSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

LassoWEAK = LogisticRegression(penalty='l1', C=100, **saga_opts).fit(X, Y)
LassoWEAK_coefs = pd.DataFrame(
    np.transpose(LassoWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeSTRONG = LogisticRegression(penalty='l2', C=0.01291549665014884, **saga_opts).fit(X, Y)
RidgeSTRONG_coefs = pd.DataFrame(
    np.transpose(RidgeSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeWEAK = LogisticRegression(penalty='l2', C=100, **saga_opts).fit(X, Y)
RidgeWEAK_coefs = pd.DataFrame(
    np.transpose(RidgeWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)


In [4]:
# Save every Week 1 coefficient table as its own sheet of a single workbook.
# The dict keeps insertion order, so sheets are written in the order listed.
week1_sheets = {
    "Week1_Enet": EnetCOEFS,
    "Week1_LassoStrong": LassoSTRONG_coefs,
    "Week1_LassoWeak": LassoWEAK_coefs,
    "Week1_RidgeStrong": RidgeSTRONG_coefs,
    "Week1_RidgeWeak": RidgeWEAK_coefs,
}
with pd.ExcelWriter("Week1_Coeffs.xlsx") as writer:
    for sheet_label, coef_table in week1_sheets.items():
        # index=True keeps the feature names as the first column of each sheet.
        coef_table.to_excel(writer, sheet_name=sheet_label, index=True)


### XGB

## Week 1 Score and coefs from Validation set

In [5]:
# df = pd.read_csv('../../ValidationSet/ValidationSet.txt', sep='\t', index_col=0)
# df = df.loc[(df['TimeGroup']=='Week1')|(df['TimeGroup']=='Healthy')]
# Xval = df.iloc[:,7:].copy()
# Yval = df.Status.copy()
# print(Yval.value_counts())

In [6]:
# print("ENET: BA score", balanced_accuracy_score(Yval, enet.predict(Xval)))
# print('LassoSTRONG: BA score', balanced_accuracy_score(Yval, LassoSTRONG.predict(Xval)))
# print("LassoWEAK: BA score", balanced_accuracy_score(Yval, LassoWEAK.predict(Xval)))
# print("RidgeSTRONG: BA score", balanced_accuracy_score(Yval, RidgeSTRONG.predict(Xval)))
# print("RidgeWEAK: BA score", balanced_accuracy_score(Yval, RidgeWEAK.predict(Xval)))

### XGB

## Coef analysis on data from tuning
## Week 2
### Enet

In [7]:
# Week 2: load the tuning table and fit the elastic-net model.
# The '../../' path is resolved from Tables/, the working directory set by the
# Week 1 cell, so that cell must have been run first.
# Rows with any missing value are dropped before modelling.
df = pd.read_csv('../../Week2_IgGM_PosNeg_grouped.txt', sep='\t', index_col=0).dropna()
Y = df.Status.copy()
# Columns from position 7 onward are the model inputs.
# NOTE(review): presumably the first 7 columns are sample metadata -- confirm.
X = df.iloc[:, 7:].copy()

# random_state is pinned because the saga solver shuffles the data; without a
# seed the fitted coefficients can differ slightly between runs.
enet = LogisticRegression(
    penalty='elasticnet',
    C=100,
    l1_ratio=0.9,
    solver='saga',
    max_iter=int(1e6),
    random_state=0,
).fit(X, Y)

# One row per feature, sorted from the most positive to the most negative coefficient.
EnetCOEFS = pd.DataFrame(
    np.transpose(enet.coef_), index=X.columns, columns=['Week2_Coefficients']
).sort_values('Week2_Coefficients', ascending=False)

### Lasso and Ridge logistic regression

In [8]:
# Week 2: Lasso (L1) and Ridge (L2) logistic regressions, each fitted with a
# small C ("STRONG" regularisation) and with C=100 ("WEAK" regularisation).

# Options shared by all four fits. random_state is pinned because the saga
# solver shuffles the data; without a seed the coefficients can differ
# slightly between runs.
saga_opts = dict(solver='saga', max_iter=int(1e6), random_state=0)
coef_col = 'Week2_Coefficients'

LassoSTRONG = LogisticRegression(penalty='l1', C=0.01291549665014884, **saga_opts).fit(X, Y)
LassoSTRONG_coefs = pd.DataFrame(
    np.transpose(LassoSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

LassoWEAK = LogisticRegression(penalty='l1', C=100, **saga_opts).fit(X, Y)
LassoWEAK_coefs = pd.DataFrame(
    np.transpose(LassoWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeSTRONG = LogisticRegression(penalty='l2', C=0.01291549665014884, **saga_opts).fit(X, Y)
RidgeSTRONG_coefs = pd.DataFrame(
    np.transpose(RidgeSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeWEAK = LogisticRegression(penalty='l2', C=100, **saga_opts).fit(X, Y)
RidgeWEAK_coefs = pd.DataFrame(
    np.transpose(RidgeWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)


In [9]:
# Save every Week 2 coefficient table as its own sheet of a single workbook.
# The dict keeps insertion order, so sheets are written in the order listed.
week2_sheets = {
    "Week2_Enet": EnetCOEFS,
    "Week2_LassoStrong": LassoSTRONG_coefs,
    "Week2_LassoWeak": LassoWEAK_coefs,
    "Week2_RidgeStrong": RidgeSTRONG_coefs,
    "Week2_RidgeWeak": RidgeWEAK_coefs,
}
with pd.ExcelWriter("Week2_Coeffs.xlsx") as writer:
    for sheet_label, coef_table in week2_sheets.items():
        # index=True keeps the feature names as the first column of each sheet.
        coef_table.to_excel(writer, sheet_name=sheet_label, index=True)

## Week 2 Score and coefs from Validation set

In [10]:
# df = pd.read_csv('../../ValidationSet/ValidationSet.txt', sep='\t', index_col=0)
# df = df.loc[(df['TimeGroup']=='Week2')|(df['TimeGroup']=='Healthy')]
# Xval = df.iloc[:,7:].copy()
# Yval = df.Status.copy()
# print(Yval.value_counts())

In [11]:
# print("ENET: BA score", balanced_accuracy_score(Yval, enet.predict(Xval)))
# print('LassoSTRONG: BA score', balanced_accuracy_score(Yval, LassoSTRONG.predict(Xval)))
# print("LassoWEAK: BA score", balanced_accuracy_score(Yval, LassoWEAK.predict(Xval)))
# print("RidgeSTRONG: BA score", balanced_accuracy_score(Yval, RidgeSTRONG.predict(Xval)))
# print("RidgeWEAK: BA score", balanced_accuracy_score(Yval, RidgeWEAK.predict(Xval)))

## Coef analysis on data from tuning
## Week 3
### Enet

In [12]:
# Week 3: load the tuning table and fit the elastic-net model.
# The '../../' path is resolved from Tables/, the working directory set by the
# Week 1 cell, so that cell must have been run first.
# Rows with any missing value are dropped before modelling.
df = pd.read_csv('../../Week3_IgGM_PosNeg_grouped.txt', sep='\t', index_col=0).dropna()
Y = df.Status.copy()
# Columns from position 7 onward are the model inputs.
# NOTE(review): presumably the first 7 columns are sample metadata -- confirm.
X = df.iloc[:, 7:].copy()

# random_state is pinned because the saga solver shuffles the data; without a
# seed the fitted coefficients can differ slightly between runs.
enet = LogisticRegression(
    penalty='elasticnet',
    C=100,
    l1_ratio=0.9,
    solver='saga',
    max_iter=int(1e6),
    random_state=0,
).fit(X, Y)
# One row per feature, sorted from the most positive to the most negative coefficient.
EnetCOEFS = pd.DataFrame(
    np.transpose(enet.coef_), index=X.columns, columns=['Week3_Coefficients']
).sort_values('Week3_Coefficients', ascending=False)

### Lasso and Ridge logistic regression

In [13]:
# Week 3: Lasso (L1) and Ridge (L2) logistic regressions, each fitted with a
# small C ("STRONG" regularisation) and with C=100 ("WEAK" regularisation).

# Options shared by all four fits. random_state is pinned because the saga
# solver shuffles the data; without a seed the coefficients can differ
# slightly between runs.
saga_opts = dict(solver='saga', max_iter=int(1e6), random_state=0)
coef_col = 'Week3_Coefficients'

LassoSTRONG = LogisticRegression(penalty='l1', C=0.01291549665014884, **saga_opts).fit(X, Y)
LassoSTRONG_coefs = pd.DataFrame(
    np.transpose(LassoSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

LassoWEAK = LogisticRegression(penalty='l1', C=100, **saga_opts).fit(X, Y)
LassoWEAK_coefs = pd.DataFrame(
    np.transpose(LassoWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeSTRONG = LogisticRegression(penalty='l2', C=0.01291549665014884, **saga_opts).fit(X, Y)
RidgeSTRONG_coefs = pd.DataFrame(
    np.transpose(RidgeSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeWEAK = LogisticRegression(penalty='l2', C=100, **saga_opts).fit(X, Y)
RidgeWEAK_coefs = pd.DataFrame(
    np.transpose(RidgeWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)


In [14]:
# Save every Week 3 coefficient table as its own sheet of a single workbook.
# The dict keeps insertion order, so sheets are written in the order listed.
week3_sheets = {
    "Week3_Enet": EnetCOEFS,
    "Week3_LassoStrong": LassoSTRONG_coefs,
    "Week3_LassoWeak": LassoWEAK_coefs,
    "Week3_RidgeStrong": RidgeSTRONG_coefs,
    "Week3_RidgeWeak": RidgeWEAK_coefs,
}
with pd.ExcelWriter("Week3_Coeffs.xlsx") as writer:
    for sheet_label, coef_table in week3_sheets.items():
        # index=True keeps the feature names as the first column of each sheet.
        coef_table.to_excel(writer, sheet_name=sheet_label, index=True)

## Week 3 Score and coefs from Validation set

In [15]:
# df = pd.read_csv('../../ValidationSet/ValidationSet.txt', sep='\t', index_col=0)
# df = df.loc[(df['TimeGroup']=='Week3')|(df['TimeGroup']=='Healthy')]
# Xval = df.iloc[:,7:].copy()
# Yval = df.Status.copy()
# print(Yval.value_counts())

In [16]:
# print("ENET: BA score", balanced_accuracy_score(Yval, enet.predict(Xval)))
# print('LassoSTRONG: BA score', balanced_accuracy_score(Yval, LassoSTRONG.predict(Xval)))
# print("LassoWEAK: BA score", balanced_accuracy_score(Yval, LassoWEAK.predict(Xval)))
# print("RidgeSTRONG: BA score", balanced_accuracy_score(Yval, RidgeSTRONG.predict(Xval)))
# print("RidgeWEAK: BA score", balanced_accuracy_score(Yval, RidgeWEAK.predict(Xval)))

## Coef analysis on data from tuning
## Week 4
### Enet

In [17]:
# Week 4: load the tuning table and fit the elastic-net model.
# The '../../' path is resolved from Tables/, the working directory set by the
# Week 1 cell, so that cell must have been run first.
# Rows with any missing value are dropped before modelling.
df = pd.read_csv('../../Week4_IgGM_PosNeg_grouped.txt', sep='\t', index_col=0).dropna()
Y = df.Status.copy()
# Columns from position 7 onward are the model inputs.
# NOTE(review): presumably the first 7 columns are sample metadata -- confirm.
X = df.iloc[:, 7:].copy()

# random_state is pinned because the saga solver shuffles the data; without a
# seed the fitted coefficients can differ slightly between runs.
enet = LogisticRegression(
    penalty='elasticnet',
    C=100,
    l1_ratio=0.9,
    solver='saga',
    max_iter=int(1e6),
    random_state=0,
).fit(X, Y)
# One row per feature, sorted from the most positive to the most negative coefficient.
EnetCOEFS = pd.DataFrame(
    np.transpose(enet.coef_), index=X.columns, columns=['Week4_Coefficients']
).sort_values('Week4_Coefficients', ascending=False)

In [18]:
# Week 4: Lasso (L1) and Ridge (L2) logistic regressions, each fitted with a
# small C ("STRONG" regularisation) and with C=100 ("WEAK" regularisation).

# Options shared by all four fits. random_state is pinned because the saga
# solver shuffles the data; without a seed the coefficients can differ
# slightly between runs.
saga_opts = dict(solver='saga', max_iter=int(1e6), random_state=0)
coef_col = 'Week4_Coefficients'

LassoSTRONG = LogisticRegression(penalty='l1', C=0.01291549665014884, **saga_opts).fit(X, Y)
LassoSTRONG_coefs = pd.DataFrame(
    np.transpose(LassoSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

LassoWEAK = LogisticRegression(penalty='l1', C=100, **saga_opts).fit(X, Y)
LassoWEAK_coefs = pd.DataFrame(
    np.transpose(LassoWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeSTRONG = LogisticRegression(penalty='l2', C=0.01291549665014884, **saga_opts).fit(X, Y)
RidgeSTRONG_coefs = pd.DataFrame(
    np.transpose(RidgeSTRONG.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)

RidgeWEAK = LogisticRegression(penalty='l2', C=100, **saga_opts).fit(X, Y)
RidgeWEAK_coefs = pd.DataFrame(
    np.transpose(RidgeWEAK.coef_), index=X.columns, columns=[coef_col]
).sort_values(coef_col, ascending=False)


In [19]:
# Save every Week 4 coefficient table as its own sheet of a single workbook.
# The dict keeps insertion order, so sheets are written in the order listed.
week4_sheets = {
    "Week4_Enet": EnetCOEFS,
    "Week4_LassoStrong": LassoSTRONG_coefs,
    "Week4_LassoWeak": LassoWEAK_coefs,
    "Week4_RidgeStrong": RidgeSTRONG_coefs,
    "Week4_RidgeWeak": RidgeWEAK_coefs,
}
with pd.ExcelWriter("Week4_Coeffs.xlsx") as writer:
    for sheet_label, coef_table in week4_sheets.items():
        # index=True keeps the feature names as the first column of each sheet.
        coef_table.to_excel(writer, sheet_name=sheet_label, index=True)

## Week 4 Score and coefs from Validation set

In [20]:
# df = pd.read_csv('../../ValidationSet/ValidationSet.txt', sep='\t', index_col=0)
# df = df.loc[(df['TimeGroup']=='Week4+')|(df['TimeGroup']=='Healthy')]
# Xval = df.iloc[:,7:].copy()
# Yval = df.Status.copy()
# print(Yval.value_counts())

In [21]:
# print("ENET: BA score", balanced_accuracy_score(Yval, enet.predict(Xval)))
# print('LassoSTRONG: BA score', balanced_accuracy_score(Yval, LassoSTRONG.predict(Xval)))
# print("LassoWEAK: BA score", balanced_accuracy_score(Yval, LassoWEAK.predict(Xval)))
# print("RidgeSTRONG: BA score", balanced_accuracy_score(Yval, RidgeSTRONG.predict(Xval)))
# print("RidgeWEAK: BA score", balanced_accuracy_score(Yval, RidgeWEAK.predict(Xval)))