In [19]:

import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc, balanced_accuracy_score
import pandas as pd


In [20]:
# PKM2: logistic regression on the PCA features, scanning the weight given to class 1.
# Load the labels (y) and the feature matrix (X)
y=pd.read_csv("y_PKM2.csv")
X = pd.read_csv("X_PKM2_pca.csv")

# Split the data into training and test data set (70/30, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test_PKM2 = train_test_split(X,y, train_size=0.7,test_size=0.3,random_state=42)
# Flatten the label frames to 1-D arrays (presumably one label column each -- confirm against the CSV)
y_train=y_train.values.ravel()
y_test_PKM2=y_test_PKM2.values.ravel()

# Scan the class-1 weight and keep the run with the highest balanced accuracy.
# The scan starts at 1 rather than 0: a weight of 0 would give every positive
# sample zero weight in the fit, which is the opposite of what the weighting is for.
# NOTE(review): the weight is chosen by scoring on the test split, so the numbers
# reported below are optimistically biased -- consider a validation split or
# cross-validation for the selection.
highest_BAcc_result_PKM2 = float('-inf')
for i in range(1, 100):

    logr_model=LogisticRegression(class_weight={0: 1, 1: i}) #weights added to make sure 1 has a higher chance
    logr_model.fit(X_train,y_train)

    y_pred_PKM2 = logr_model.predict(X_test)
    accuracy_PKM2 = accuracy_score(y_test_PKM2, y_pred_PKM2)
    balanced_accuracy_PKM2=balanced_accuracy_score(y_test_PKM2, y_pred_PKM2)

    # Strict '>' keeps the smallest weight among runs that tie on balanced accuracy
    if balanced_accuracy_PKM2 > highest_BAcc_result_PKM2:
        highest_BAcc_result_PKM2 = balanced_accuracy_PKM2
        highest_acc_result_PKM2=accuracy_PKM2
        best_y_pred_PKM2=y_pred_PKM2



# Report the scores of the best run found by the scan
print("Results of logistic regression for prediction of PKM_2 inhibition")        
print("Accuracy: {:.2f}%".format(highest_acc_result_PKM2 * 100))
print("Balanced accuracy: {:.2f}%".format(highest_BAcc_result_PKM2 * 100))

# evaluate the model (confusion matrix and per-class report for the best run's predictions)
print("Confusion Matrix:\n", confusion_matrix(y_test_PKM2, best_y_pred_PKM2))
print("\nClassification Report:\n", classification_report(y_test_PKM2, best_y_pred_PKM2))




Results of logistic regression for prediction of PKM_2 inhibition
Accuracy: 75.52%
Balanced accuracy: 87.54%
Confusion Matrix:
 [[247  82]
 [  0   6]]

Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.75      0.86       329
         1.0       0.07      1.00      0.13         6

    accuracy                           0.76       335
   macro avg       0.53      0.88      0.49       335
weighted avg       0.98      0.76      0.84       335



In [21]:
# ERK2: logistic regression on the PCA features, scanning the weight given to class 1.
# Load the feature matrix (X) and the labels (y)
X=pd.read_csv("X_ERK2_pca.csv") 
y=pd.read_csv("y_ERK2.csv")

# Split the data into training and test data set (70/30, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test_ERK2 = train_test_split(X,y, train_size=0.7,test_size=0.3,random_state=42)
# Flatten the label frames to 1-D arrays (presumably one label column each -- confirm against the CSV)
y_train=y_train.values.ravel()
y_test_ERK2=y_test_ERK2.values.ravel()

# Scan the class-1 weight and keep the run with the highest balanced accuracy.
# The scan starts at 1 rather than 0: a weight of 0 would give every positive
# sample zero weight in the fit, which is the opposite of what the weighting is for.
# NOTE(review): the weight is chosen by scoring on the test split, so the numbers
# reported below are optimistically biased -- consider a validation split or
# cross-validation for the selection.
highest_BAcc_result_ERK2 = float('-inf')
for i in range(1, 100):

    logr_model=LogisticRegression(class_weight={0: 1, 1: i}) #weights added to make sure 1 has a higher chance
    logr_model.fit(X_train,y_train)

    y_pred_ERK2= logr_model.predict(X_test)
    # Predicted probability of class 1 (second column of predict_proba), kept alongside the hard labels
    y_prob_ERK2 = logr_model.predict_proba(X_test)[:, 1]
    accuracy_ERK2 = accuracy_score(y_test_ERK2, y_pred_ERK2)
    balanced_accuracy_ERK2=balanced_accuracy_score(y_test_ERK2, y_pred_ERK2)

    # Strict '>' keeps the smallest weight among runs that tie on balanced accuracy
    if balanced_accuracy_ERK2 > highest_BAcc_result_ERK2:
        highest_BAcc_result_ERK2 = balanced_accuracy_ERK2
        highest_acc_result_ERK2=accuracy_ERK2
        best_y_pred_ERK2=y_pred_ERK2
        best_y_prob_ERK2= y_prob_ERK2

# Report the scores of the best run found by the scan
print("Results of logistic regression for prediction of ERK_2 inhibition") 
print("Accuracy: {:.2f}%".format(highest_acc_result_ERK2  * 100))
print("Balanced accuracy: {:.2f}%".format(highest_BAcc_result_ERK2  * 100))

# evaluate the model (confusion matrix and per-class report for the best run's predictions)
print("Confusion Matrix:\n", confusion_matrix(y_test_ERK2, best_y_pred_ERK2))
print("\nClassification Report:\n", classification_report(y_test_ERK2, best_y_pred_ERK2))



Results of logistic regression for prediction of ERK_2 inhibition
Accuracy: 68.66%
Balanced accuracy: 71.71%
Confusion Matrix:
 [[221 102]
 [  3   9]]

Classification Report:
               precision    recall  f1-score   support

         0.0       0.99      0.68      0.81       323
         1.0       0.08      0.75      0.15        12

    accuracy                           0.69       335
   macro avg       0.53      0.72      0.48       335
weighted avg       0.95      0.69      0.78       335



In [22]:
# Combined summary: unweighted mean of the ERK2 and PKM2 scores from the cells above
erk2_pkm2_acc = (highest_acc_result_ERK2, highest_acc_result_PKM2)
erk2_pkm2_bacc = (highest_BAcc_result_ERK2, highest_BAcc_result_PKM2)

accuracy = sum(erk2_pkm2_acc) / 2
balanced_accuracy = sum(erk2_pkm2_bacc) / 2

# Print both averages as percentages with two decimals
for template, score in (
    ("Accuracy: {:.2f}%", accuracy),
    ("Balanced accuracy: {:.2f}%", balanced_accuracy),
):
    print(template.format(score * 100))

Accuracy: 72.09%
Balanced accuracy: 79.62%
