# Mounting Google Drive in Colab to access the training dataset and to save the 10-fold cross-validation results

In [None]:
from google.colab import drive

# Attach Google Drive under /content/gdrive/; force_remount=True re-mounts even
# if the drive is already mounted in this session.
drive.mount(mountpoint='/content/gdrive/', force_remount=True)

# Importing the necessary ML algorithms

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.svm import LinearSVC as LSVM, SVC
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.naive_bayes import CategoricalNB as NBC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.linear_model import Perceptron
from sklearn.linear_model import LogisticRegression as LR

# Loading the dataset from google drive

In [None]:
# The first column of our dataset is "unnamed", which can be removed by any of the methods below:

# Method 1 using "index_col=0" as parameter in pd.read_csv() 
# Method 2 using df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [None]:
import pandas as pd

# Read the extracted octapeptide feature table from the mounted Drive.
# index_col=0 turns the file's first column into the row index instead of
# keeping it as a data column.
df = pd.read_csv(
    filepath_or_buffer="/content/gdrive/MyDrive/Machine Learning/Projects/PMI_and_Azeez/Manuscript and Data/new_combined_extracted_octapeptide_features.csv",
    index_col=0,
)
df

# Assigning features to X and cleavage status to y

In [None]:
# Target: an independent copy of the cleavage-status column.
y = df["cleavage Status"].copy()
# Features: every remaining column of the table.
X = df.drop(columns=["cleavage Status"])

# Encoding the y label

In [None]:
# Encode the target from the original string column rather than from `y`
# itself, so that re-running this cell is idempotent (mapping an already
# encoded 0/1 series a second time would turn every value into NaN).
y = df["cleavage Status"].map({"cleaved": 1, "uncleaved": 0})

# Series.map yields NaN for any value that is missing from the mapping; fail
# loudly here instead of passing NaN targets on to the classifiers.
if y.isna().any():
  unexpected_labels = df.loc[y.isna(), "cleavage Status"].unique().tolist()
  raise ValueError(f"Unexpected 'cleavage Status' value(s): {unexpected_labels}")

In [None]:
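# Alternatively, you can use scikit-learn's LabelEncoder.
# Note: LabelEncoder assigns codes in sorted label order, so on the raw string
# labels it gives "cleaved" -> 0 and "uncleaved" -> 1, the opposite of the
# mapping used above.
# from sklearn.preprocessing import LabelEncoder
# y2 = LabelEncoder().fit_transform(y)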

# Performing 10-Fold Cross-Validation of Our Models

In [None]:
# Import the necessary libraries for stratified k-fold cross-validation
from sklearn.model_selection import StratifiedKFold as SKF
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

In [None]:
# Stratified 10-fold splitter; shuffling with a fixed seed keeps the fold
# assignment identical from run to run.
cv = SKF(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

**Instantiating the classifier classes**

In [None]:
# Every estimator with stochastic behaviour gets a fixed random_state so the
# cross-validation scores are reproducible across kernel restarts.

# Support vector classifier with a linear kernel. `gamma` is omitted because it
# only applies to the rbf/poly/sigmoid kernels.
lsvm = SVC(C=1, kernel="linear", tol=1e-5)
lda = LDA()
gbc = GBC(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
knn = KNN(n_neighbors=4)
# NOTE(review): CategoricalNB expects every feature to be encoded as
# non-negative integer categories -- confirm that X satisfies this.
nbc = NBC()
dtc = DTC(random_state=0)
mlpc = MLPC(random_state=0)
p = Perceptron(random_state=0)
lr = LR(random_state=0)

# (estimator, display name) pairs; the display name becomes the row label of
# the results table.
clf_list = [
    (lsvm, "SVC (linear kernel)"),  # this is SVC(kernel="linear"), not the LinearSVC estimator
    (lda, "LinearDiscriminantAnalysis"),
    (gbc, "GradientBoostingClassifier"),
    (knn, "KNeighborsClassifier"),
    (nbc, "CategoricalNB"),
    (dtc, "DecisionTreeClassifier"),
    (mlpc, "MLPClassifier"),
    (p, "Perceptron"),
    (lr, "LogisticRegression"),
]

**Evaluate Models**

In [None]:
# defaultdict(list) lets each results column be appended to without
# pre-creating its key.
from collections import defaultdict as dd

# Results-table column label -> scikit-learn scorer name.
# The binary scorers ("precision", "recall", "f1", "jaccard") score the positive
# class (label 1). Their "*_micro" variants are deliberately not used: on a
# binary single-label problem micro-averaged precision, recall and F1 are all
# equal to accuracy, so they would only repeat the accuracy column.
scoring = {
    "Accuracy": "accuracy",
    "Balanced Accuracy": "balanced_accuracy",
    "Recall": "recall",
    "Jaccard Score": "jaccard",
    "F1 Score": "f1",
    "AUC": "roc_auc",
    "Precision Score": "precision",
}

scores = dd(list)

for clf, name in clf_list:
  # One cross-validation run per model computes every metric on the same
  # fitted folds, instead of refitting the model once per metric.
  cv_results = cross_validate(clf, X, y, scoring=scoring, cv=cv, n_jobs=-1)
  scores["Classifier"].append(name)
  for label in scoring:
    # cross_validate reports each metric's per-fold scores under "test_<label>".
    scores[label].append(cv_results[f"test_{label}"].mean())

# Full-precision table (one row per classifier); this is what gets saved later.
score_df = pd.DataFrame(scores).set_index("Classifier")

# Show a rounded copy only; round() returns a new frame and leaves score_df
# itself untouched.
score_df.round(decimals=2)

# Saving the 10-Fold Cross-Validation Results to Google Drive

In [None]:
# Write the cross-validation score table to the mounted Google Drive as CSV.
score_df.to_csv(
    path_or_buf="/content/gdrive/MyDrive/Machine Learning/Projects/PMI_and_Azeez/Manuscript and Data/10-folds cross-validation scores.csv"
)