In [1]:
!pip install scikit-learn xgboost lightgbm catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.2-cp310-cp310-manylinux2014_x86_64.whl (98.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2


In [2]:
import sklearn
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, StackingClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, f1_score
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

In [4]:
df = pd.read_csv('Subject Components Eczema with Severity and SCORAD.csv')

In [12]:
mapping = {"Mild": 0, "Moderate": 1, "Severe": 2}
df['Severity'] = df['Severity'].replace(mapping)
df.head()

Unnamed: 0,Severity,Subject,Ceramide,Lactic Acid,PCA,Uric Acid,Urea,Melanin,chol
0,2,EC001,0.251417,5263.565519,2039.315078,111.788984,535.868202,100700.0691,83.034163
1,2,EC002,0.683433,3.401875,1.362861,0.022682,0.341596,29.113241,0.106501
2,2,EC003,1.032862,2.477007,1.524537,0.021377,0.19433,85.875953,0.169743
3,2,EC004,0.936938,3.802298,3.126483,0.015471,0.409775,172.254982,0.108228
4,2,EC006,3.527008,2.499475,1.071707,0.00279,0.003685,4.296232,0.129945


In [13]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, :-1], df.iloc[:, -1], test_size=0.2, random_state=42)

In [25]:
X = df.drop("Subject", axis=1).drop("Severity", axis=1)
y = df['Severity']

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [29]:
classifiers = {
    'Random Forest': (RandomForestClassifier(), {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2']
    }),
    'K-Nearest Neighbors': (KNeighborsClassifier(), {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree']
    }),
    'Decision Tree': (DecisionTreeClassifier(), {
        'max_depth': [None, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2']
    }),
    'AdaBoost': (AdaBoostClassifier(), {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.1, 0.01, 0.001]
    }),
    'XGBoost': (xgb.XGBClassifier(), {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.1, 0.01, 0.001],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
    }),
    'LightGBM': (lgb.LGBMClassifier(), {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.1, 0.01, 0.001],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
    }),
    'CatBoost': (CatBoostClassifier(), {
        'iterations': [50, 100, 200],
        'learning_rate': [0.1, 0.01, 0.001],
        'depth': [3, 5, 7],
    }),
}

for clf_name, (clf, param_grid) in classifiers.items():
    grid_search = GridSearchCV(clf, param_grid, cv=10)
    grid_search.fit(X, y)

    best_score = grid_search.best_score_
    best_params = grid_search.best_params_

    print(f"Results for {clf_name}:")
    print("Best cross-validation accuracy:", best_score)
    print("Best hyperparameters:", best_params)
    print("-----------------------------------")

    best_model = grid_search.best_estimator_
    test_score = best_model.score(X, y)



Results for Random Forest:
Best cross-validation accuracy: 0.63
Best hyperparameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'n_estimators': 50}
-----------------------------------




Results for K-Nearest Neighbors:
Best cross-validation accuracy: 0.605
Best hyperparameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'uniform'}
-----------------------------------




Results for Decision Tree:
Best cross-validation accuracy: 0.5599999999999999
Best hyperparameters: {'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1}
-----------------------------------




Results for AdaBoost:
Best cross-validation accuracy: 0.5599999999999999
Best hyperparameters: {'learning_rate': 0.01, 'n_estimators': 50}
-----------------------------------




Results for XGBoost:
Best cross-validation accuracy: 0.6
Best hyperparameters: {'colsample_bytree': 0.8, 'learning_rate': 0.001, 'max_depth': 5, 'n_estimators': 50, 'subsample': 1.0}
-----------------------------------




Results for LightGBM:
Best cross-validation accuracy: 0.575
Best hyperparameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.8}
-----------------------------------
0:	learn: 1.0684454	total: 47.1ms	remaining: 2.31s
1:	learn: 1.0390788	total: 47.8ms	remaining: 1.15s
2:	learn: 1.0170622	total: 48.3ms	remaining: 757ms
3:	learn: 0.9997909	total: 48.8ms	remaining: 561ms
4:	learn: 0.9770936	total: 49.2ms	remaining: 443ms
5:	learn: 0.9590561	total: 49.7ms	remaining: 364ms
6:	learn: 0.9448363	total: 50.2ms	remaining: 308ms
7:	learn: 0.9313672	total: 50.6ms	remaining: 266ms
8:	learn: 0.9154293	total: 51.1ms	remaining: 233ms
9:	learn: 0.8999348	total: 51.6ms	remaining: 206ms
10:	learn: 0.8848821	total: 51.9ms	remaining: 184ms
11:	learn: 0.8747589	total: 52.4ms	remaining: 166ms
12:	learn: 0.8629915	total: 52.9ms	remaining: 150ms
13:	learn: 0.8526674	total: 53.3ms	remaining: 137ms
14:	learn: 0.8397945	total: 54ms	remaining: 126ms
15:	learn: 0



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
54:	learn: 0.4797796	total: 111ms	remaining: 293ms
55:	learn: 0.4723898	total: 114ms	remaining: 293ms
56:	learn: 0.4668948	total: 116ms	remaining: 291ms
57:	learn: 0.4610341	total: 118ms	remaining: 289ms
58:	learn: 0.4557292	total: 120ms	remaining: 287ms
59:	learn: 0.4473689	total: 122ms	remaining: 284ms
60:	learn: 0.4374916	total: 124ms	remaining: 282ms
61:	learn: 0.4330265	total: 126ms	remaining: 280ms
62:	learn: 0.4253203	total: 127ms	remaining: 277ms
63:	learn: 0.4207014	total: 129ms	remaining: 275ms
64:	learn: 0.4132658	total: 131ms	remaining: 273ms
65:	learn: 0.4058008	total: 133ms	remaining: 270ms
66:	learn: 0.3982897	total: 135ms	remaining: 268ms
67:	learn: 0.3937288	total: 137ms	remaining: 265ms
68:	learn: 0.3891391	total: 138ms	remaining: 263ms
69:	learn: 0.3835002	total: 140ms	remaining: 260ms
70:	learn: 0.3767105	total: 142ms	remaining: 257ms
71:	learn: 0.3708900	total: 143ms	remaining: 255ms
72:	learn: 0.3665

In [30]:
estimators = [(name, clf) for name, (clf, _) in classifiers.items()]
stacking_clf = StackingClassifier(estimators=estimators)
stacking_clf.fit(X, y)

bagging_clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=100)
bagging_clf.fit(X, y)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
4:	learn: 1.0018415	total: 8.35ms	remaining: 1.66s
5:	learn: 0.9892866	total: 10.3ms	remaining: 1.71s
6:	learn: 0.9702285	total: 11.2ms	remaining: 1.59s
7:	learn: 0.9563766	total: 12.3ms	remaining: 1.52s
8:	learn: 0.9435343	total: 13.3ms	remaining: 1.46s
9:	learn: 0.9300057	total: 14.4ms	remaining: 1.43s
10:	learn: 0.9070139	total: 15.5ms	remaining: 1.39s
11:	learn: 0.8933337	total: 16.7ms	remaining: 1.37s
12:	learn: 0.8817693	total: 17.7ms	remaining: 1.34s
13:	learn: 0.8708533	total: 18.7ms	remaining: 1.31s
14:	learn: 0.8608131	total: 19.8ms	remaining: 1.3s
15:	learn: 0.8431666	total: 20.7ms	remaining: 1.27s
16:	learn: 0.8324682	total: 21.7ms	remaining: 1.25s
17:	learn: 0.8209869	total: 22.7ms	remaining: 1.24s
18:	learn: 0.8098263	total: 23.4ms	remaining: 1.21s
19:	learn: 0.8021712	total: 24.4ms	remaining: 1.2s
20:	learn: 0.7918965	total: 25.1ms	remaining: 1.17s
21:	learn: 0.7762199	total: 26.1ms	remaining: 1.16s
22:	lea



In [31]:
print(stacking_clf.score(X_test, y_test))
print(bagging_clf.score(X_test, y_test))

0.5384615384615384
1.0
