In [None]:
!pip install --upgrade pip
!pip install --upgrade setuptools
!pip install --upgrade scikit-learn

[0m

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score

In [None]:
df = pd.read_csv("/content/ecg.csv")
df.head()

Unnamed: 0,Name,Age,Gender,Mean_RR,STD_RR,RMS_RR,Mean_HR,STD_HR,RMSSD,Status
0,Vardhaman,23,M,855.719153,57.218808,857.630025,70.116463,4.555713,42.993773,0
1,Vincet,27,M,752.645039,71.467749,756.030551,79.718854,9.007666,31.065595,0
2,Ramesh,30,M,660.65159,16.939869,660.868733,90.819429,2.274697,9.424568,0
3,Pulak Paul,36,M,736.175919,54.695467,738.20497,81.502258,6.749843,25.330474,0
4,SAURABH SINGH,31,M,847.817407,137.759364,858.93655,70.769955,13.943722,43.58989,0


In [None]:
df = df.sample(frac = 1)
df.head()

Unnamed: 0,Name,Age,Gender,Mean_RR,STD_RR,RMS_RR,Mean_HR,STD_HR,RMSSD,Status
81,Venkataramanna C,28,M,961.057439,316.247015,1011.752724,62.431232,12.099576,380.524026,0
89,Rakesh Kumar,32,M,697.29116,57.029816,699.619441,86.047269,7.334369,23.63633,0
76,Sandhyagappa,32,M,716.926404,25.134266,717.366851,83.690599,2.911873,23.065471,0
107,Srinivas,40,M,919.535465,45.452668,920.658143,65.250338,4.107146,29.583424,1
108,Murthy,28,M,672.372371,135.806448,685.950433,89.236266,10.577006,182.457502,0


In [None]:
# create an instance of LabelEncoder
le = LabelEncoder()

# fit and transform the "Senior Management"
# column with LabelEncoder
df['Gender'] = le.fit_transform(df['Gender'])
df.head()

Unnamed: 0,Name,Age,Gender,Mean_RR,STD_RR,RMS_RR,Mean_HR,STD_HR,RMSSD,Status
81,Venkataramanna C,28,1,961.057439,316.247015,1011.752724,62.431232,12.099576,380.524026,0
89,Rakesh Kumar,32,1,697.29116,57.029816,699.619441,86.047269,7.334369,23.63633,0
76,Sandhyagappa,32,1,716.926404,25.134266,717.366851,83.690599,2.911873,23.065471,0
107,Srinivas,40,1,919.535465,45.452668,920.658143,65.250338,4.107146,29.583424,1
108,Murthy,28,1,672.372371,135.806448,685.950433,89.236266,10.577006,182.457502,0


In [None]:
X = df.drop(['Name', 'Age', 'Gender', 'Status'], axis=1).copy()
y = df['Status'].copy()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=66)

In [None]:
params = {
    "ccp_alpha": [0.01, 0.05, 0.1, 0.5, 0.9, 1],
    'max_depth': [None, 1, 5, 10, 50, 100],
    'n_estimators': [50, 80, 100],
    'criterion': ["gini", "entropy"],
    'max_features': [1, 10, 50, 100],
    'bootstrap': [True, False],
    'class_weight': [None, 'balanced', 'balanced_subsample'],
}


In [None]:
rfc = RandomForestClassifier(random_state=42)
rfcCV = GridSearchCV(estimator=rfc, param_grid=params, scoring='neg_mean_absolute_error', cv=5, return_train_score=True, verbose=1, n_jobs=-1)
rfcCV.fit(X_train, y_train)

Fitting 5 folds for each of 5184 candidates, totalling 25920 fits


In [None]:
opt_ccp_alpha = rfcCV.best_params_['ccp_alpha']
opt_max_depth = rfcCV.best_params_['max_depth']
opt_n_estimators = rfcCV.best_params_['n_estimators']
opt_criterion = rfcCV.best_params_['criterion']
opt_max_features = rfcCV.best_params_['max_features']
opt_bootstrap = rfcCV.best_params_['bootstrap']
opt_class_weight = rfcCV.best_params_['class_weight']

print(f"OPTIMUM VALUES\nAlpha: {opt_ccp_alpha}\nMax Depth: {opt_max_depth}\nN Estimators: {opt_n_estimators}\nCriterion: {opt_criterion}\nMax Features: {opt_max_features}\nBootstrap: {opt_bootstrap}\nClass weight: {opt_class_weight}")

OPTIMUM VALUES
Alpha: 0.05
Max Depth: None
N Estimators: 80
Criterion: entropy
Max Features: 1
Bootstrap: False
Class weight: None


In [None]:
clf = RandomForestClassifier(
    ccp_alpha = opt_ccp_alpha,
    max_depth = opt_max_depth,
    n_estimators = opt_n_estimators,
    criterion = opt_criterion,
    max_features = opt_max_features,
    bootstrap = opt_bootstrap,
    class_weight = opt_class_weight,
    random_state = 1
  )

kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(clf, X_train, y_train, cv=kf, scoring='accuracy')

# Print cross-validation scores
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", np.mean(cv_scores))

Cross-Validation Scores: [0.68421053 0.77777778 0.5        0.61111111 0.66666667]
Mean Accuracy: 0.6479532163742691


In [None]:
clf.fit(X_train, y_train)

In [None]:
y_pred = clf.predict(X_test)

In [None]:
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {int(test_accuracy*100)}%")

Accuracy: 79%


In [None]:
import joblib

In [None]:
joblib.dump(rfcCV.best_estimator_, '/content/model-79.joblib')

['/content/model-79.joblib']

In [None]:
loaded_model = joblib.load('/content/model-79.joblib')

In [None]:
y_test_preds = loaded_model.predict(X_test)

In [None]:
test_accuracy = accuracy_score(y_test, y_test_preds)
print(f"Accuracy: {int(test_accuracy*100)}%")

Accuracy: 79%
