In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

data = pd.read_excel('dataset.xlsx')

X = data.drop('Class', axis=1)
y = data[['Class']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

alg_model = RandomForestClassifier()
alg_model.fit(X_train, y_train.values.ravel())

y_test_pred = alg_model.predict(X_test)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)

confusion = confusion_matrix(y_test, y_test_pred)

print("Confusion matrix:")
print(confusion)

parameters = {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 15]}
clf = GridSearchCV(RandomForestClassifier(), parameters)
clf.fit(X_train, y_train.values.ravel())

print("Best parameters:")
print(clf.best_params_)

best_model = RandomForestClassifier(n_estimators=clf.best_params_['n_estimators'], max_depth=clf.best_params_['max_depth'])
best_model.fit(X_train, y_train.values.ravel())

y_pred_best = best_model.predict(X_test)

accuracy_best = accuracy_score(y_test, y_pred_best)
precision_best = precision_score(y_test, y_pred_best, average='weighted')

print("Accuracy with best parameters:", accuracy_best)
print("Precision with best parameters:", precision_best)

Accuracy: 0.9226248775710089
Precision: 0.9228183221665028
Confusion matrix:
[[358   0  24   0   1   2  10]
 [  0 161   0   0   0   0   0]
 [ 24   0 443   0   7   2   3]
 [  0   0   0 966   2  19  56]
 [  3   0   8   4 560   0  13]
 [  4   0   0  21   0 581  13]
 [  1   0   2  78   9  10 699]]
Best parameters:
{'max_depth': 15, 'n_estimators': 100}
Accuracy with best parameters: 0.9204211557296768
Precision with best parameters: 0.9206492229473558
