In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

data = pd.read_excel('dataset.xlsx')

X = data.drop('Class', axis=1)
y = data[['Class']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

rand_model = RandomForestClassifier()
rand_model.fit(X_train, y_train.values.ravel())

y_test_pred = rand_model.predict(X_test)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)

confusion = confusion_matrix(y_test, y_test_pred)

print("Confusion matrix:")
print(confusion)

parameters = {'n_estimators': [10, 50, 100], 'max_depth': [3, 5, 7]}
clf = GridSearchCV(RandomForestClassifier(), parameters)
clf.fit(X_train, y_train.values.ravel())

print("Best parameters:")
print(clf.best_params_)

best_model = RandomForestClassifier(n_estimators=clf.best_params_['n_estimators'], max_depth=clf.best_params_['max_depth'])
best_model.fit(X_train, y_train.values.ravel())

y_pred_best = best_model.predict(X_test)

accuracy_best = accuracy_score(y_test, y_pred_best)
precision_best = precision_score(y_test, y_pred_best, average='weighted')

print("Accuracy with best parameters:", accuracy_best)
print("Precision with best parameters:", precision_best)

Accuracy: 0.922869735553379
Precision: 0.9230650614001625
Confusion matrix:
[[355   0  26   0   1   3  10]
 [  0 161   0   0   0   0   0]
 [ 18   0 446   0  10   2   3]
 [  0   0   0 968   2  19  54]
 [  1   0   8   3 561   0  15]
 [  4   0   0  21   0 581  13]
 [  1   0   2  80   8  11 697]]
Best parameters:
{'max_depth': 7, 'n_estimators': 100}
Accuracy with best parameters: 0.9123408423114594
Precision with best parameters: 0.9131712664744028
