In [1]:
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pickle

In [22]:
# Location of the preprocessed mushroom table, relative to the notebook's working directory.
path = Path('MushroomDataset/preprocessed_mushroom_data_1.csv')

# Read the whole CSV into a single DataFrame; later cells split it into labels and features.
complete_data = pd.read_csv(filepath_or_buffer=path)

In [23]:
# Feature matrix: every column except the "class" label (kept as a DataFrame).
X_data = complete_data.drop(columns=["class"])

# Target vector: the "class" column converted to a NumPy array.
y_data = complete_data["class"].to_numpy()

In [24]:
# Partition features and labels into train and test subsets.
# No test_size is passed, so scikit-learn's default proportion applies;
# the fixed random_state makes the shuffle repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    random_state=78,
)

In [26]:
# Standardise the features. The scaling statistics are learned from the
# training split only and then applied unchanged to both splits, so nothing
# from the test split influences the fit.
scaler = StandardScaler()
scaler_fit = scaler.fit(X_train)  # fit() returns the fitted scaler; this name is pickled later

X_train_scaled, X_test_scaled = (
    scaler_fit.transform(split) for split in (X_train, X_test)
)

In [27]:
 # Create a random forest classifier
# 500 trees; the fixed seed keeps the fitted forest reproducible between runs.
rf_model = RandomForestClassifier(
    random_state=78,
    n_estimators=500,
)

In [28]:
# Train the forest on the scaled training features and their labels.
# The result is assigned back to rf_model (the same name the estimator had).
rf_model = rf_model.fit(
    X=X_train_scaled,
    y=y_train,
)

In [29]:
# Predict a class label for every row of the scaled test split.
predictions = rf_model.predict(X=X_test_scaled)

In [30]:
# Overall accuracy of the predictions against the true test labels.
acc_score = accuracy_score(y_test, predictions)

# Confusion matrix of true vs. predicted labels, wrapped in a DataFrame
# so the rows and columns carry readable names when displayed.
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    data=cm,
    index=["Actual 0", "Actual 1"],
    columns=["Predicted 0", "Predicted 1"],
)

In [31]:
 # Displaying results
# Show the labelled confusion matrix with the notebook's rich renderer.
print("Confusion Matrix")
display(cm_df)

# Accuracy followed by the per-class precision / recall / f1 text report,
# emitted one item per line.
print(
    f"Accuracy Score : {acc_score}",
    "Classification Report",
    classification_report(y_test, predictions),
    sep="\n",
)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,8360,33
Actual 1,31,6844


Accuracy Score : 0.9958082263557768
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8393
           1       1.00      1.00      1.00      6875

    accuracy                           1.00     15268
   macro avg       1.00      1.00      1.00     15268
weighted avg       1.00      1.00      1.00     15268



In [34]:
# Persist the trained random forest so it can be reloaded without retraining.
filename = 'trained_models/rf_model_1.pkl'

# open(..., 'wb') does not create missing parent folders; on a fresh checkout
# the write would fail with FileNotFoundError after the expensive fit has
# already run. Create the output directory first (no-op if it exists).
Path(filename).parent.mkdir(parents=True, exist_ok=True)

with open(filename, 'wb') as file:
    pickle.dump(rf_model, file)

In [37]:
# Persist the fitted scaler alongside the model: inputs must be transformed
# with these same fitted statistics before being passed to the saved forest.
scaler_filename = "trained_scalers/rf_scaler_1.pkl"

# open(..., 'wb') does not create missing parent folders, so make sure the
# output directory exists before writing (no-op if it already does).
Path(scaler_filename).parent.mkdir(parents=True, exist_ok=True)

with open(scaler_filename, 'wb') as file:
    pickle.dump(scaler_fit, file)