<a href="https://colab.research.google.com/github/SiddharthGoel/test/blob/master/ModelCreation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Train, evaluate and persist a mushroom classifier.
# The cell reads the cleaned CSV, holds out a stratified test split, fits a
# scaler + random-forest pipeline, prints test metrics, and saves the fitted
# pipeline to disk with joblib.

# Configuration: values a reader may want to tune, kept in one place.
DATA_URL = "https://raw.githubusercontent.com/SiddharthGoel/MushroomClassification/main/mushroom_cleaned.csv"
TARGET_COLUMN = "class"
TEST_SIZE = 0.2
SEED = 42  # shared by the split and the classifier so reruns are reproducible

# Step 1: Load the data
# read_csv already returns a DataFrame, so it is not wrapped a second time;
# both names are kept so anything else that refers to `data` or `df` still works.
data = pd.read_csv(DATA_URL)
df = data

# Step 2: Prepare the feature matrix X and target vector y
# Fail early with a readable message if the target column is absent, instead
# of the opaque "list.remove(x): x not in list".
if TARGET_COLUMN not in df.columns:
    raise ValueError(
        f"Expected a '{TARGET_COLUMN}' column in the data loaded from {DATA_URL}"
    )
cols = [col for col in df.columns if col != TARGET_COLUMN]  # every non-target column is a feature
X = df[cols]
y = df[TARGET_COLUMN]

# Step 3: Split the data into training and test sets
# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=y,
)

# Step 4: Define the pipeline
# NOTE(review): tree ensembles are generally insensitive to feature scaling;
# the scaler is kept so the structure of the saved pipeline is unchanged.
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scaling step
    ('classifier', RandomForestClassifier(random_state=SEED)),  # Classification step
])

# Step 5: Train the model using the pipeline
# Fitting through the pipeline means the scaler only ever sees training data.
pipeline.fit(X_train, y_train)

# Step 6: Evaluate the model
y_pred = pipeline.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 7: Save the entire pipeline
# The file is a pickle: load it only from a trusted location.
# NOTE(review): presumably it should be loaded with the same scikit-learn
# version used here - confirm against the deployment environment.
model_filename = 'mushroom_classifier_pipeline.pkl'
joblib.dump(pipeline, model_filename)
print(f"Pipeline saved to {model_filename}")

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4872
           1       0.99      0.99      0.99      5935

    accuracy                           0.99     10807
   macro avg       0.99      0.99      0.99     10807
weighted avg       0.99      0.99      0.99     10807

Accuracy: 0.9895438141945035
Pipeline saved to mushroom_classifier_pipeline.pkl
