In [3]:
# 📌 Step 1: Import Libraries
import pandas as pd
import joblib
import os

from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# 📌 Step 2: Load Dataset
df = pd.read_csv('../Data/preprocessed_data.csv')
print("✅ Data Loaded:", df.shape)

X = df.drop('Appeal_Category', axis=1)
y = df['Appeal_Category']

# 📌 Step 3: Load Pipeline from model.pkl
pipeline = joblib.load('../App/model/model.pkl')
print("✅ Model pipeline loaded.")

# 📌 Step 4: Get Random Forest from Pipeline
# You must use `named_steps` to access inner model
rf_model = pipeline.named_steps['model']

# 📌 Step 5: Define Hyperparameters for Randomized Search
param_dist = {
    'model__n_estimators': [100, 200, 300],
    'model__max_depth': [None, 10, 20, 30],
    'model__min_samples_split': [2, 5, 10],
    'model__min_samples_leaf': [1, 2, 4]
}

# 📌 Step 6: RandomizedSearchCV Setup
search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1
)

# 📌 Step 7: Perform Hyperparameter Tuning
search.fit(X, y)
print("✅ Hyperparameter tuning completed.")
print("🔧 Best Parameters:", search.best_params_)

# 📌 Step 8: Evaluate Fine-Tuned Model
best_model = search.best_estimator_
y_pred = best_model.predict(X)
print("\n📊 Evaluation Report:")
print(classification_report(y, y_pred))
print("✅ Accuracy:", accuracy_score(y, y_pred))

# 📌 Step 9: Save Fine-Tuned Model
joblib.dump(best_model, '../App/model/fine_tune.pkl')
print("✅ Fine-tuned model saved as 'fine_tune.pkl'")


✅ Data Loaded: (2500, 7)
✅ Model pipeline loaded.
Fitting 3 folds for each of 10 candidates, totalling 30 fits
✅ Hyperparameter tuning completed.
🔧 Best Parameters: {'model__n_estimators': 300, 'model__min_samples_split': 5, 'model__min_samples_leaf': 2, 'model__max_depth': 30}

📊 Evaluation Report:
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       500
           1       0.88      0.90      0.89       500
           2       0.92      0.90      0.91       500
           3       0.88      0.90      0.89       500
           4       0.89      0.89      0.89       500

    accuracy                           0.89      2500
   macro avg       0.89      0.89      0.89      2500
weighted avg       0.89      0.89      0.89      2500

✅ Accuracy: 0.8936
✅ Fine-tuned model saved as 'fine_tune.pkl'
