In [3]:
# ðŸ“Œ Step 1: Import Libraries
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# ðŸ“Œ Step 2: Load Data
df = pd.read_csv('../Data/preprocessed_data.csv')
print("âœ… Preprocessed Data Loaded:", df.shape)

# ðŸ“Œ Step 3: Split Features and Target
X = df.drop('Appeal_Category', axis=1)
y = df['Appeal_Category']

# ðŸ“Œ Step 4: Columns
text_column = 'Appeal_Text'
numerical_cols = [col for col in X.columns if col != text_column]

# ðŸ“Œ Step 5: Define Preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('text', TfidfVectorizer(max_features=300), text_column),
        ('num', 'passthrough', numerical_cols)
    ]
)

# ðŸ“Œ Step 6: Create Pipeline with Random Forest
pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('model', RandomForestClassifier())
])

# ðŸ“Œ Step 7: Train Pipeline
pipeline.fit(X, y)
print("âœ… Random Forest pipeline trained successfully.")

# ðŸ“Œ Step 8: Save Model
os.makedirs('../App/model', exist_ok=True)
joblib.dump(pipeline, '../App/model/model.pkl')
print("âœ… Model saved to 'App/model/model.pkl'")


âœ… Preprocessed Data Loaded: (2500, 7)
âœ… Random Forest pipeline trained successfully.
âœ… Model saved to 'App/model/model.pkl'
