In [3]:
# 📌 Step 1: Import Libraries
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# 📌 Step 2: Load Data
# Load the preprocessed dataset from CSV and report its (rows, columns) shape.
df = pd.read_csv('../Data/preprocessed_data.csv')
print("✅ Preprocessed Data Loaded:", df.shape)

# 📌 Step 3: Split Features and Target
# Every column except the label is a feature; the label column is the target.
X = df.drop('Appeal_Category', axis=1)
y = df['Appeal_Category']

# 📌 Step 4: Columns
text_column = 'Appeal_Text'
# read_csv parses empty cells as NaN, and TfidfVectorizer rejects NaN
# documents, so replace missing text with empty strings before vectorizing.
X[text_column] = X[text_column].fillna('')
# All remaining feature columns are forwarded to the model unchanged.
numerical_cols = [col for col in X.columns if col != text_column]

# 📌 Step 5: Define Preprocessor
# The text column is passed as a bare string (not a list) so the vectorizer
# receives a 1-D sequence of documents; the other columns pass through as-is.
preprocessor = ColumnTransformer(
    transformers=[
        ('text', TfidfVectorizer(max_features=300), text_column),
        ('num', 'passthrough', numerical_cols),
    ]
)

# 📌 Step 6: Create Pipeline with Random Forest
# A fixed seed makes the trained forest reproducible from run to run; without
# it every retraining yields a different saved model.
pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('model', RandomForestClassifier(random_state=42)),
])

# 📌 Step 7: Train Pipeline
# Fit the preprocessing steps and the classifier together on the full dataset.
pipeline.fit(X, y)
print("✅ Random Forest pipeline trained successfully.")

# 📌 Step 8: Save Model
# Persist the whole fitted pipeline (preprocessing + model) as one artifact.
model_dir = '../App/model'
model_path = f'{model_dir}/model.pkl'
os.makedirs(model_dir, exist_ok=True)  # no error if the folder already exists
joblib.dump(pipeline, model_path)
print("✅ Model saved to 'App/model/model.pkl'")


✅ Preprocessed Data Loaded: (2500, 7)
✅ Random Forest pipeline trained successfully.
✅ Model saved to 'App/model/model.pkl'
