In [2]:
# ✅ Full Notebook Code to Train & Save Models

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import joblib
import os

# 1️⃣ Load your cleaned dataset
df = pd.read_csv("cleaned_course_dataset.csv")

# Check columns
print("Columns in dataset:", df.columns.tolist())

# Fail early, with a readable message, if a raw column used below is absent.
required_columns = ["Price", "Duration", "Rating", "Learners"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# 2️⃣ Preprocess columns

# Price: numeric values are kept; anything non-numeric ('Free',
# 'Paid / Subscription', blanks) is coerced to NaN and then stored as 0.
# to_numeric(errors='coerce') already covers the textual placeholders, so no
# separate replace() step is needed.
price_raw = df['Price']
price_numeric = pd.to_numeric(price_raw, errors='coerce')

# Is_Paid: 1 if the course has a positive price OR is marked
# 'Paid / Subscription'. This is computed from the raw column *before* the
# placeholder is zeroed; deriving it from the zero-filled Price would label
# subscription courses as free.
is_subscription = price_raw.astype(str).str.strip().eq('Paid / Subscription')
df['Is_Paid'] = ((price_numeric > 0) | is_subscription).astype(int)
df['Price'] = price_numeric.fillna(0)

# Duration_Months: first run of digits found in Duration, as a float.
# Casting to str first keeps this working when pandas parsed the column as
# numeric; rows without any digit become NaN.
# NOTE(review): the unit in the text is ignored (only the number is taken) —
# confirm every Duration value is actually expressed in months.
df['Duration_Months'] = (
    df['Duration']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(float)
)

# Success: 1 if Rating >= 4.0 AND Learners > 1000 (rows with a missing
# Rating or Learners compare False and therefore get 0).
# NOTE(review): the label is a deterministic function of Rating and Learners,
# which are also used as features below, so any model can reproduce it
# trivially. Consider dropping those two features or training against an
# independent label (the CSV also lists a 'Success_Label' column).
df['Success'] = ((df['Rating'] >= 4.0) & (df['Learners'] > 1000)).astype(int)

# 3️⃣ Select features and target
X = df[["Rating", "Learners", "Duration_Months", "Is_Paid"]]
y = df["Success"]

# 4️⃣ Train Random Forest
# random_state is pinned so that re-running the notebook on the same data
# produces the same forest (and therefore the same saved artifact); without
# it the bootstrap samples and feature subsets differ on every run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X, y)

# 5️⃣ Train XGBoost (clean version, no use_label_encoder warning)
# eval_metric is set explicitly; random_state is pinned for the same
# reproducibility reason as above.
xgb_model = xgb.XGBClassifier(eval_metric='logloss', random_state=42)
xgb_model.fit(X, y)

# 6️⃣ Create 'models' folder if it doesn't exist
# exist_ok=True makes this a single idempotent call, with no window between
# an existence check and the creation.
os.makedirs("models", exist_ok=True)

# 7️⃣ Save models
# Both fitted estimators are serialized with joblib into the 'models' folder.
joblib.dump(rf_model, "models/rf_model.pkl")
joblib.dump(xgb_model, "models/xgb_model.pkl")

print("✅ Models trained and saved in 'models/' folder")



Columns in dataset: ['Platform', 'Domain', 'Title', 'Organization', 'Rating', 'Learners', 'Duration', 'Difficulty', 'Price', 'Duration_Months', 'Is_Paid', 'Difficulty_Clean', 'Difficulty_Encoded', 'Success_Label']
✅ Models trained and saved in 'models/' folder
