In [1]:
# =========================================
# Career Path Recommender
# 05_career_path_model.ipynb
# Cell 1: Load Prepared Features
# =========================================

import pandas as pd

# Feature matrices are kept as DataFrames; each target CSV is flattened
# into a 1-D array so it can be used directly as a label vector.
X_train = pd.read_csv("career_X_train.csv")
X_test = pd.read_csv("career_X_test.csv")
y_train = pd.read_csv("career_y_train.csv").to_numpy().ravel()
y_test = pd.read_csv("career_y_test.csv").to_numpy().ravel()

# Report the dimensions of every split (features first, then targets)
for split_name, split_data in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name} shape:", split_data.shape)

# Bare last expression so the notebook renders the preview as a rich table
print("\nSample X_train:")
X_train.head()


X_train shape: (456, 3)
X_test shape: (114, 3)
y_train shape: (456,)
y_test shape: (114,)

Sample X_train:


Unnamed: 0,placed,salary,salary_level_enc
0,1,61000.0,1
1,0,65000.0,2
2,0,66000.0,2
3,1,64000.0,2
4,0,67000.0,0


In [2]:
# =========================================
# Cell 2: Train & Evaluate Career Path Model
# =========================================

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Build the classifier and fit it on the training split in one expression
# (fit() returns the estimator itself). class_weight="balanced" asks
# scikit-learn to weight classes inversely to their frequency.
career_model = LogisticRegression(class_weight="balanced", max_iter=1000).fit(
    X_train, y_train
)

# Predicted labels for the held-out split
y_pred = career_model.predict(X_test)

# Overall accuracy followed by the per-class precision/recall/F1 table.
# NOTE(review): the recorded run reported accuracy 1.0 on the test split;
# a perfect score may mean one of the features encodes the target --
# confirm against the feature-building notebook.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))


Accuracy: 1.0

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        96

    accuracy                           1.00       114
   macro avg       1.00      1.00      1.00       114
weighted avg       1.00      1.00      1.00       114



In [6]:
# =========================================
# FIX: Recreate & Save Encoders + Model
# =========================================
#
# Refits the two label encoders from the cleaned dataset and persists them
# next to the trained model. Requires `career_model` to exist in the kernel
# already (it is produced by the training cell).

import pandas as pd
import os
import joblib
from sklearn.preprocessing import LabelEncoder

# Single definition of the output location, so the directory that gets
# created, written to and listed can never drift apart.
MODELS_DIR = "/content/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Reload cleaned dataset to recreate encoders
career_df = pd.read_csv("career_path_cleaned.csv")

# Recreate encoders (same logic as features notebook).
# The encoded columns are still written back to career_df in case later
# cells read them.
salary_level_encoder = LabelEncoder()
career_df["salary_level_enc"] = salary_level_encoder.fit_transform(
    career_df["salary_level"]
)

career_path_encoder = LabelEncoder()
career_df["career_path_enc"] = career_path_encoder.fit_transform(
    career_df["career_path"]
)

# The target encoder is rebuilt from a different file than the one the model
# was trained on. Every label the model can predict must be decodable by the
# recreated encoder; otherwise the saved pair would silently map predictions
# to the wrong career path. Fail before anything is written to disk.
n_career_paths = len(career_path_encoder.classes_)
undecodable_labels = sorted(
    set(career_model.classes_.tolist()) - set(range(n_career_paths))
)
if undecodable_labels:
    raise ValueError(
        "career_model predicts labels "
        f"{undecodable_labels} that the recreated career_path encoder "
        f"(with {n_career_paths} classes) cannot decode; "
        "career_path_cleaned.csv does not match the training data."
    )

# Persist the model and both encoders under fixed file names
artifacts = {
    "career_path_model.pkl": career_model,
    "salary_level_encoder.pkl": salary_level_encoder,
    "career_path_encoder.pkl": career_path_encoder,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(MODELS_DIR, artifact_name))

print("✅ Career Path model & encoders saved successfully!")
print("Saved files:")
# os.listdir returns entries in arbitrary order; sort for reproducible output
print(sorted(os.listdir(MODELS_DIR)))


✅ Career Path model & encoders saved successfully!
Saved files:
['career_path_encoder.pkl', 'salary_level_encoder.pkl', 'career_path_model.pkl']
