In [8]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

# Load the data
df = pd.read_csv("placement_synthetic_data_5000.csv")

# Encode categorical variables (three features plus the target) as integer codes.
# One fitted LabelEncoder is kept per column so the exact same mapping can be
# re-applied, or inverted back to the original strings, after reloading.
label_encoders = {}
categorical_columns = ['gender', 'stream', 'leadership_role', 'final_package']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Save encoders for later use

# Separate features and target
X = df.drop("final_package", axis=1)
y = df["final_package"]

# Train-test split.
# NOTE: any code that later re-creates this split to evaluate the saved model
# must use identical arguments (test_size, random_state), otherwise training
# rows leak into its "test" set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost model.
# The class count is taken from the fitted target encoder instead of a
# hard-coded 5, so it always matches the categories present in the data.
# `use_label_encoder` is intentionally not passed: the installed XGBoost
# reports it as an unused parameter.
n_classes = len(label_encoders["final_package"].classes_)
model = xgb.XGBClassifier(objective="multi:softmax", num_class=n_classes, eval_metric="mlogloss")
model.fit(X_train, y_train)

# Save model and encoders
with open("xgb_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

print("Model and encoders saved successfully.")


Model and encoders saved successfully.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [10]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as xgb

# Load the dataset
df = pd.read_csv("placement_synthetic_data_5000.csv")

# Load encoders.
# NOTE: pickle.load can execute arbitrary code; only load pickle files that
# were produced by a trusted source.
with open("label_encoders.pkl", "rb") as f:
    label_encoders = pickle.load(f)

# Encode categorical features (and the target) with the saved encoders so the
# integer codes match the ones the model was trained on.
df_encoded = df.copy()
for col in ["gender", "stream", "leadership_role", "final_package"]:
    df_encoded[col] = label_encoders[col].transform(df_encoded[col])

# Split features and target
X = df_encoded.drop("final_package", axis=1)
y = df_encoded["final_package"]

# Train-test split (same as before): identical test_size and random_state
# reproduce the held-out rows, provided the CSV has not changed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load model (same pickle caveat as above).
with open("xgb_model.pkl", "rb") as f:
    model = pickle.load(f)

# Predict and evaluate
y_pred = model.predict(X_test)

# Pin the label set explicitly. Without `labels`, the report infers classes
# from y_test/y_pred, and `target_names` misaligns (or the call fails) if a
# rare class is absent from both.
class_names = label_encoders["final_package"].classes_
class_ids = list(range(len(class_names)))

print("🔍 Accuracy on test set:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.00 for classes that are never predicted, without
# emitting an undefined-metric warning for each affected metric.
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred, labels=class_ids, target_names=class_names, zero_division=0))
print("\n🧱 Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))


🔍 Accuracy on test set: 0.92

📊 Classification Report:
                 precision    recall  f1-score   support

   11 - 15 LPA       0.80      0.82      0.81       109
   16 - 20 LPA       0.65      0.50      0.56        22
21 LPA or more       0.00      0.00      0.00         3
     3 - 6 LPA       0.97      0.96      0.96       500
    7 - 10 LPA       0.90      0.93      0.92       366

      accuracy                           0.92      1000
     macro avg       0.66      0.64      0.65      1000
  weighted avg       0.92      0.92      0.92      1000


🧱 Confusion Matrix:
 [[ 89   3   0   0  17]
 [ 11  11   0   0   0]
 [  0   3   0   0   0]
 [  0   0   0 478  22]
 [ 11   0   0  13 342]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
