In [None]:
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier

# ----------------------------
# Load dataset
# ----------------------------
df = pd.read_csv(r"C:\Users\hrida\Documents\machine learning projects\placement prediction system\data\placementdata.csv")

# ----------------------------
# Encoding categorical columns
# ----------------------------
# Series.map() turns every label that is absent from the mapping (and every
# value that is already missing) into NaN without any warning.  Each encoded
# column is therefore checked right away, so a typo or an unexpected category
# in the CSV fails here with a message naming the column and the offending
# values, instead of surfacing later as a generic NaN error during fitting.
binary_encodings = {
    'ExtracurricularActivities': {'Yes': 1, 'No': 0},
    'PlacementTraining': {'Yes': 1, 'No': 0},
    'PlacementStatus': {'Placed': 1, 'NotPlaced': 0},
}
for column, mapping in binary_encodings.items():
    encoded = df[column].map(mapping)
    unmapped = encoded.isna()
    if unmapped.any():
        bad_values = df.loc[unmapped, column].unique().tolist()
        raise ValueError(
            f"Column {column!r} contains values outside {sorted(mapping)}: "
            f"{bad_values}"
        )
    df[column] = encoded

# StudentID is removed before the features are assembled
# (presumably a row identifier -- confirm against the dataset description).
df = df.drop(columns='StudentID')

# Features are every remaining column except the target; the target is the
# 0/1-encoded PlacementStatus column.
X = df.drop(columns='PlacementStatus')
y = df['PlacementStatus']

# ----------------------------
# Train-test split
# ----------------------------
# 20% of the rows are held out for evaluation; the fixed seed makes the
# partition reproducible from run to run.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# ----------------------------
# Feature scaling
# ----------------------------
# The scaler learns its statistics from the training rows only and is then
# applied unchanged to both partitions, so nothing about the held-out rows
# influences the transformation.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------------
# Linear Regression (used as classifier)
# ----------------------------
# A least-squares regressor is fitted directly on the 0/1 target.  Its
# continuous output is converted to a class label by cutting at 0.5.
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)

lr_preds = lr.predict(X_test_scaled)
above_threshold = lr_preds >= 0.5
lr_class_preds = above_threshold.astype(int)

lr_accuracy = accuracy_score(y_test, lr_class_preds)
print("Linear Regression Accuracy:", lr_accuracy)

# ----------------------------
# Random Forest
# ----------------------------
# 200 trees with a fixed seed, trained on the scaled training features and
# scored on the scaled held-out features.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train_scaled, y_train)

rf_accuracy = accuracy_score(y_test, rf.predict(X_test_scaled))
print("Random Forest Accuracy:", rf_accuracy)

# ----------------------------
# XGBoost
# ----------------------------
# Gradient-boosted classifier evaluated with log-loss, seeded for
# reproducibility, trained and scored on the same scaled partitions as the
# other models.
xgb_params = {"eval_metric": "logloss", "random_state": 42}
xgb = XGBClassifier(**xgb_params)
xgb.fit(X_train_scaled, y_train)

xgb_preds = xgb.predict(X_test_scaled)
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_preds))

# ----------------------------
# Save models
# ----------------------------
# Every fitted estimator and the scaler are written to their own file.
# The paths are relative, so the files land in the current working directory.
artifacts = (
    ("linear_model.pkl", lr),
    ("rf_model.pkl", rf),
    ("xgb_model.pkl", xgb),
    ("scaler.pkl", scaler),
)
for filename, fitted_object in artifacts:
    joblib.dump(fitted_object, filename)

print("Models saved successfully.")
