In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import cloudpickle

# Read the synthetic student-loan dataset from the working directory
data = pd.read_csv('synthetic_student_loan_data.csv')

# Build the binary target: 1 when a row's "Priority_Score" is strictly above
# the dataset-wide median, 0 otherwise
median_score = data['Priority_Score'].median()
data['High_Priority'] = data['Priority_Score'].gt(median_score).astype(int)

# Target vector, plus a feature frame with both the raw score and the derived
# label removed so the label cannot leak into the features
y = data["High_Priority"]
X = data.drop(columns=["Priority_Score", "High_Priority"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Numeric columns: standardized before being passed to the classifier
numeric_features = ["Attendance_Rate", "Grades", "Distance_to_School"]
numeric_transformer = StandardScaler()

# Categorical columns: one-hot encoded
categorical_features = [
    "Socioeconomic_Status",
    "Parent_Education_Level",
    "School_Resources",
    "Behavioral_Issues",
]
# drop="first" removes the first level of every feature. handle_unknown="ignore"
# makes a category that was absent from the training rows (a rare level that
# only lands in the test split, or a new value seen after the saved pipeline is
# reloaded) encode as all zeros instead of raising at transform time.
# NOTE: combining `drop` with handle_unknown="ignore" needs scikit-learn >= 1.0.
categorical_transformer = OneHotEncoder(drop="first", handle_unknown="ignore")

# Route each feature group to its transformer. Columns not listed in either
# group are dropped (ColumnTransformer's default `remainder`).
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# Chain preprocessing and the classifier so both are fitted, applied and
# persisted as a single estimator
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", RandomForestClassifier(random_state=42)),
    ]
)

# Fit the preprocessing steps and the classifier on the training split
pipeline.fit(X_train, y_train)

# Predict on the held-out split and print the per-class metrics
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# Persist the fitted pipeline (preprocessing + classifier) to disk with joblib
from joblib import dump

model_path = "student_loan_pipeline.joblib"
dump(pipeline, model_path)

print("Model saved successfully!")


              precision    recall  f1-score   support

           0       0.89      0.95      0.92       107
           1       0.94      0.87      0.91        93

    accuracy                           0.92       200
   macro avg       0.92      0.91      0.91       200
weighted avg       0.92      0.92      0.91       200

