# Loan model training (Colab)

This notebook trains a simple RandomForest model on the Kaggle `loan_train.csv` dataset. Download the dataset and upload it to the Colab session as `/content/loan_train.csv`, or mount Google Drive and point `path` at the file's location there.

In [ ]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
import joblib
import os

# Location of the training CSV inside the Colab session; upload the file
# here before running this cell.
path = '/content/loan_train.csv'

# Read the whole CSV into a DataFrame that the following cell works on.
df = pd.read_csv(filepath_or_buffer=path)

# Kept as the last expression so the notebook renders the first five rows.
df.head(n=5)


In [ ]:
# Basic preprocessing: create target y from Loan_Status (Y/N).
# The mapping also accepts 1/0 so that re-running this cell on an already
# converted column is a no-op instead of turning every label into NaN.
label_map = {'Y': 1, 'N': 0, 1: 1, 0: 0}
encoded_status = df['Loan_Status'].map(label_map)

# Fail early, before touching `df`, if any label is missing or unexpected;
# otherwise the problem only surfaces later as an opaque error during the
# split or the fit.
unmapped = encoded_status.isna()
if unmapped.any():
    bad_values = df.loc[unmapped, 'Loan_Status'].unique().tolist()
    raise ValueError(
        f"Loan_Status must contain only 'Y'/'N'; found unexpected or missing values: {bad_values}"
    )

df['Loan_Status'] = encoded_status
y = df['Loan_Status']

# Loan_ID is dropped when present; errors='ignore' covers files that do not
# have that column.
X = df.drop(columns=['Loan_ID', 'Loan_Status'], errors='ignore')

numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_cols = X.select_dtypes(include=['object']).columns.tolist()

# NOTE(review): Loan_Amount_Term is excluded and each list is capped at six
# columns. Any column not listed here is discarded by the ColumnTransformer
# (its `remainder` defaults to 'drop'). Presumably this matches the fields the
# backend sends at prediction time -- confirm before changing.
numeric_cols = [c for c in numeric_cols if c not in ['Loan_Amount_Term']][:6]
cat_cols = cat_cols[:6]

# Numeric features: median imputation followed by standardisation.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])
# Categorical features: most-frequent imputation followed by one-hot encoding;
# categories unseen during training are encoded as all zeros.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

preproc = ColumnTransformer([
    ('num', numeric_pipeline, numeric_cols),
    ('cat', categorical_pipeline, cat_cols),
])
clf = Pipeline([('pre', preproc), ('rf', RandomForestClassifier(n_estimators=100, random_state=42))])

# Hold out 15% of the rows, stratified on the target so both classes keep the
# same proportions in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)
clf.fit(X_train, y_train)
print('Training done')

# Report accuracy on the held-out split so the model is not saved unevaluated.
print(f'Held-out accuracy: {clf.score(X_test, y_test):.3f}')

# Persist the full pipeline (preprocessing + model) so the consumer can call
# predict() on raw feature rows.
os.makedirs('/content/model', exist_ok=True)
joblib.dump(clf, '/content/model/loan_model.pkl')
print('Saved model to /content/model/loan_model.pkl')


### Next steps in Colab

1. Download `/content/model/loan_model.pkl` from the Colab session and place it at `backend/model/loan_model.pkl` in the repository (or commit and push the model file to your repo).
2. Run the backend and frontend locally or deploy to Render/Streamlit Cloud.