# Loan Default Prediction - Data Cleaning & Model Training

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib


## Step 1: Load the dataset

In [None]:

# Read the raw training CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="train-dataset - Copy.csv")
df.head(n=5)


## Step 2: Drop rows with missing values

In [None]:

# Discard every row of df that has a missing value in any column, then show
# the (rows, columns) shape of what is left.
df_cleaned = df.dropna(axis=0, how="any")
df_cleaned.shape


## Step 3: Select useful features and target

In [None]:

# Columns of df_cleaned used as model inputs.
selected_features = [
    'Loan Amount', 'Funded Amount', 'Funded Amount Investor', 'Term',
    'Interest Rate', 'Grade', 'Employment Duration', 'Home Ownership',
    'Annual Income', 'Verification Status', 'Purpose', 'Debt-To-Income Ratio',
    'State', 'Total Accounts', 'Application Type'
]

# Take an explicit copy: X's categorical columns are overwritten in the
# encoding step, and assigning into a bare column subset of df_cleaned raises
# pandas' SettingWithCopyWarning and leaves it unclear which frame is modified.
X = df_cleaned[selected_features].copy()

# Target column the model is trained to predict.
y = df_cleaned['Loan Status']


## Step 4: Encode categorical features

In [None]:

# Replace every object-dtype column of X with integer codes. One LabelEncoder
# is fitted per column and kept in label_encoders, keyed by column name.
categorical_cols = list(X.select_dtypes(include=['object']).columns)
label_encoders = {col: LabelEncoder() for col in categorical_cols}

for col, le in label_encoders.items():
    X[col] = le.fit_transform(X[col])


## Step 5: Train/test split

In [None]:

# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# reproducible. stratify=y keeps the class proportions of the target the same
# in the training and test sets, so the evaluation is not skewed by a random
# draw that over- or under-represents one class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


## Step 6: Train Logistic Regression model

In [None]:

# Build a logistic-regression classifier with the iteration cap set to 1000
# and fit it on the training split.
model = LogisticRegression(max_iter=1_000)
model.fit(X=X_train, y=y_train)


## Step 7: Evaluate the model

In [None]:

# Predict labels for the held-out rows and print the classification report
# comparing them with the true labels.
y_pred = model.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))


## Step 8: Save the model and encoders

In [None]:

# Persist the fitted model and the per-column label encoders to disk so both
# can be reloaded together later.
joblib.dump(value=model, filename="loan_default_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")
