In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Load dataset
data = pd.read_csv("loan_prediction.csv")

# Step 1: Drop Loan_ID
data.drop('Loan_ID', axis=1, inplace=True)

# Step 2: Fill missing values
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    data[col].fillna(data[col].mode()[0], inplace=True)

for col in ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']:
    data[col].fillna(data[col].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].median(), inplace=True)


In [3]:
# Step 3: Encode categorical variables
label_cols = ['Gender', 'Married', 'Education', 'Self_Employed', 'Loan_Status']
for col in label_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# One-hot encode 'Dependents' and 'Property_Area'
data = pd.get_dummies(data, columns=['Dependents', 'Property_Area'], drop_first=True)

In [4]:
# Step 4: Define X and y
X = data.drop('Loan_Status', axis=1)
y = data['Loan_Status']

# Step 5: Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Step 6: Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Confirm shape
print("Train shape:", X_train_scaled.shape)
print("Test shape:", X_test_scaled.shape)

Train shape: (491, 14)
Test shape: (123, 14)


In [7]:
%pip install tensorflow

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix

^C
Note: you may need to restart the kernel to use updated packages.


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Step 1: Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Step 2: Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 3: Set early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=5)

In [None]:
# Step 4: Train the model
history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Step 5: Evaluate on test set
loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"\nTest Accuracy: {accuracy:.4f}")

# Step 6: Classification report
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 7: Save the model
model.save("loan_prediction_custom_model.h5")
print("Model saved as 'loan_prediction_custom_model.h5'")
