# Loan Approval Prediction Using Python

In [None]:

# Step 1: Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


: 

In [None]:

# Step 2: Read the loan records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='loan_prediction.csv')

# Preview the top five rows as a quick sanity check of the load
df.head(n=5)


In [None]:

# Step 3: Discard the Loan_ID column
df = df.drop(columns=['Loan_ID'])

# Preview the top five rows to verify that Loan_ID is gone
df.head(n=5)


In [None]:

# Step 4: Fill missing values
#
# Each column is reassigned with the filled result instead of calling
# fillna(..., inplace=True) on df[col]. The in-place form operates on an
# intermediate Series obtained by indexing; recent pandas versions warn about
# it, and with Copy-on-Write enabled it no longer updates df at all.

# Fill missing values in categorical columns with the mode (most frequent value)
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    df[col] = df[col].fillna(df[col].mode()[0])

# Fill missing values in numerical columns:
# LoanAmount uses the median; the other two use the mode.
df['LoanAmount'] = df['LoanAmount'].fillna(df['LoanAmount'].median())
for col in ['Loan_Amount_Term', 'Credit_History']:
    df[col] = df[col].fillna(df[col].mode()[0])

# Per-column count of remaining missing values
df.isnull().sum()


In [None]:

# Step 5: Remove outliers in ApplicantIncome and CoapplicantIncome columns
#
# A row is kept only when its value lies inside the inclusive range
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]. The columns are filtered one after the
# other, so the quartiles of the second column are computed on the rows that
# survived the filter on the first one.
for col in ('ApplicantIncome', 'CoapplicantIncome'):
    Q1, Q3 = df[col].quantile([0.25, 0.75])
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    # between() is inclusive on both ends by default
    within_bounds = df[col].between(lower_bound, upper_bound)
    df = df[within_bounds]

# (rows, columns) remaining after the outlier filter
df.shape


In [None]:

# Step 6: One-hot encode the categorical columns
# Each listed column is replaced by one indicator column per distinct value.
cat_cols = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
]
df = pd.get_dummies(data=df, columns=cat_cols)

# Preview the top five rows to inspect the encoded layout
df.head(n=5)


In [None]:

# Step 7: Build the train / test partitions

# Target (y) is the Loan_Status column; features (X) are every other column
y = df['Loan_Status']
X = df.drop(columns=['Loan_Status'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Shapes of the four resulting pieces, in the order they were returned
tuple(part.shape for part in (X_train, X_test, y_train, y_test))


In [None]:

# Step 8: Scale the numerical features using StandardScaler

scaler = StandardScaler()
numerical_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History']

# The frames returned by train_test_split are row selections of X. Take
# explicit copies before assigning columns so the writes land on independent
# frames and pandas does not emit SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to the test rows.
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Confirm scaling by inspecting the first few rows of the training set
X_train.head()


In [None]:

# Step 9: Fit a support vector classifier on the training data

# fit() returns the estimator itself, so construction and training can be chained
model = SVC(random_state=42).fit(X_train, y_train)

# Cell output signalling that fitting finished
"Model training completed!"


In [None]:

# Step 10: Make predictions on the test dataset

# Predict a Loan_Status label for every row of the test features
y_pred = model.predict(X_test)

# Attach the predictions to a copy of the test features. An explicit copy
# (rather than re-wrapping X_test in pd.DataFrame) guarantees that adding the
# new column cannot affect X_test itself.
X_test_df = X_test.copy()
X_test_df['Loan_Status_Predicted'] = y_pred

# Display the first few rows of the predictions
X_test_df.head()
