In [3]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import joblib
import pickle

# Load the dataset
loan_dataset = pd.read_csv(r'C:\Users\aditi mishra\Downloads\loan_prediction.csv')  # Replace with your file path if needed

# Basic info
print("First 5 Rows:")
print(loan_dataset.head())

print("\nShape of dataset:", loan_dataset.shape)

print("\nDataset description:")
print(loan_dataset.describe())

print("\nMissing values:")
print(loan_dataset.isnull().sum())

# Drop missing values
loan_dataset = loan_dataset.dropna()

print("\nMissing values after dropping:")
print(loan_dataset.isnull().sum())

# Encode target label
loan_dataset['Loan_Status'] = loan_dataset['Loan_Status'].replace({'N': 0, 'Y': 1})

# Replace '3+' with 4 in Dependents
loan_dataset['Dependents'] = loan_dataset['Dependents'].replace('3+', 4)


# Encode categorical features
loan_dataset = loan_dataset.replace({
    'Married': {'No': 0, 'Yes': 1},
    'Gender': {'Male': 1, 'Female': 0},
    'Self_Employed': {'No': 0, 'Yes': 1},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
    'Education': {'Graduate': 1, 'Not Graduate': 0}
})


# Split into features and label
X = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'], axis=1)
Y = loan_dataset['Loan_Status']

# Split the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=2)

# Model training: SVM with linear kernel
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Accuracy on training data
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("\nAccuracy on training data:", training_data_accuracy)

# Accuracy on test data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on test data:", test_data_accuracy)

# Save the model to a file
filename = 'loan_model.sav'
pickle.dump(classifier, open(filename, 'wb'))

# Save accuracy scores
with open('accuracy.txt', 'w') as f:
    f.write(f"Training Accuracy: {training_data_accuracy:.2f}\n")
    f.write(f"Test Accuracy: {test_data_accuracy:.2f}\n")

print("\nModel and accuracy file saved successfully!")


First 5 Rows:
    Loan_ID Gender Married Dependents     Education Self_Employed  \
0  LP001002   Male      No          0      Graduate            No   
1  LP001003   Male     Yes          1      Graduate            No   
2  LP001005   Male     Yes          0      Graduate           Yes   
3  LP001006   Male     Yes          0  Not Graduate            No   
4  LP001008   Male      No          0      Graduate            No   

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         NaN             360.0   
1             4583             1508.0       128.0             360.0   
2             3000                0.0        66.0             360.0   
3             2583             2358.0       120.0             360.0   
4             6000                0.0       141.0             360.0   

   Credit_History Property_Area Loan_Status  
0             1.0         Urban           Y  
1             1.0         Rural           N  
2     

  loan_dataset['Loan_Status'] = loan_dataset['Loan_Status'].replace({'N': 0, 'Y': 1})
  loan_dataset = loan_dataset.replace({



Accuracy on training data: 0.7986111111111112
Accuracy on test data: 0.8333333333333334

Model and accuracy file saved successfully!
