In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats

import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Location of the loan CSV read by this notebook.
LOAN_CSV_PATH = '/kaggle/input/loan-data/loan.csv'

# Read the file into a DataFrame and show it as the cell output.
data = pd.read_csv(LOAN_CSV_PATH)
data

In [None]:
# Print column names, dtypes and non-null counts of the freshly loaded frame.
data.info()

In [None]:
# Columns drawn as box plots against Loan_Status in later cells.
numerical = [
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
]

# Columns drawn as count plots against Loan_Status in later cells.
categorical = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
]

In [None]:
# One count plot per categorical column on a 3x2 grid, bars split by Loan_Status.
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 15))
# axes.ravel() walks the grid row by row, matching the order of `categorical`.
for ax, cat in zip(axes.ravel(), categorical):
    sns.countplot(data=data, x=cat, hue='Loan_Status', ax=ax)
# Leave vertical room between the rows of plots.
plt.subplots_adjust(hspace=1)

In [None]:
# Number of rows that are exact duplicates of an earlier row.
data.duplicated().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the frame.
data.describe()

In [None]:
# Missing-value count per column, before any imputation.
data.isna().sum()

In [None]:
# Fill missing values in these columns with each column's most frequent value.
# The filled column is assigned back to the frame: calling
# `fillna(..., inplace=True)` on `data[col]` is a chained in-place modification,
# which pandas warns about and which leaves `data` unchanged under Copy-on-Write.
for mode_col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    data[mode_col] = data[mode_col].fillna(data[mode_col].mode()[0])

In [None]:
# Fill missing values in these columns with the column mean.
# The filled column is assigned back to the frame: calling
# `fillna(..., inplace=True)` on `data[col]` is a chained in-place modification,
# which pandas warns about and which leaves `data` unchanged under Copy-on-Write.
# NOTE(review): Credit_History looks like a 0/1 flag; a mean fill would create
# fractional values there — confirm whether the mode is the better choice.
for mean_col in ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']:
    data[mean_col] = data[mean_col].fillna(data[mean_col].mean())

In [None]:
# Missing-value count per column, re-checked after imputation.
data.isna().sum()

In [None]:
# Column dtypes and non-null counts again, now that the fills have run.
data.info()

In [None]:
# Box plot of every numeric column against Loan_Status, five panels in one row.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(17, 5))
for ax, num_col in zip(axes, numerical):
    sns.boxplot(data=data, x='Loan_Status', y=num_col, ax=ax)

plt.subplots_adjust(hspace=1)

In [None]:
# Keep only rows with ApplicantIncome <= 8000, CoapplicantIncome <= 6000 and
# LoanAmount within [100, 220] (both ends inclusive).
income_ok = data['ApplicantIncome'].le(8000)
co_income_ok = data['CoapplicantIncome'].le(6000)
amount_ok = data['LoanAmount'].between(100, 220)
data = data[income_ok & co_income_ok & amount_ok]

In [None]:
# Same five box plots as before, redrawn on the filtered frame.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(17, 5))

for panel, column_name in zip(axes.flat, numerical):
    sns.boxplot(x='Loan_Status', y=column_name, data=data, ax=panel)

plt.subplots_adjust(hspace=1)

In [None]:
# Encode on a separate copy so `data` keeps its original string labels.
data_encoding=data.copy()

In [None]:
# Integer-encode Gender. `Series.factorize` has no parameter for a label list
# (its first positional parameter is `sort`), so sorted codes are requested
# explicitly: codes follow the sorted order of the labels present.
# Expected mapping: 'Female' -> 0, 'Male' -> 1 — confirm with the counts below.
data_encoding['Gender'] = data_encoding['Gender'].factorize(sort=True)[0]
data_encoding['Gender'].value_counts()

In [None]:
# Integer-encode Married. `Series.factorize` has no parameter for a label list
# (its first positional parameter is `sort`), so sorted codes are requested
# explicitly: codes follow the sorted order of the labels present.
# Expected mapping: 'No' -> 0, 'Yes' -> 1 — confirm with the counts below.
data_encoding['Married'] = data_encoding['Married'].factorize(sort=True)[0]
data_encoding['Married'].value_counts()

In [None]:
# Integer-encode Education. `Series.factorize` has no parameter for a label
# list (its first positional parameter is `sort`), so sorted codes are requested
# explicitly: codes follow the sorted order of the labels present.
# Expected mapping: 'Graduate' -> 0, 'Not Graduate' -> 1 — confirm with the
# counts below.
data_encoding['Education'] = data_encoding['Education'].factorize(sort=True)[0]
data_encoding['Education'].value_counts()

In [None]:
# Integer-encode Self_Employed. `Series.factorize` has no parameter for a label
# list (its first positional parameter is `sort`), so sorted codes are requested
# explicitly: codes follow the sorted order of the labels present.
# Expected mapping: 'No' -> 0, 'Yes' -> 1 — confirm with the counts below.
data_encoding['Self_Employed'] = data_encoding['Self_Employed'].factorize(sort=True)[0]
data_encoding['Self_Employed'].value_counts()

In [None]:
# Frequency of each Property_Area label before it is encoded.
data_encoding['Property_Area'].value_counts()

In [None]:
# Remove the Loan_ID column from the frame being encoded.
data_encoding = data_encoding.drop(columns=['Loan_ID'])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the Property_Area labels with integer codes; the fitted encoder stays
# available as `le`.
le = LabelEncoder()
property_area = data_encoding['Property_Area']
data_encoding['Property_Area'] = le.fit(property_area).transform(property_area)

In [None]:
# Integer-encode the target. `Series.factorize` has no parameter for a label
# list (its first positional parameter is `sort`), so sorted codes are requested
# explicitly: codes follow the sorted order of the labels present.
# Expected mapping: 'N' -> 0, 'Y' -> 1 — confirm with the counts below, since
# later metrics depend on which class is encoded as 1.
data_encoding['Loan_Status'] = data_encoding['Loan_Status'].factorize(sort=True)[0]
data_encoding['Loan_Status'].value_counts()

In [None]:
# Display the frame after the encoding steps so far.
data_encoding

In [None]:
# Per-column dtypes; any column still listed as object has not been encoded yet.
data_encoding.dtypes

In [None]:
# Frequency of each Dependents label before it is encoded.
data_encoding['Dependents'].value_counts()

In [None]:
# Replace the Dependents labels with integer codes. This refits the shared
# encoder `le`, so afterwards its learned classes describe Dependents.
dependents = data_encoding['Dependents']
data_encoding['Dependents'] = le.fit(dependents).transform(dependents)

In [None]:
# Per-column dtypes after encoding Dependents.
data_encoding.dtypes

In [None]:
# Per-column dtypes of the encoded frame.
# NOTE(review): this repeats the previous cell's check unchanged.
data_encoding.dtypes

In [None]:
# Summary statistics of `data` (the filtered, un-encoded frame).
# NOTE(review): this describes `data`, not `data_encoding` — confirm that is intended.
data.describe()

In [None]:
# First rows of the fully encoded frame.
data_encoding.head()

In [None]:
# Target vector: the encoded Loan_Status column.
y = data_encoding['Loan_Status']
# Feature matrix: every other column.
X = data_encoding.drop(columns=['Loan_Status'])

In [None]:
# Class balance of the target before any resampling.
sns.set_theme(style='darkgrid')
# `y` is passed as a vector, so no `data=` frame is needed.
sns.countplot(y=y)
plt.show()

In [None]:
from imblearn.over_sampling import SMOTE
X, y = SMOTE().fit_resample(X,y)
sns.set_theme(style='darkgrid')
sns.countplot(y=y,data=data)
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler
X=MinMaxScaler().fit_transform(X)
X

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print('x_train shape: ', X_train.shape)
print('x_test shape: ', X_test.shape)
print('y_train shape: ', y_train.shape)
print('y_test shape: ', y_test.shape)

In [None]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel; C and gamma are fixed by hand.
svc_params = {'kernel': 'rbf', 'C': 4, 'gamma': 3}
model = SVC(**svc_params)

In [None]:
# Train the classifier on the training split.
model.fit(X_train,y_train)

In [None]:
# Predicted class labels for the held-out rows; reused by the metric cells.
prediction = model.predict(X_test)
print(prediction)

In [None]:
# Mean accuracy of the classifier on the test split.
model.score(X_test,y_test)

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels.
confusion_matrix(y_test,prediction)

In [None]:
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,confusion_matrix,auc,roc_curve,classification_report

In [None]:
tp,fp,t=roc_curve(y_test,prediction)

In [None]:
plt.plot(tp,fp)
plt.show()

In [None]:
auc(tp,fp)

In [None]:
# Precision for the class encoded as 1 (scikit-learn's default positive label).
precision_score(y_test,prediction)