# Logistic Regression

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'pandas'

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import MinMaxScaler 
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the loan dataset from a CSV file expected in the current working directory.
df = pd.read_csv('loan_data.csv')
# Preview the first rows to sanity-check that the file was parsed as expected.
df.head()

# Data Pre-Processing

In [None]:
# Summary statistics of the columns, used as a first look at ranges and scale.
df.describe()

## 1. Handling NaN Values:

In [None]:
# Column names, non-null counts and dtypes; the non-null counts reveal missing values.
df.info()

In [None]:
# Draw the boolean "is missing" mask as a heatmap: any missing cell would show
# up as a contrasting mark in its column.
missing_mask = df.isnull()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(missing_mask, cmap='viridis', cbar=False, ax=ax)
ax.set_title('Missing Values Heatmap')
plt.show()

# There are no NaN values in the dataset.

## 2. Label Encoding:

In [44]:
# Columns that are label-encoded to integer codes before modelling.
categorical_features = [
    'person_gender',
    'person_education',
    'person_home_ownership',
    'loan_intent',
    'previous_loan_defaults_on_file',
]

In [45]:
# Encode every categorical column as integer codes.
#
# One LabelEncoder is kept per column in `label_encoders`, so each column's
# category <-> code mapping stays available afterwards (for inverse_transform
# or for encoding new data). Re-fitting a single shared encoder, as before,
# kept only the mapping of the last column processed.
label_encoders = {}
label_encoder = LabelEncoder()  # stays defined even if the feature list is empty

for col in categorical_features:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    label_encoders[col] = label_encoder

In [None]:
# Show the distinct codes present in each encoded column as a quick sanity check.
for column_name in categorical_features:
    distinct_codes = df[column_name].unique()
    print(distinct_codes)

## 3. Handling Outliers:

In [None]:
# One boxplot per column, each on its own figure, to inspect the spread and
# spot outliers before any treatment.
for column_name in df.columns:
    plt.figure(figsize=(8, 6))
    column_values = df[column_name]
    sns.boxplot(data=column_values, color='lightblue')
    plt.title(column_name)
    plt.xticks(rotation=45)
    plt.show()

In [48]:
# Numeric columns whose outliers are treated in the next step.
outlier_features = [
    'person_age',
    'person_income',
    'person_emp_exp',
    'loan_amnt',
    'loan_percent_income',
    'cb_person_cred_hist_length',
    'credit_score',
]

In [49]:
# Replace outliers with the column median, one feature at a time.
#
# A value is an outlier when it lies outside the boxplot whiskers
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR]. The replacement is done with a vectorized
# Series.mask instead of a per-element Python lambda; missing values compare
# False on both sides and are therefore left untouched, as before.
for feature in outlier_features:
    column = df[feature]
    Q1 = column.quantile(0.25)  # First quartile
    Q3 = column.quantile(0.75)  # Third quartile
    IQR = Q3 - Q1  # Interquartile range
    lower_whisker = Q1 - 1.5 * IQR
    upper_whisker = Q3 + 1.5 * IQR
    median = column.median()

    # Boolean mask of the values outside the whiskers
    is_outlier = (column > upper_whisker) | (column < lower_whisker)
    df[feature] = column.mask(is_outlier, median)


In [None]:
# Report, for EVERY treated feature, how many values lie outside the
# 1.5*IQR whiskers. Previously this ran once, outside any loop, so it only
# covered the last feature and relied on variables leaked from the loop above.
#
# The whiskers are recomputed from the current data, so a non-zero count means
# values are outliers relative to the post-replacement distribution.
for feature in outlier_features:
    q1 = df[feature].quantile(0.25)
    q3 = df[feature].quantile(0.75)
    iqr = q3 - q1
    lower_whisker = q1 - 1.5 * iqr
    upper_whisker = q3 + 1.5 * iqr

    outliers = df[(df[feature] < lower_whisker) | (df[feature] > upper_whisker)]
    print(f"{feature}: {len(outliers)} outliers")

In [None]:
# Re-draw one boxplot per column to inspect the distributions after the
# outlier replacement.
for column_name in df.columns:
    plt.figure(figsize=(8, 6))
    column_values = df[column_name]
    sns.boxplot(data=column_values, color='lightblue')
    plt.title(column_name)
    plt.xticks(rotation=45)
    plt.show()

## 4. Scaling Data:

In [52]:
#  Split Data into Features (X) and Target (y)
# X holds every column except the label; y keeps the label as a one-column
# DataFrame.
X = df.drop('loan_status', axis=1)
y = df.loc[:, ['loan_status']]

In [53]:
#  Scaling Data
#
# Only the features are min-max scaled. The target holds class labels, not a
# quantity to rescale, and re-fitting the same scaler on y would overwrite the
# feature scaling parameters, making `scaler` unusable for transforming new
# feature rows later.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# below, so test-set minima/maxima influence the scaling; consider fitting on
# the training split only.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Flatten the label column to the 1-D shape (n_samples,) that classifiers expect.
y = np.ravel(y)

In [54]:
# Split Data for Training and Testing
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [55]:
# Model Selection
# Logistic regression classifier created with the library's default hyperparameters.

model = LogisticRegression()

In [None]:
# Train Logistic Regression Model
# The labels are flattened to 1-D because fit() expects y of shape
# (n_samples,); passing a column vector raises a DataConversionWarning, which
# the global warnings filter in this notebook would otherwise hide.

model.fit(X_train, np.ravel(y_train))

In [None]:
# Check Model Coefficients
# Learned weights of the fitted model, one per input feature column.

model.coef_

In [None]:
# Check Model Intercept
# Learned bias term of the fitted model.

model.intercept_

In [None]:
# Evaluate Model Performance
# For a classifier, score() is the mean accuracy on the given data; here the held-out test split.

model.score(X_test, y_test)

In [None]:
# Display the true labels of the test split, for comparison with the predictions.
y_test

In [61]:
# Making Predictions
# Predicted class label for every row of the test split.

y_predict = model.predict(X_test)

In [None]:
# Display the predicted labels.
y_predict

In [63]:
# Get probabilities for each class
# One row per test sample; columns follow the class order in model.classes_.
probabilities = model.predict_proba(X_test)

In [None]:
# Display the predicted class probabilities.
probabilities

In [65]:
# Evaluate Predictions
# Text table with per-class precision, recall, F1-score and support.

report  = classification_report(y_test, y_predict)

In [None]:
# print() renders the report's line breaks; a bare expression would show the escaped string.
print(report)

In [None]:
# Raw confusion matrix: rows are the actual classes, columns the predicted classes.
confusion_matrix(y_test, y_predict)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot the confusion matrix as an annotated heatmap
# (rows = actual class, columns = predicted class).
class_labels = ['Class 0', 'Class 1']
cm = confusion_matrix(y_test, y_predict)
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

# Accuracy

In [None]:
# Accuracy = correctly classified samples / all samples.
# Derived from the confusion matrix `cm` computed above instead of hard-coded
# counts, so the value stays correct when the data, split or model changes.
# The diagonal of `cm` holds the correct predictions of each class.
Accuracy = cm.trace() / cm.sum()
print(f'Accuracy : {Accuracy}')

# Precision

In [None]:
# Class 0
# Precision = correct class-0 predictions / all samples predicted as class 0.
# `cm` (computed above) has actual classes as rows and predicted classes as
# columns, so the denominator is the sum of column 0. Using `cm` instead of
# hard-coded counts keeps the value in sync with the current model run.
precision_0 = cm[0, 0] / cm[:, 0].sum()
print(f'Precision of Class 0 : {precision_0}')

In [None]:
# Class 1
# Precision = correct class-1 predictions / all samples predicted as class 1.
# `cm` (computed above) has actual classes as rows and predicted classes as
# columns, so the denominator is the sum of column 1. The previous hard-coded
# denominator was inconsistent with the class-0 formulas, and the printed
# label said "Class 0".
precision_1 = cm[1, 1] / cm[:, 1].sum()
print(f'Precision of Class 1 : {precision_1}')

# Recall

In [None]:
# Class 0
# Recall = correct class-0 predictions / all samples that actually are class 0.
# `cm` (computed above) has actual classes as rows and predicted classes as
# columns, so the denominator is the sum of row 0. Using `cm` instead of
# hard-coded counts keeps the value in sync with the current model run.
Recall_0 = cm[0, 0] / cm[0, :].sum()
print(f'Recall of Class 0 : {Recall_0}')

In [None]:
# Class 1
# Recall = correct class-1 predictions / all samples that actually are class 1.
# `cm` (computed above) has actual classes as rows and predicted classes as
# columns, so the denominator is the sum of row 1. The previous hard-coded
# denominator was inconsistent with the class-0 formulas, and the printed
# label said "Class 0".
Recall_1 = cm[1, 1] / cm[1, :].sum()
print(f'Recall of Class 1 : {Recall_1}')