# Library:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# About Dataset:
This Loan Status Prediction dataset contains records of applicants who previously applied for a property loan.
The bank decides whether to grant a loan based on factors such as Applicant Income, Loan Amount, previous Credit History, Co-applicant Income, etc.
Our goal is to build a Machine Learning model that predicts whether an applicant's loan will be approved or rejected.

# About the loan_data.csv file:
* Loan_ID: A unique loan ID.

* Gender: Either male or female.
* Married: Whether the applicant is married (Yes) or not married (No).
* Dependents: Number of persons depending on the client.
* Education: Applicant Education(Graduate or Undergraduate).
* Self_Employed: Self-employed (Yes/No).
* ApplicantIncome: Applicant income.
* CoapplicantIncome: Co-applicant income.
* LoanAmount: Loan amount in thousands.
* Loan_Amount_Term: Terms of the loan in months.
* Credit_History: Credit history meets guidelines.
* Property_Area: Whether the applicant lives in an Urban, Semi-Urban or Rural area.
* Loan_Status: Loan approved (Y/N).

# Data:

In [None]:
# Read the loan applications CSV (Colab path) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='/content/loan_data.csv')
df.head(n=5)

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

# Data Analysis:

In [None]:
# Label-encode only the non-numeric (text) columns. Numeric columns such as
# the income and loan-amount fields are left untouched so their magnitudes
# survive into scaling instead of being replaced by rank codes.
le = LabelEncoder()
for column in df.select_dtypes(exclude="number").columns:
    # Encode just the observed values: missing entries must stay NaN so the
    # mode imputation that follows can actually fill them, rather than being
    # encoded as a category of their own (or raising on mixed str/float data).
    present = df[column].notna()
    codes = pd.Series(
        le.fit_transform(df.loc[present, column]),
        index=df.index[present],
    )
    # Reindexing restores the full row set, leaving NaN where data is missing.
    df[column] = codes.reindex(df.index)

In [None]:
# Replace missing entries in each listed column with that column's most
# frequent value (the first mode when several values tie).
for column in ("Gender", "Dependents", "Self_Employed", "Credit_History", "Loan_Amount_Term"):
    most_common = df[column].mode().iloc[0]
    df[column] = df[column].fillna(most_common)

In [None]:
# Standardise every feature column with StandardScaler. Loan_ID is an
# identifier and Loan_Status is the target, so both are excluded from scaling.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test-set statistics leak into training — consider
# fitting it on the training rows only.
features = df.drop(['Loan_ID', 'Loan_Status'], axis=1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)
# Reuse df's index so the unscaled columns copied below align row-for-row.
df_scaled = pd.DataFrame(X_scaled, columns=features.columns, index=df.index)

# Carry the unscaled 'Loan_ID' and 'Loan_Status' columns over to the scaled frame
df_scaled[['Loan_ID', 'Loan_Status']] = df[['Loan_ID', 'Loan_Status']]

# Display the scaled DataFrame
df_scaled


In [None]:
# Analysis: pairwise correlations between the columns of the scaled frame,
# drawn as an annotated heatmap.
correlation_matrix = df_scaled.corr()
plt.figure(figsize=(12, 6))
heatmap_options = dict(annot=True, cmap='coolwarm', linewidths=0.5)
sns.heatmap(correlation_matrix, **heatmap_options)
plt.show()


In [None]:
# Target is the loan decision; features are everything else except the
# identifier and the Gender column.
y = df_scaled['Loan_Status']
X = df_scaled.drop(columns=['Loan_Status', 'Gender', 'Loan_ID'])
X

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


# Machine Learning:

In [None]:
# Candidate classifiers, keyed by the label used in the printed report.
# NOTE: RandomForestClassifier is created without a random_state, so its
# score can vary from run to run.
models = {}
models['Logistic Regression'] = LogisticRegression(max_iter=1000)
models['Random Forest'] = RandomForestClassifier()
models['SVM'] = SVC()
models['KNN'] = KNeighborsClassifier()

# Fit each candidate on the training rows and report its hold-out accuracy.
for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy: {accuracy}")

In [None]:
# Final model: logistic regression with a generous iteration cap, fitted on
# the training split.
model = LogisticRegression(C=1.0, max_iter=10000)
model.fit(X=X_train, y=y_train)

# Predict:

In [None]:
# Predicted labels for the held-out test rows.
predict = model.predict(X_test)

In [None]:
# Share of test rows whose predicted label matches the true label, printed
# both as a raw fraction and as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=predict)
print("Accuracy:", accuracy)
print(f'Accuracy Score: {accuracy*100:0.2f}%')

In [None]:
# Confusion table: true labels as rows, predicted labels as columns, with
# row/column totals added by margins=True.
pd.crosstab(y_test, predict, rownames=['True'], colnames=['Predicted'], margins=True)