1. Import the required Python libraries:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore') 

### Importing & Loading the dataset

2. Read the dataset into a dataframe and display its first 5 rows:

In [None]:
# Read Dataset.csv (path is relative to the working directory) into a
# dataframe, then display the first five rows as a quick sanity check.
df = pd.read_csv('Dataset.csv')
df.head()

### Dataset Info:

3. A summary function to give a structured overview of the above dataframe:

In [None]:
# Print each column's name, non-null count and dtype, plus memory usage.
df.info()

### Dataset Shape:

4. An attribute to return the dimensions of your dataframe(number of rows and number of columns):

In [None]:
# (rows, columns) of the raw dataframe, before any cleaning.
df.shape

### Checking the Missing Values

5. `isnull()` flags every missing value in a boolean dataframe of the same shape; chaining `.sum()` then gives the number of missing values in each column:

In [None]:
# Number of missing (NaN) values in each column.
df.isnull().sum()

#### First we will fill the Missing Values in "LoanAmount" & "Credit_History" by the 'Mean' & 'Median' of the respective variables.

6. Replace the missing values (NaN) in the numeric columns: `LoanAmount` with the column mean and `Credit_History` with the column median:

In [None]:
# Impute missing LoanAmount values with the column mean.
df['LoanAmount'] = df['LoanAmount'].fillna(df['LoanAmount'].mean())

In [None]:
# Impute missing Credit_History values with the column median.
df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].median())

### Let's confirm if there are any missing values in 'LoanAmount' & 'Credit_History'

7. Re-count the missing values per column to confirm that `LoanAmount` and `Credit_History` no longer contain any:

In [None]:
# Re-count missing values per column after the two imputations above;
# LoanAmount and Credit_History should now report 0.
df.isnull().sum()

### Now, Let's drop all the missing values remaining.

8. A function to drop the rows in the dataframe containing at least one missing(NaN) value:

In [None]:
# Drop, in place, every row that still contains at least one missing value.
df.dropna(inplace=True)

### Let's check the Missing values for the final time!

9. Re-count the missing values per column after dropping the incomplete rows (every column is expected to show 0):

In [None]:
# Final missing-value count per column; every entry should be 0 after dropna.
df.isnull().sum()

### Now, Let's check the final Dataset Shape

10. An attribute to return the dimensions of your dataframe(number of rows and number of columns):

In [None]:
# (rows, columns) of the cleaned dataframe, i.e. after dropping incomplete rows.
df.shape

#### Comparison between Parameters in getting the Loan:

11. Creates a series of count plots to visualize how different categorical features relate to loan status:

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Make sure none of the plotted categorical columns (or the hue column)
# contains missing values before drawing.
df.dropna(subset=['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status'], inplace=True)

plt.figure(figsize=(30, 15))  # one wide canvas shared by all panels
sns.set(font_scale=2)  # larger fonts so labels stay readable at this size

# One (column, title) pair per panel, laid out left-to-right on a 2x3 grid.
panels = [
    ('Gender', 'Loan Status by Gender'),
    ('Married', 'Loan Status by Marital Status'),
    ('Education', 'Loan Status by Education'),
    ('Self_Employed', 'Loan Status by Self Employment'),
    ('Property_Area', 'Loan Status by Property Area'),
]
for position, (column, title) in enumerate(panels, start=1):
    plt.subplot(2, 3, position)
    # Bars are split by Loan_Status so both outcomes can be compared per category.
    sns.countplot(x=column, hue='Loan_Status', data=df)
    plt.title(title)

plt.tight_layout()  # avoid overlapping titles and axis labels
plt.show()


### Let's replace the Variable values to Numerical form & display the Value Counts

The data in Numerical form avoids disturbances in building the model. 

12. A function to replace the categorical values(Y and N) in Loan_Status column to numerical values(1 and 0):

In [None]:
# Encode the target as integers (Y -> 1, N -> 0).
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on df['Loan_Status'] is a chained in-place
# operation that modifies a temporary under pandas copy-on-write and leaves
# df unchanged. map() also yields a clean integer column when every value is
# Y or N.
df['Loan_Status'] = df['Loan_Status'].map({'Y': 1, 'N': 0})

13. A function to return the count of the occurrence of each unique value in Loan_Status (now 1 and 0 instead of Y and N):

In [None]:
# Class balance of the target: number of rows for each Loan_Status value.
df['Loan_Status'].value_counts()

14. A mapping dictionary to replace each unique categorical value in Gender column to numeric value:

In [None]:
# Encode Gender as an integer (Male -> 1, Female -> 0), then show how many
# rows fall into each code.
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})
df['Gender'].value_counts()

15. A mapping dictionary to replace each unique categorical value in Married column to numeric value:

In [None]:
# Encode Married as an integer (Yes -> 1, No -> 0), then show how many rows
# fall into each code.
df['Married'] = df['Married'].map({'Yes': 1, 'No': 0})
df['Married'].value_counts()

16. A mapping dictionary to transform each unique value in the Dependents column to a numeric value and count the occurrence of each unique value:

In [None]:
# Convert the Dependents labels to integers; the open-ended '3+' bucket is
# stored as 3. Then show how many rows fall into each value.
df['Dependents'] = df['Dependents'].map({'0': 0, '1': 1, '2': 2, '3+': 3})
df['Dependents'].value_counts()

17. A mapping dictionary to replace each unique categorical value in Education column to numeric value:

In [None]:
# Encode Education as an integer (Graduate -> 1, Not Graduate -> 0), then
# show how many rows fall into each code.
df['Education'] = df['Education'].map({'Graduate': 1, 'Not Graduate': 0})
df['Education'].value_counts()

18. A mapping dictionary to replace each unique categorical value in Self_Employed column to numeric value:

In [None]:
# Encode Self_Employed as an integer (Yes -> 1, No -> 0), then show how many
# rows fall into each code.
df['Self_Employed'] = df['Self_Employed'].map({'Yes': 1, 'No': 0})
df['Self_Employed'].value_counts()

19. A mapping dictionary to replace each unique categorical value in Property_Area column to numeric value:

In [None]:
# Encode Property_Area as an integer (Rural -> 0, Semiurban -> 1, Urban -> 2),
# then show how many rows fall into each code.
df['Property_Area'] = df['Property_Area'].map({'Urban': 2, 'Rural': 0, 'Semiurban': 1})
df['Property_Area'].value_counts()

20. A function to return the count of the occurrence of each unique value in the LoanAmount:

In [None]:
# Frequency of each distinct LoanAmount value.
df['LoanAmount'].value_counts()

21. A function to return the count of the occurrence of each unique value in the Loan_Amount_Term:

In [None]:
# Frequency of each distinct Loan_Amount_Term value.
df['Loan_Amount_Term'].value_counts()

22. A function to return the count of the occurrence of each unique value in the Credit_History:

In [None]:
# Frequency of each distinct Credit_History value.
df['Credit_History'].value_counts()

### Final DataFrame

23. Display the first 5 rows of the updated dataframe:

In [None]:
# Preview the first five rows after the categorical encodings.
df.head()

### Importing Packages for Classification algorithms

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

### Splitting the data into Train and Test set

In [None]:
# Features are columns 1-11 and the target is column 12, for every row.
# Column 0 is skipped; presumably it is an identifier column -- verify
# against the dataset.
# All rows are sliced (":") instead of the previous hard-coded 1:542 range:
# iloc is 0-based, so that range silently dropped the first row, and it tied
# the cell to one specific row count.
X = df.iloc[:, 1:12].values
y = df.iloc[:, 12].values

In [None]:
# Hold out 30% of the rows as the test set; random_state=0 keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=0)

### Logistic Regression (LR)

Logistic regression is a supervised learning classification algorithm used to predict the probability of a target variable. 

Mathematically, a logistic regression model predicts P(Y=1) as a function of X. It is one of the simplest ML algorithms that can be used for various classification problems such as spam detection, Diabetes prediction, cancer detection etc.

In [None]:
# Fit a logistic-regression classifier on the training split.
# NOTE(review): the features are not scaled and LogisticRegression() runs with
# its default settings; the warnings filter installed at the top of the
# notebook would hide a ConvergenceWarning -- confirm the solver converges.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the held-out rows and report accuracy. accuracy_score expects
# (y_true, y_pred); the value is the same either way, but keeping the
# documented order matches the evaluation cell further down.
lr_prediction = model.predict(X_test)
print('Logistic Regression accuracy = ', metrics.accuracy_score(y_test, lr_prediction))

In [None]:
# Print predicted and true test labels so they can be compared by eye.
print("y_predicted",lr_prediction)
print("y_test",y_test)

**CONCLUSION:**

1. The Loan Status is heavily dependent on the Credit History for Predictions.
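2. The Logistic Regression algorithm gives us an accuracy of approximately 79% on the held-out test set. It is described as the maximum accuracy compared to 3 other Machine Learning Classification Algorithms, but those algorithms are not included in this notebook.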

## ✅ Evaluation Metrics

In [None]:

from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score

# Compute every metric first, always passing (true labels, predicted labels).
accuracy = accuracy_score(y_test, lr_prediction)
f1 = f1_score(y_test, lr_prediction)
cm = confusion_matrix(y_test, lr_prediction)
report = classification_report(y_test, lr_prediction)

# Then report them: scalar scores to two decimals, followed by the
# confusion matrix and the per-class classification report.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", report)


## 🧩 Predict on New Input

In [None]:

# Function to take user input and predict
# Integer codes for the categorical answers. They mirror the mappings applied
# to the training dataframe earlier in this notebook; keys are lower-case so
# answers can be matched case-insensitively.
_CATEGORY_CODES = {
    'Gender': {'male': 1, 'female': 0},
    'Married': {'yes': 1, 'no': 0},
    'Dependents': {'0': 0, '1': 1, '2': 2, '3+': 3},
    'Education': {'graduate': 1, 'not graduate': 0},
    'Self_Employed': {'yes': 1, 'no': 0},
    'Property_Area': {'urban': 2, 'semiurban': 1, 'rural': 0},
}

# (feature name, prompt) pairs in the order the questions are asked, which is
# also the order the values are handed to the classifier.
# NOTE(review): this assumes the training matrix columns follow the same order
# (dataframe columns 1-11) -- confirm against the dataset header.
_FEATURE_PROMPTS = (
    ('Gender', "Gender (Male/Female): "),
    ('Married', "Married (Yes/No): "),
    ('Dependents', "Number of Dependents (0/1/2/3+): "),
    ('Education', "Education (Graduate/Not Graduate): "),
    ('Self_Employed', "Self Employed (Yes/No): "),
    ('ApplicantIncome', "Applicant Income: "),
    ('CoapplicantIncome', "Coapplicant Income: "),
    ('LoanAmount', "Loan Amount: "),
    ('Loan_Amount_Term', "Loan Amount Term: "),
    ('Credit_History', "Credit History (1.0/0.0): "),
    ('Property_Area', "Property Area (Urban/Semiurban/Rural): "),
)


def _encode_feature(name, raw):
    """Convert one raw answer into the numeric value used for training."""
    text = raw.strip()
    codes = _CATEGORY_CODES.get(name)
    if codes is None:
        # Plain numeric feature; float() raises ValueError on bad input.
        return float(text)
    try:
        return codes[text.casefold()]
    except KeyError:
        raise ValueError(f"Unrecognised value {raw!r} for {name}") from None


def get_user_input(classifier=None, ask=input):
    """Prompt for one loan application and print the predicted decision.

    Each answer is encoded exactly like the training data (integer codes for
    the categorical fields, floats for the numeric ones) and the resulting
    single-row feature matrix is passed to the classifier.

    Args:
        classifier: Fitted estimator exposing ``predict``. Defaults to the
            notebook-level ``model``.
        ask: Callable that takes a prompt string and returns the answer.
            Defaults to the built-in ``input``.

    Raises:
        ValueError: If a categorical answer is not one of the offered options
            or a numeric answer cannot be parsed as a float.
    """
    if classifier is None:
        classifier = model

    features = [_encode_feature(name, ask(prompt)) for name, prompt in _FEATURE_PROMPTS]

    # The classifier was fitted on a plain numeric array, so a 1 x 11 array is
    # passed here rather than a dataframe.
    prediction = classifier.predict(np.asarray([features], dtype=float))

    # Loan_Status is encoded as 1 (approved) / 0 (rejected) before training.
    print(f"Loan Prediction: {'Approved' if prediction[0] == 1 else 'Rejected'}")

# Run the interactive prompt once: asks for one applicant's details and
# prints the predicted loan decision.
get_user_input()
