<a href="https://colab.research.google.com/github/MohammedNihalLDR/Machine-learning-ML-/blob/main/IBM_Finalproject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the loan-approval dataset directly from its raw GitHub URL
file_path = "https://raw.githubusercontent.com/prasertcbs/basic-dataset/refs/heads/master/Loan-Approval-Prediction.csv"
df = pd.read_csv(file_path)

# 'Loan_ID' is dropped because it is not a useful feature
df = df.drop(columns=['Loan_ID'])

# Step 4: Handling Missing Values

# Numerical columns: impute with the median, which is less affected by outliers
for numeric_col in ('LoanAmount', 'Loan_Amount_Term'):
    df[numeric_col] = df[numeric_col].fillna(df[numeric_col].median())

# Credit_History: impute with the mode (the most common value)
most_common_history = df['Credit_History'].mode()[0]
df['Credit_History'] = df['Credit_History'].fillna(most_common_history)

# Collapse the "3+" dependents label into "3"
df["Dependents"] = df["Dependents"].replace("3+", "3")

# The remaining categorical gaps are filled with fixed values rather than the
# mode: Gender="Female", Married="No", Self_Employed="Yes", Dependents="3"
fixed_fill_values = {
    "Gender": "Female",
    "Married": "No",
    "Self_Employed": "Yes",
    "Dependents": "3",
}
filled_categoricals = df[list(fixed_fill_values)].fillna(fixed_fill_values)
df.update(filled_categoricals)

# Step 5: Removing Duplicate Records (if any)
df = df.drop_duplicates()
# Function to remove outliers using IQR method
def remove_outliers_iqr(data: pd.DataFrame, column: str, factor: float = 1.5) -> pd.DataFrame:
    """Return the rows of *data* whose *column* value lies inside the IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR`` (both
    inclusive), where Q1/Q3 are the 25th/75th percentiles of ``data[column]``.

    Args:
        data: Frame to filter. It is not modified.
        column: Name of the numeric column used to compute the fences.
        factor: Multiplier applied to the IQR. The default of 1.5 reproduces
            the classic Tukey rule; larger values keep more rows.

    Returns:
        The subset of *data* within the fences, with the original index
        labels and row order preserved. Rows where *column* is NaN are
        dropped because NaN fails both comparisons.
    """
    values = data[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = factor * (q3 - q1)

    lower_bound = q1 - spread
    upper_bound = q3 + spread

    within_fences = (values >= lower_bound) & (values <= upper_bound)
    return data[within_fences]

# Outlier trimming, one column per pass. Every pass orders the rows by the
# column (largest value first) and then keeps only the rows inside that
# column's IQR fences, so each pass works on the survivors of the previous one.

# Pass 1: ApplicantIncome
dff = df.sort_values(by=['ApplicantIncome'], ascending=False)
df_cleaned = remove_outliers_iqr(data=dff, column='ApplicantIncome')

# Pass 2: CoapplicantIncome
dff2 = df_cleaned.sort_values(by=['CoapplicantIncome'], ascending=False)
df_cleane = remove_outliers_iqr(data=dff2, column='CoapplicantIncome')

# Pass 3: LoanAmount -- the result, df_clean, is what the rest of the notebook uses
dff3 = df_cleane.sort_values(by=['LoanAmount'], ascending=False)
df_clean = remove_outliers_iqr(data=dff3, column='LoanAmount')

from sklearn.preprocessing import LabelEncoder

# Work on an independent copy so the column assignments below do not trigger
# SettingWithCopyWarning on the filtered frame.
df_clean = df_clean.copy()

# Categorical columns to encode (the target 'Loan_Status' is included)
categorical_columns = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status', 'Dependents']

# One fitted encoder per column, kept for inverse transformation later
label_encoders = {}

for col in categorical_columns:
    # Fill any remaining gaps with the column's mode before encoding
    filled_col = df_clean[col].fillna(df_clean[col].mode()[0])

    le = LabelEncoder()
    # Assign with df_clean[col] = ... rather than df_clean.loc[:, col] = ...:
    # on pandas 2.x the .loc form writes into the existing object-dtype column
    # in place, so the encoded integers would stay stored as dtype=object.
    # Replacing the column gives it a proper integer dtype.
    df_clean[col] = le.fit_transform(filled_col)
    label_encoders[col] = le

# Keep the target explicitly as int dtype
df_clean['Loan_Status'] = df_clean['Loan_Status'].astype(int)


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Features (X) are every column except the target; the target (y) is Loan_Status
X = df_clean.drop(columns=['Loan_Status'])
y = df_clean['Loan_Status']

# 80/20 train/test split, stratified on the target and seeded for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the features: fit the scaler on the training split only, then
# apply the same transformation to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the KNN classifier ('n_neighbors' can be changed for tuning)
model = KNeighborsClassifier(n_neighbors=12)
model.fit(X_train_scaled, y_train)

# Predict once per split and reuse the results below; the fitted model and the
# inputs do not change, so a second predict call would return the same labels.
y_pred = model.predict(X_test_scaled)
y_train_pred = model.predict(X_train_scaled)
y_test_pred = y_pred

# Evaluate on the test split
accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy:.4f}")  # Print accuracy
print("🔹 Classification Report:\n", test_report)

# Training vs. testing accuracy (a large gap would suggest overfitting)
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy

print(f"🎯 Training Accuracy: {train_accuracy:.4f}")
print(f"🧪 Testing Accuracy: {test_accuracy:.4f}")

# Classification report for the testing data
print("🔹 Classification Report (Testing Data):\n", test_report)

✅ Model Accuracy: 0.8077
🔹 Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.41      0.57        32
           1       0.79      0.99      0.88        72

    accuracy                           0.81       104
   macro avg       0.86      0.70      0.72       104
weighted avg       0.83      0.81      0.78       104

🎯 Training Accuracy: 0.8341
🧪 Testing Accuracy: 0.8077
🔹 Classification Report (Testing Data):
               precision    recall  f1-score   support

           0       0.93      0.41      0.57        32
           1       0.79      0.99      0.88        72

    accuracy                           0.81       104
   macro avg       0.86      0.70      0.72       104
weighted avg       0.83      0.81      0.78       104



In [None]:
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Mapping dictionaries for categorical inputs. The integer codes must match
# what the training step's LabelEncoder produced (classes sorted ascending).
gender_mapping = {"Female": 0, "Male": 1}
married_mapping = {"No": 0, "Yes": 1}
# Training collapses "3+" into "3", so 3 is the highest Dependents code the
# model has seen and it means "three or more". A separate code 4 would be a
# value the scaler and model were never fitted on.
dependents_mapping = {"None": 0, "One": 1, "Two": 2, "Three or More": 3}
education_mapping = {"Graduate": 0, "Not Graduate": 1}
self_employed_mapping = {"No": 0, "Yes": 1}
credit_history_mapping = {"Bad": 0, "Good": 1}
property_area_mapping = {"Rural": 0, "Semiurban": 1, "Urban": 2}

# Shared label width so every description lines up in the form
_DESCRIPTION_STYLE = {'description_width': '150px'}

# Dropdowns for categorical variables (with spacing)
gender_dropdown = widgets.Dropdown(options=list(gender_mapping), description="Gender:  ", style=_DESCRIPTION_STYLE)
married_dropdown = widgets.Dropdown(options=list(married_mapping), description="Married:  ", style=_DESCRIPTION_STYLE)
dependents_dropdown = widgets.Dropdown(options=list(dependents_mapping), description="Dependents:  ", style=_DESCRIPTION_STYLE)
education_dropdown = widgets.Dropdown(options=list(education_mapping), description="Education:  ", style=_DESCRIPTION_STYLE)
self_employed_dropdown = widgets.Dropdown(options=list(self_employed_mapping), description="Self Employed:  ", style=_DESCRIPTION_STYLE)
credit_history_dropdown = widgets.Dropdown(options=list(credit_history_mapping), description="Credit History:  ", style=_DESCRIPTION_STYLE)
property_area_dropdown = widgets.Dropdown(options=list(property_area_mapping), description="Property Area:  ", style=_DESCRIPTION_STYLE)

# Numeric inputs with labels
applicant_income_input = widgets.FloatText(description="Applicant Income:  ", style=_DESCRIPTION_STYLE)
coapplicant_income_input = widgets.FloatText(description="Coapplicant Income:  ", style=_DESCRIPTION_STYLE)
loan_amount_input = widgets.FloatText(description="Loan Amount:  ", style=_DESCRIPTION_STYLE)
# 'Loan_Amount_Term' in the training data is expressed in months (e.g. 360),
# so the label asks for months to keep user input on the same scale.
loan_term_input = widgets.FloatText(description="Loan Term (months):  ", style=_DESCRIPTION_STYLE)

# Prediction button and the output area that shows the result
predict_button = widgets.Button(description="Check Loan Approval", button_style='success')
output_label = widgets.Output()

# Function to predict loan approval
def predict_loan_approval(b):
    """Button callback: predict loan approval from the current form values.

    Reads every form widget, encodes the dropdown selections with the mapping
    dictionaries, scales the resulting single-row feature vector with the
    fitted ``scaler`` and prints the ``model`` verdict inside ``output_label``.

    Args:
        b: The button instance passed by ``on_click``; unused.
    """
    import pandas as pd  # local import keeps this callback self-contained

    with output_label:
        clear_output()

        # One row, in the same column order as the training features
        user_data = np.array([[gender_mapping[gender_dropdown.value],
                               married_mapping[married_dropdown.value],
                               dependents_mapping[dependents_dropdown.value],
                               education_mapping[education_dropdown.value],
                               self_employed_mapping[self_employed_dropdown.value],
                               applicant_income_input.value,
                               coapplicant_income_input.value,
                               loan_amount_input.value,
                               loan_term_input.value,
                               credit_history_mapping[credit_history_dropdown.value],
                               property_area_mapping[property_area_dropdown.value]]])

        # The scaler was fitted on a DataFrame, so it remembers column names.
        # Passing a bare ndarray makes scikit-learn emit an "X does not have
        # valid feature names" warning into the output area on every click;
        # supplying the same column names avoids that.
        feature_names = getattr(scaler, "feature_names_in_", None)
        if feature_names is not None:
            user_data = pd.DataFrame(user_data, columns=feature_names)

        # Standardize the input with the training-time statistics
        user_data_scaled = scaler.transform(user_data)

        # Class 1 means approved; anything else is a rejection
        prediction = model.predict(user_data_scaled)

        result_text = "🎉 Loan Approved! ✅" if prediction[0] == 1 else "🚫 Loan Not Approved ❌"
        print(result_text)

# Run the prediction every time the button is pressed
predict_button.on_click(predict_loan_approval)

# Build the form top to bottom: categorical dropdowns, numeric inputs,
# a spacer, then the button and the area where the result is printed
_categorical_inputs = [
    gender_dropdown,
    married_dropdown,
    dependents_dropdown,
    education_dropdown,
    self_employed_dropdown,
    credit_history_dropdown,
    property_area_dropdown,
]
_numeric_inputs = [
    applicant_income_input,
    coapplicant_income_input,
    loan_amount_input,
    loan_term_input,
]
_spacer = widgets.HTML("<br>")  # Extra spacing

form_layout = widgets.VBox(
    children=_categorical_inputs + _numeric_inputs + [_spacer, predict_button, output_label]
)

# Render the form in the notebook
display(form_layout)


VBox(children=(Dropdown(description='Gender:  ', options=('Female', 'Male'), style=DescriptionStyle(descriptio…