In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score

# Read the training data from the working directory.
df = pd.read_csv("train.csv")

# The target is the `Loan_Status` column; the features are every other
# column except `Loan_ID`.
y = df['Loan_Status']
X = df.drop(columns=['Loan_ID', 'Loan_Status'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Column groups routed to the two preprocessing branches defined below.
categorical_features = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
]
numeric_features = [
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
]

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode (handle_unknown='ignore' is set on the encoder).
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Numeric branch: fill gaps with the column median, then standardize.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Apply each branch to its own column group ('num' first, then 'cat').
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by a default-configured logistic regression.
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# Fit the preprocessing + classifier pipeline on the training split only.
clf.fit(X_train, y_train)

# Hard class predictions ('N' / 'Y') for the held-out split.
y_pred = clf.predict(X_test)

# Model evaluation on the held-out split.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# 5-fold cross-validated accuracy. Note this runs on the full X / y (train
# and test rows together), so it is a separate estimate from the hold-out
# numbers above.
cv_scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
print("\nCross-validation Accuracy:", cv_scores.mean())

# ROC AUC score.
# The columns of predict_proba follow the order of clf.classes_, and the
# labels here are the strings 'N' / 'Y' rather than 0 / 1. Look up the
# approval column by label instead of hard-coding index 1, and score it
# against an explicit "is approved" indicator so the positive class is
# unambiguous.
approved_col = list(clf.classes_).index('Y')
y_pred_proba = clf.predict_proba(X_test)[:, approved_col]  # P(Loan_Status == 'Y')
print("\nROC AUC Score:", roc_auc_score(y_test == 'Y', y_pred_proba))


Accuracy: 0.7886178861788617

Classification Report:
              precision    recall  f1-score   support

           N       0.95      0.42      0.58        43
           Y       0.76      0.99      0.86        80

    accuracy                           0.79       123
   macro avg       0.85      0.70      0.72       123
weighted avg       0.83      0.79      0.76       123


Cross-validation Accuracy: 0.8078368652538984

ROC AUC Score: 0.7491279069767443


In [3]:
import pandas as pd

# One hand-written applicant record, stored column-wise (each value is a
# one-element list) so it converts directly into a single-row DataFrame.
input_data = dict(
    Gender=['Male'],
    Married=['Yes'],
    Dependents=['1'],
    Education=['Graduate'],
    Self_Employed=['No'],
    ApplicantIncome=[5000],
    CoapplicantIncome=[2000],
    LoanAmount=[300],
    Loan_Amount_Term=[360],
    Credit_History=[1.0],
    Property_Area=['Urban'],
)

# Single-row frame whose column names match the feature lists used by the
# preprocessing pipeline.
input_df = pd.DataFrame(input_data)

# Show the record that will be scored.
print(f"Input Data:\n{input_df}")


Input Data:
  Gender Married Dependents Education Self_Employed  ApplicantIncome  \
0   Male     Yes          1  Graduate            No             5000   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0               2000         300               360             1.0   

  Property_Area  
0         Urban  


In [4]:
# Score the single-row example with the fitted pipeline; the result is an
# array holding one label.
predicted_loan_status = clf.predict(input_df)

# Translate the label into a human-readable verdict ('Y' means approved).
is_approved = predicted_loan_status[0] == 'Y'
print(
    "\nThe loan is predicted to be Approved (Y)."
    if is_approved
    else "\nThe loan is predicted to be Not Approved (N)."
)



The loan is predicted to be Approved (Y).


In [5]:
# Second hand-written applicant, written as plain field -> value pairs.
# This one has Credit_History set to 0.0 and is intended to illustrate a
# "Not Approved" prediction.
not_approved_record = {
    'Gender': 'Female',
    'Married': 'No',
    'Dependents': '0',
    'Education': 'Not Graduate',
    'Self_Employed': 'Yes',
    'ApplicantIncome': 2500,
    'CoapplicantIncome': 1500,
    'LoanAmount': 200,
    'Loan_Amount_Term': 360,
    'Credit_History': 0.0,
    'Property_Area': 'Rural',
}

# Wrap every value in a one-element list so the mapping is column-oriented.
input_data_not_approved = {
    field: [value] for field, value in not_approved_record.items()
}

# Single-row frame with the same columns the pipeline was trained on.
input_df_not_approved = pd.DataFrame(input_data_not_approved)

# Show the record that will be scored.
print("Input Data (Not Approved):", input_df_not_approved, sep="\n")

# Score the second example with the fitted pipeline.
predicted_loan_status_not_approved = clf.predict(input_df_not_approved)

# Report the verdict; any label other than 'Y' is treated as a rejection.
verdict = predicted_loan_status_not_approved[0]
if verdict != 'Y':
    print("\nThe loan is predicted to be Not Approved (N).")
else:
    print("\nThe loan is predicted to be Approved (Y).")


Input Data (Not Approved):
   Gender Married Dependents     Education Self_Employed  ApplicantIncome  \
0  Female      No          0  Not Graduate           Yes             2500   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0               1500         200               360             0.0   

  Property_Area  
0         Rural  

The loan is predicted to be Not Approved (N).
