# Loan Score Prediction Model

In [None]:

import pandas as pd
import xgboost as xg
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:

# Read the three CSV exports; every file is decoded as latin1
read_options = {'encoding': 'latin1'}
test_data_1 = pd.read_csv('/mnt/data/Test data.xlsx - Query result.csv', **read_options)
test_data_2 = pd.read_csv('/mnt/data/Model test data.csv', **read_options)
loan_status_data = pd.read_csv(
    '/mnt/data/Loan Status prediction data.xlsx - Query result.csv',
    **read_options,
)

# Preview the top rows of each dataframe under its own heading
previews = (
    ("Test Data 1:", test_data_1),
    ("\nTest Data 2:", test_data_2),
    ("\nLoan Status Data:", loan_status_data),
)
for heading, frame in previews:
    print(heading)
    print(frame.head())


In [None]:

# Data preprocessing steps
def preprocess_data(df):
    """Return a forward-filled, label-encoded copy of ``df``.

    Missing values are forward-filled down each column, so gaps at the very
    top of a column (where there is no earlier value to copy) stay missing.
    Every object-dtype column is then cast to ``str`` and replaced with the
    integer codes produced by a ``LabelEncoder`` fitted on that column.

    NOTE: encoders are fitted from scratch on every call, so the same
    category can receive different codes in different dataframes.

    Args:
        df: DataFrame to clean. The caller's object is left untouched.

    Returns:
        A new DataFrame with the same columns as ``df``.
    """
    # ``fillna(method='ffill')`` is deprecated in pandas 2.x; ``ffill()`` is
    # the supported spelling and likewise returns a new frame.
    df = df.ffill()

    # Encode categorical variables
    for column in df.select_dtypes(include=['object']).columns:
        # A fresh encoder per column: nothing is shared between columns, and
        # purely numeric frames never need to construct an encoder at all.
        df[column] = LabelEncoder().fit_transform(df[column].astype(str))

    return df

# Run every loaded dataframe through preprocess_data, rebinding each name
test_data_1, test_data_2, loan_status_data = (
    preprocess_data(raw_frame)
    for raw_frame in (test_data_1, test_data_2, loan_status_data)
)

# Preview the top rows of each preprocessed dataframe under its own heading
preprocessed_previews = (
    ("Preprocessed Test Data 1:", test_data_1),
    ("\nPreprocessed Test Data 2:", test_data_2),
    ("\nPreprocessed Loan Status Data:", loan_status_data),
)
for heading, frame in preprocessed_previews:
    print(heading)
    print(frame.head())


In [None]:

# Example model training and evaluation
# Features are every column except 'status_id'; 'status_id' is the target.
X = loan_status_data.drop('status_id', axis=1)
y = loan_status_data['status_id']

# Split the data: 30% held out for evaluation, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Pin the matrix rows/columns to the classes the model was trained on and
# show those values on the axes; without display_labels the plot is
# labelled with positional indices 0..k-1 rather than the status_id values.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()
plt.show()


In [None]:

# Translate status codes into readable labels: code 2 becomes 'Paid',
# every other value becomes 'Unpaid'
def _status_label(code):
    if code == 2:
        return 'Paid'
    return 'Unpaid'


y_pred_labels = list(map(_status_label, y_pred))
y_test_labels = list(map(_status_label, y_test))

# Attach predicted and actual labels to a copy of the held-out features
results = X_test.copy()
results['Predicted Status'] = y_pred_labels
results['Actual Status'] = y_test_labels

# Write the labelled rows to CSV without the index column
output_path = '/mnt/data/loan_status_predictions.csv'
results.to_csv(output_path, index=False)
print("Results saved to loan_status_predictions.csv")
