# Credit Scoring Model

This notebook covers exploratory data analysis, model training, and evaluation of the credit scoring model. It expects the dataset in `loan_data.csv` in the working directory and includes code and visualizations to help understand the data and the model's performance.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Printed last so the cell output shows that every import above ran without raising.
print("Libraries imported successfully!")

Libraries imported successfully!


In [8]:
# Load the loan dataset into `df`.
# `df` is left as None when the CSV cannot be read, which is the signal the
# later `if df is not None:` cells use to skip their work.
file = 'loan_data.csv'
df = None

try:
    df = pd.read_csv(file)
except FileNotFoundError:
    # Most common failure: the CSV is not in the working directory.
    print(f"Error: The file '{file}' was not found.")
except Exception as e:
    # Any other read/parse problem is reported, not raised, so the notebook keeps running.
    print(f"An error occurred while loading the file: {e}")
else:
    # Only reached when the read succeeded.
    print(f"Dataset loaded successfully from '{file}'")
    print(f"Dataset shape: {df.shape}")
    print("\nFirst 5 rows:")
    display(df.head())

Error: The file 'loan_data.csv' was not found.


In [None]:
# Exploratory Data Analysis
# Prints the frame's schema and descriptive statistics, then plots how many
# rows fall under each value of the target column. Runs only when `df` is not None.
if df is not None:
    print("\nDataFrame Info:")
    df.info()

    print("\nDescriptive Statistics:")
    summary_stats = df.describe()
    display(summary_stats)

    # Visualizations
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=df, x='target_variable', ax=ax)  # Replace 'target_variable' with actual target column
    ax.set_title('Distribution of Target Variable')
    plt.show()

In [None]:
# Model Training
# Trains a RandomForestClassifier on an 80/20 split of `df` and reports accuracy,
# a classification report and a confusion matrix for the held-out rows.
# Runs only when `df` is not None.
if df is not None:
    TARGET_COL = 'target_variable'  # Replace 'target_variable' with actual target column

    # Fail with an actionable message instead of the bare KeyError that `drop`
    # would raise when the placeholder column name has not been replaced.
    if TARGET_COL not in df.columns:
        raise KeyError(
            f"Target column '{TARGET_COL}' not found in the dataset; "
            f"available columns: {list(df.columns)}"
        )

    # Preprocessing and feature selection
    # One-hot encode text/categorical feature columns, which StandardScaler
    # cannot convert to floats. Numeric columns pass through unchanged, so an
    # all-numeric frame yields the same feature matrix as before.
    X = pd.get_dummies(df.drop(TARGET_COL, axis=1), dtype=float)
    y = df[TARGET_COL]

    # Stratify so both splits keep the target's class proportions. Stratifying
    # is not always possible (e.g. a class with a single row); in that case
    # fall back to the plain random split.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError as split_error:
        print(f"Note: stratified split not possible ({split_error}); using a random split.")
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

    # Standardizing the data
    # The scaler is fitted on the training rows only and then applied to the
    # test rows, so no test-set statistics enter the fit.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Training the model
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    print("\nModel Evaluation:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Confusion Matrix
    # Pin rows/columns to the classes the model was trained on so the axes
    # carry class labels and keep the same layout even when a class is absent
    # from the test rows.
    class_labels = model.classes_
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        confusion_matrix(y_test, y_pred, labels=class_labels),
        annot=True,
        fmt='d',
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()