In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.utils import resample

# Load dataset
file_path = "/content/credit_data.csv"
df = pd.read_csv(file_path)

# Drop CustomerID (not needed for training)
df = df.drop(columns=["CustomerID"])

# Define features (X) and target variable (y)
X = df[['Age', 'Income', 'LoanAmount']]
y = df['CreditScore']

# Convert Credit Score into categorical labels
def categorize_credit_score(score):
    if score >= 700:
        return 2  # Good
    elif 500 <= score < 700:
        return 1  # Fair
    else:
        return 0  # Bad

# Apply categorization
df['CreditCategory'] = df['CreditScore'].apply(categorize_credit_score)

# Define new target variable (classification)
y_class = df['CreditCategory']

# Separate classes
df_class_0 = df[df['CreditCategory'] == 0]  # Bad Credit
df_class_1 = df[df['CreditCategory'] == 1]  # Fair Credit
df_class_2 = df[df['CreditCategory'] == 2]  # Good Credit

# Find the max count among the classes
max_samples = max(len(df_class_0), len(df_class_1), len(df_class_2))

# Oversample minority classes
df_class_0_upsampled = resample(df_class_0, replace=True, n_samples=max_samples, random_state=42)
df_class_1_upsampled = resample(df_class_1, replace=True, n_samples=max_samples, random_state=42)
df_class_2_upsampled = resample(df_class_2, replace=True, n_samples=max_samples, random_state=42)

# Combine all classes into a new balanced dataset
df_balanced = pd.concat([df_class_0_upsampled, df_class_1_upsampled, df_class_2_upsampled])

# Define new features and target
X_balanced = df_balanced[['Age', 'Income', 'LoanAmount']]
y_balanced = df_balanced['CreditCategory']

# Split into train-test sets
X_train_bal, X_test_bal, y_train_bal, y_test_bal = train_test_split(
    X_balanced, y_balanced, test_size=0.2, random_state=42
)

# Scale the features
scaler_balanced = StandardScaler()
X_train_bal_scaled = scaler_balanced.fit_transform(X_train_bal)
X_test_bal_scaled = scaler_balanced.transform(X_test_bal)

# Train new classifier on balanced data
clf_balanced = RandomForestClassifier(n_estimators=100, random_state=42)
clf_balanced.fit(X_train_bal_scaled, y_train_bal)

# Make Predictions
y_pred_balanced = clf_balanced.predict(X_test_bal_scaled)

# Evaluate Model
accuracy_bal = accuracy_score(y_test_bal, y_pred_balanced)
conf_matrix_bal = confusion_matrix(y_test_bal, y_pred_balanced)
class_report_bal = classification_report(y_test_bal, y_pred_balanced)

# Print Evaluation Metrics
print("Model Evaluation on Balanced Dataset:")
print("Accuracy:", accuracy_bal)
print("Confusion Matrix:\n", conf_matrix_bal)
print("Classification Report:\n", class_report_bal)

# User Input Prediction
age = int(input("Enter Age: "))
income = int(input("Enter Income: "))
loan_amount = int(input("Enter Loan Amount: "))

user_data = [[age, income, loan_amount]]
user_data_scaled = scaler_balanced.transform(user_data)
user_predicted_class = clf_balanced.predict(user_data_scaled)

credit_categories = {0: "Bad Credit", 1: "Fair Credit", 2: "Good Credit"}
print("Predicted Credit Category for user input:", credit_categories[user_predicted_class[0]])


Model Evaluation on Balanced Dataset:
Accuracy: 1.0
Confusion Matrix:
 [[2 0 0]
 [0 3 0]
 [0 0 1]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         1

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6

Enter Age: 52
Enter Income: 100000
Enter Loan Amount: 50000
Predicted Credit Category for user input: Good Credit


