<a href="https://colab.research.google.com/github/Shreya-web226/Tems.tech.solution/blob/main/Credit_Scoring_%26_Risk_Assessment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Step 1: Generate a synthetic dataset
np.random.seed(42)  # For reproducibility

# Define parameters for the synthetic data
num_samples = 1000
age = np.random.randint(18, 70, num_samples)  # Age between 18 and 70
income = np.random.normal(50000, 15000, num_samples)  # Average income with some variance
credit_score = np.random.randint(300, 850, num_samples)  # Credit score between 300 and 850
loan_amount = np.random.randint(1000, 50000, num_samples)  # Loan amounts
default = np.random.randint(0, 2, num_samples)  # 0 = No Default, 1 = Default

# Create a DataFrame
data = pd.DataFrame({
    'Age': age,
    'Income': income,
    'Credit Score': credit_score,
    'Loan Amount': loan_amount,
    'Default': default
})

# Step 2: Data Preprocessing
# Check for missing values
print("Missing values in dataset:")
print(data.isnull().sum())

# Feature selection
X = data[['Age', 'Income', 'Credit Score', 'Loan Amount']]
y = data['Default']

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build the Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 5: Make Predictions
y_pred = model.predict(X_test)

# Step 6: Evaluate the Model
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 7: Risk Assessment
# Calculate probabilities of default for the test set
default_probabilities = model.predict_proba(X_test)[:, 1]

# Add probabilities to the test set DataFrame for further analysis
results = pd.DataFrame(X_test)
results['Predicted Default'] = y_pred
results['Default Probability'] = default_probabilities

# Display the results
print("\nResults of Credit Scoring & Risk Assessment:")
print(results.head())


Missing values in dataset:
Age             0
Income          0
Credit Score    0
Loan Amount     0
Default         0
dtype: int64

Confusion Matrix:
[[61 38]
 [48 53]]

Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.62      0.59        99
           1       0.58      0.52      0.55       101

    accuracy                           0.57       200
   macro avg       0.57      0.57      0.57       200
weighted avg       0.57      0.57      0.57       200


Results of Credit Scoring & Risk Assessment:
     Age        Income  Credit Score  Loan Amount  Predicted Default  \
521   22  40295.948334           604        48221                  1   
737   49  82177.236917           754        21324                  1   
740   38  59506.653214           696        17544                  0   
660   56  47749.166195           620        12577                  0   
411   59  67539.433857           412        42094                  1   

  