<a href="https://colab.research.google.com/github/SoniMehta/data-Science-and-Machine-Learning/blob/main/Risk%20Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Load the dataset
# Path of the CSV file to analyse; change it here to point at another copy of the data.
DATA_PATH = '/content/credit_customers (1).csv'
data = pd.read_csv(DATA_PATH)

# Step 2: Data Inspection
print(data.head())  # First few rows
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the report).
data.info()  # Column data types and missing values
print(data.describe())  # Summary statistics

# Step 3: Data Cleaning

# Record which columns are numeric / non-numeric in the raw data *before*
# imputing. SimpleImputer returns one object-dtype array for a mixed-type frame,
# so after imputation every column would look categorical and numeric features
# would wrongly be label-encoded into rank codes.
numeric_columns = data.select_dtypes(include='number').columns
categorical_columns = data.select_dtypes(exclude='number').columns

# Handle missing values using SimpleImputer (most frequent value of each column)
imputer = SimpleImputer(strategy='most_frequent')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Restore the original numeric dtypes so numeric features keep their real values
data_imputed = data_imputed.astype(
    {column: data[column].dtype for column in numeric_columns}
)

# Handle categorical features (if any) using LabelEncoder
# You can also use pd.get_dummies() for one-hot encoding if needed
# Each column gets its own encoder, stored in `label_encoders`, so the integer
# codes can be mapped back to the original labels with inverse_transform().
label_encoders = {}
le = LabelEncoder()  # keeps `le` defined even when there are no categorical columns
for column in categorical_columns:
    le = LabelEncoder()
    data_imputed[column] = le.fit_transform(data_imputed[column])
    label_encoders[column] = le
# After the loop `le` refers to the encoder of the last categorical column.

# Step 4: Feature Engineering (if needed)
# You may need to perform additional transformations based on domain knowledge or exploratory data analysis

# Step 5: Separate the label from the predictors
# The label column is assumed to be 'class' - change it here if the dataset differs
y = data_imputed['class']
X = data_imputed.drop(columns=['class'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Feature Scaling
# The scaler is fitted on the training rows only and then applied to both sets
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Train a random forest with 100 trees (seeded for reproducibility)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Step 8: Predict the label of every held-out row
y_pred = model.predict(X_test_scaled)

# Step 9: Report per-class metrics followed by overall accuracy
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Accuracy Score:", accuracy_score(y_test, y_pred))


  checking_status  duration                  credit_history  \
0              <0       6.0  critical/other existing credit   
1        0<=X<200      48.0                   existing paid   
2     no checking      12.0  critical/other existing credit   
3              <0      42.0                   existing paid   
4              <0      24.0              delayed previously   

               purpose  credit_amount    savings_status employment  \
0             radio/tv         1169.0  no known savings        >=7   
1             radio/tv         5951.0              <100     1<=X<4   
2            education         2096.0              <100     4<=X<7   
3  furniture/equipment         7882.0              <100     4<=X<7   
4              new car         4870.0              <100     1<=X<4   

   installment_commitment     personal_status other_parties  ...  \
0                     4.0         male single          none  ...   
1                     2.0  female div/dep/mar          none  ...