In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [11]:
pip install streamlit



In [12]:


import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Main page heading.
app_title = 'CA-2 Credit Risk Prediction System'
st.title(app_title)

# Sidebar heading.
# NOTE(review): no file-uploader widget is created anywhere in the code visible
# here, so the "upload" wording in the sidebar and intro may overpromise — confirm.
sidebar_heading = 'Upload Dataset'
st.sidebar.title(sidebar_heading)

# Introductory paragraph rendered below the title.
intro_text = """
This project simulates a credit risk prediction system using the UCI Credit Card Default dataset.
Upload your dataset to predict credit risk. For this demo, the UCI dataset is used.
"""
st.write(intro_text)

@st.cache_data
def load_data():
    """Fetch the credit card default workbook and return it as a DataFrame.

    The sheet is parsed with ``header=1``: the row at index 1 supplies the
    column names and the row above it is not used.  The function is wrapped
    in ``st.cache_data``, so the result is cached by Streamlit.

    Returns:
        pandas.DataFrame: the parsed spreadsheet.
    """
    source_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls'
    return pd.read_excel(source_url, header=1)

# Load and display dataset
df = load_data()
st.write("### Dataset Overview")
st.write(df.head())

# Preprocess the dataset.
# Rebind rather than drop in place, so the object handed back by load_data()
# is never mutated by this section.
df = df.drop(columns=['ID'])  # Drop unnecessary ID column

# Define feature matrix X and target vector y
X = df.drop(columns='default payment next month')
y = df['default payment next month']


@st.cache_resource
def _fit_credit_model(features, target):
    """Split the data, fit the scaler and the random forest, and score the hold-out set.

    Streamlit re-executes the script on every widget interaction.  Caching
    this function means the split/scale/fit/predict work is done once per
    distinct ``(features, target)`` input instead of on every rerun.  The
    cached objects are shared between reruns, so callers must treat them as
    read-only.

    Args:
        features: DataFrame of predictor columns.
        target: Series with the label for each row of ``features``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, scaler, model, y_pred)``
        where ``X_train``/``X_test`` are the scaled arrays, ``scaler`` and
        ``model`` are the fitted estimators, and ``y_pred`` holds the model's
        predictions for ``X_test``.
    """
    # Split the dataset into training and testing sets (fixed seed for repeatability)
    train_X, test_X, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Feature scaling: fit on the training split only, then reuse for the test split
    fitted_scaler = StandardScaler()
    train_X = fitted_scaler.fit_transform(train_X)
    test_X = fitted_scaler.transform(test_X)

    # Train the model
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(train_X, train_y)

    # Make predictions on the hold-out split
    test_pred = forest.predict(test_X)
    return train_X, test_X, train_y, test_y, fitted_scaler, forest, test_pred


# Same module-level names as before, now served from the cache on reruns
X_train, X_test, y_train, y_test, scaler, model, y_pred = _fit_credit_model(X, y)

# Model evaluation
st.write("### Model Evaluation Metrics")
st.write("#### Confusion Matrix")
st.write(confusion_matrix(y_test, y_pred))

st.write("#### Classification Report")
# st.text keeps the report's fixed-width column alignment
st.text(classification_report(y_test, y_pred))

# Feature importance
st.write("### Feature Importance")
feature_importance = model.feature_importances_
sorted_idx = feature_importance.argsort()  # ascending order of importance

# Plot feature importance (one horizontal bar per feature).
# A taller figure keeps the per-feature tick labels from overlapping.
fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(x=feature_importance[sorted_idx], y=X.columns[sorted_idx], ax=ax)
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Random Forest Feature Importance')
st.pyplot(fig)
# The script is re-executed on every interaction; close the figure so pyplot
# does not keep one more figure alive per rerun.
plt.close(fig)

# User input section for predictions
st.write("### Make a Prediction")
st.write("Enter the details to make a prediction")

# Account and demographic fields
limit_bal = st.number_input(
    'Credit Limit (BAL)',
    min_value=0,
    max_value=1000000,
    value=200000,
)
sex = st.selectbox('Sex (1 = Male, 2 = Female)', (1, 2))
education = st.selectbox(
    'Education Level (1 = Graduate School, 2 = University, 3 = High School, 4 = Others)',
    (1, 2, 3, 4),
)
marriage = st.selectbox('Marital Status (1 = Married, 2 = Single, 3 = Others)', (1, 2, 3))
age = st.number_input('Age', min_value=18, max_value=100, value=30)

# The remaining fields come in groups of six, one per month, newest first.
recent_months = ('September', 'August', 'July', 'June', 'May', 'April')

# Payment history (PAY_0, PAY_2 .. PAY_6 — there is no PAY_1 input)
pay_0, pay_2, pay_3, pay_4, pay_5, pay_6 = [
    st.number_input(
        f'Repayment Status in {month} (PAY_{suffix})',
        min_value=-2,
        max_value=9,
        value=0,
    )
    for month, suffix in zip(recent_months, (0, 2, 3, 4, 5, 6))
]

# Bill amounts (BILL_AMT1 to BILL_AMT6), each with its own default value
bill_amt1, bill_amt2, bill_amt3, bill_amt4, bill_amt5, bill_amt6 = [
    st.number_input(
        f'Amount of Bill Statement in {month} (BILL_AMT{position})',
        min_value=0,
        max_value=1000000,
        value=default,
    )
    for position, (month, default) in enumerate(
        zip(recent_months, (50000, 48000, 45000, 43000, 41000, 39000)), start=1
    )
]

# Payment amounts (PAY_AMT1 to PAY_AMT6), each with its own default value
pay_amt1, pay_amt2, pay_amt3, pay_amt4, pay_amt5, pay_amt6 = [
    st.number_input(
        f'Amount Paid in {month} (PAY_AMT{position})',
        min_value=0,
        max_value=1000000,
        value=default,
    )
    for position, (month, default) in enumerate(
        zip(recent_months, (20000, 22000, 18000, 17000, 16000, 15000)), start=1
    )
]

# Run a prediction only on the rerun triggered by pressing the button
if st.button('Predict Credit Default Risk'):
    # Collected widget values, keyed by the column names used for the input frame
    # (presumably the same names and order as the training features — verify).
    applicant = {
        'LIMIT_BAL': limit_bal,
        'SEX': sex,
        'EDUCATION': education,
        'MARRIAGE': marriage,
        'AGE': age,
        'PAY_0': pay_0,
        'PAY_2': pay_2,
        'PAY_3': pay_3,
        'PAY_4': pay_4,
        'PAY_5': pay_5,
        'PAY_6': pay_6,
        'BILL_AMT1': bill_amt1,
        'BILL_AMT2': bill_amt2,
        'BILL_AMT3': bill_amt3,
        'BILL_AMT4': bill_amt4,
        'BILL_AMT5': bill_amt5,
        'BILL_AMT6': bill_amt6,
        'PAY_AMT1': pay_amt1,
        'PAY_AMT2': pay_amt2,
        'PAY_AMT3': pay_amt3,
        'PAY_AMT4': pay_amt4,
        'PAY_AMT5': pay_amt5,
        'PAY_AMT6': pay_amt6,
    }

    # Single-row DataFrame: every column holds a one-element list
    input_data = pd.DataFrame({column: [value] for column, value in applicant.items()})

    # Apply the already-fitted scaler, then classify the scaled row
    input_data_scaled = scaler.transform(input_data)
    prediction = model.predict(input_data_scaled)

    # Class 1 is reported as "likely to default"; anything else as "not likely"
    message = (
        "The model predicts the customer is likely to default on payment."
        if prediction[0] == 1
        else "The model predicts the customer is NOT likely to default on payment."
    )
    st.write(message)


2024-09-30 17:13:07.096 No runtime found, using MemoryCacheStorageManager
