# 📊 German Credit Risk Prediction Project

### 🔹 Step 1: Data Preprocessing

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the pre-processed German credit dataset.
df = pd.read_csv("processed_german_credit.csv")

# Drop every row that contains at least one missing value.
df = df.dropna()

# One-hot encode the categorical columns. drop_first=True drops the first
# level of each encoded column so the dummies are not perfectly collinear.
# NOTE(review): the encoding runs on the whole frame, so this assumes "Risk"
# is already numeric. A text-valued "Risk" would be replaced by "Risk_<level>"
# dummy columns and the two lookups below would raise KeyError -- confirm
# against the CSV.
df_encoded = pd.get_dummies(df, drop_first=True)

# Features and target.
X = df_encoded.drop("Risk", axis=1)
y = df_encoded["Risk"]

# 80/20 train-test split with a fixed seed for reproducibility.
# stratify=y keeps the proportion of each "Risk" class the same in both
# splits, so the held-out set is representative of the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Save the splits so the training step can reload them from disk.
# index=False keeps the row index out of the CSV files.
X_train.to_csv("X_train.csv", index=False)
X_test.to_csv("X_test.csv", index=False)
y_train.to_csv("y_train.csv", index=False)
y_test.to_csv("y_test.csv", index=False)


### 🔹 Step 2: Model Training

In [None]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# Load the splits written by the preprocessing step. squeeze() collapses the
# single-column label frames into 1-D Series, the shape expected for y.
X_train = pd.read_csv("X_train.csv")
y_train = pd.read_csv("y_train.csv").squeeze()
X_test = pd.read_csv("X_test.csv")
y_test = pd.read_csv("y_test.csv").squeeze()

# Train a 100-tree random forest; the fixed seed makes the fit reproducible.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate on the held-out split before persisting the model, so a poorly
# performing model is noticed here rather than in the app.
print(classification_report(y_test, clf.predict(X_test)))

# Save the fitted model for the Streamlit app to load.
joblib.dump(clf, "credit_model.pkl")


### 🔹 Step 3: Streamlit UI Code (for separate `app.py` file)

In [None]:

# Save this code separately as app.py for Streamlit use

import streamlit as st
import pandas as pd
import joblib

# Load the classifier saved by the training step.
# NOTE: joblib.load unpickles the file; only load model files you trust.
model = joblib.load("credit_model.pkl")

st.title("German Credit Risk Prediction")

# Collect the applicant details from the user.
age = st.slider("Age", 18, 75)
credit_amount = st.number_input("Credit Amount")
duration = st.slider("Duration (in months)", 6, 60)
job = st.selectbox("Job Type", [0, 1, 2, 3])
housing = st.selectbox("Housing", ['own', 'free', 'rent'])
sex = st.selectbox("Sex", ['male', 'female'])

# Build one feature row. The categorical answers are expanded by hand into
# dummy columns; 'free' housing and 'female' are the all-zero cases.
input_dict = {
    'Age': age,
    'Credit amount': credit_amount,
    'Duration': duration,
    'Job': job,
    'Sex_male': 1 if sex == 'male' else 0,
    'Housing_own': 1 if housing == 'own' else 0,
    'Housing_rent': 1 if housing == 'rent' else 0,
}

# Align the row with the exact columns (and order) the model was fitted on.
# Any feature the model expects that the form does not collect is filled
# with 0, so those features never influence the input the user controls.
input_data = pd.DataFrame([input_dict]).reindex(
    columns=model.feature_names_in_,
    fill_value=0,
)

prediction = model.predict(input_data)[0]
# NOTE(review): assumes the training labels encode good risk as 1 -- confirm
# against the "Risk" column used in training.
st.write("Prediction:", "✅ Good Credit Risk" if prediction == 1 else "❌ Bad Credit Risk")
