In [1]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib

In [2]:
# Step 2: Load Dataset
try:
    df = pd.read_csv("application_record.csv")
    print("✅ Dataset loaded successfully!")
except FileNotFoundError:
    print("❌ File not found. Please check the file name or location.")


✅ Dataset loaded successfully!


In [3]:
# Step 3: Handle Missing Values (only OCCUPATION_TYPE has NaNs)
df["OCCUPATION_TYPE"].fillna("Unknown", inplace=True)
print("✅ Missing values handled.")


✅ Missing values handled.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["OCCUPATION_TYPE"].fillna("Unknown", inplace=True)


In [4]:
# Step 4: Encode Categorical Columns
label_encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
print("✅ Categorical encoding complete.")


✅ Categorical encoding complete.


In [5]:
# Step 5: Define Features and Target
# ⚠️ Since 'approved' column doesn't exist in your dataset,
# we are temporarily generating it randomly for testing.
df["approved"] = np.random.choice([0, 1], size=len(df))  # ❗ Replace with real target if available

X = df.drop(["approved", "ID"], axis=1)
y = df["approved"]


In [6]:
# Step 6: Scale Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
# Step 7: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


In [8]:
# Step 8: Train Model
model = RandomForestClassifier()
model.fit(X_train, y_train)
print("✅ Model training complete.")


✅ Model training complete.


In [9]:
# Step 9: Save Model and Encoders
joblib.dump(model, "credit_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "encoders.pkl")
print("✅ Model, scaler, and encoders saved.")

✅ Model, scaler, and encoders saved.
