In [1]:
import pandas as pd

df = pd.read_csv("/content/UCI_Credit_Card.csv")
df.head()


Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Encode target if needed
if y.dtype == 'object':
    y = LabelEncoder().fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)


In [4]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)


In [5]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)


In [6]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)


In [7]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)


In [8]:
!pip install xgboost

from xgboost import XGBClassifier
xgb = XGBClassifier(eval_metric='logloss')
xgb.fit(X_train, y_train)




In [18]:
from sklearn.metrics import *

def evaluate(model):
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]

    return {
        "Accuracy": accuracy_score(y_test, y_pred),
        "AUC": roc_auc_score(y_test, y_prob),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1": f1_score(y_test, y_pred),
        "MCC": matthews_corrcoef(y_test, y_pred)
    }


In [10]:
models = {
    "Logistic Regression": lr,
    "Decision Tree": dt,
    "KNN": knn,
    "Naive Bayes": nb,
    "Random Forest": rf,
    "XGBoost": xgb
}

results = pd.DataFrame({name: evaluate(m) for name,m in models.items()}).T
results


Unnamed: 0,Accuracy,AUC,Precision,Recall,F1,MCC
Logistic Regression,0.809833,0.727056,0.692825,0.235339,0.351336,0.324868
Decision Tree,0.727333,0.614121,0.385217,0.412795,0.398529,0.222731
KNN,0.795333,0.697642,0.550898,0.350343,0.428305,0.322926
Naive Bayes,0.7085,0.737279,0.398416,0.651181,0.494363,0.324111
Random Forest,0.816667,0.762864,0.643338,0.364052,0.464981,0.385993
XGBoost,0.815833,0.766529,0.640162,0.361767,0.462287,0.38281


In [11]:
import joblib
joblib.dump(lr, "logistic.pkl")
joblib.dump(dt, "dt.pkl")
joblib.dump(knn, "knn.pkl")
joblib.dump(nb, "nb.pkl")
joblib.dump(rf, "rf.pkl")
joblib.dump(xgb, "xgb.pkl")


['xgb.pkl']

In [12]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
from sklearn.metrics import *

st.title("ML Classification Assignment")

model_name = st.selectbox("Select Model", [
    "Logistic Regression","Decision Tree","KNN",
    "Naive Bayes","Random Forest","XGBoost"
])

uploaded = st.file_uploader("Upload Test CSV", type="csv")

if uploaded:
    data = pd.read_csv(uploaded)

    models = {
        "Logistic Regression": joblib.load("logistic.pkl"),
        "Decision Tree": joblib.load("dt.pkl"),
        "KNN": joblib.load("knn.pkl"),
        "Naive Bayes": joblib.load("nb.pkl"),
        "Random Forest": joblib.load("rf.pkl"),
        "XGBoost": joblib.load("xgb.pkl")
    }

    model = models[model_name]
    preds = model.predict(data)
    st.write("Predictions:", preds)


Writing app.py


In [13]:
%%writefile requirements.txt
streamlit
scikit-learn
numpy
pandas
xgboost
joblib


Writing requirements.txt


In [14]:
import os
os.makedirs("model", exist_ok=True)


In [15]:
import joblib

joblib.dump(lr, "model/logistic.pkl")
joblib.dump(dt, "model/dt.pkl")
joblib.dump(knn, "model/knn.pkl")
joblib.dump(nb, "model/nb.pkl")
joblib.dump(rf, "model/rf.pkl")
joblib.dump(xgb, "model/xgb.pkl")


['model/xgb.pkl']

In [16]:
import os
os.listdir("model")

['dt.pkl', 'knn.pkl', 'logistic.pkl', 'xgb.pkl', 'rf.pkl', 'nb.pkl']

In [17]:
import joblib

lr = joblib.load("model/logistic.pkl")
dt = joblib.load("model/dt.pkl")
knn = joblib.load("model/knn.pkl")
nb = joblib.load("model/nb.pkl")
rf = joblib.load("model/rf.pkl")
xgb = joblib.load("model/xgb.pkl")
