In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Churn Dataset

In [49]:
# Train a decision-tree churn classifier and report its hold-out accuracy.
churn_df = pd.read_csv("Churn_Modelling.csv")

# RowNumber, CustomerId and Surname are excluded from the features
# (presumably row identifiers rather than predictors -- confirm against the
# data dictionary); "Exited" is the target.
x = churn_df.drop(columns=["RowNumber", "CustomerId", "Surname", "Exited"])
y = churn_df["Exited"]

# Every feature that is not listed as categorical is treated as numeric.
cat = ["Geography", "Gender"]
num = x.drop(columns=cat).columns

pre = ColumnTransformer([
    ("num", StandardScaler(), num),
    # handle_unknown="ignore" encodes a category that was absent from the
    # training split as all zeros instead of raising at predict time.
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat),
])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# The preprocessing is part of the pipeline, so the scaler and the encoder are
# fitted on the training rows only.
clf = Pipeline(steps=[("pre", pre), ("model", DecisionTreeClassifier(random_state=42))])
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print("Churn Accuracy:", accuracy_score(y_test, y_pred))

Churn Accuracy: 0.7805


# Credit Dataset

In [76]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Train a random-forest classifier for "Type of Loan" and report hold-out metrics.
credit_df = pd.read_csv("credit_scoring.csv")

# The CSV carries an "Unnamed: 0" column that is just a row counter
# (0, 1, 2, ...). Left in the feature matrix it would be scaled and handed to
# the model as if it were a predictor, so it is removed here.
# errors="ignore" keeps the cell working if the file is re-exported without it.
# credit_df itself is left untouched.
x2 = (
    credit_df
    .drop(columns="Type of Loan")
    .drop(columns="Unnamed: 0", errors="ignore")
)
y2 = credit_df["Type of Loan"]

# Non-numeric columns are one-hot encoded, the rest are scaled.
# exclude="number" selects every non-numeric column whatever its dtype
# (object, string, category), so none of them is routed to StandardScaler.
cat2 = x2.select_dtypes(exclude="number").columns
num2 = x2.drop(columns=cat2).columns

pre2 = ColumnTransformer([
    ("num", StandardScaler(), num2),
    # handle_unknown="ignore" encodes a category that was absent from the
    # training split as all zeros instead of raising at predict time.
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat2),
])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2, test_size=0.2, random_state=42)

model = Pipeline([
    ("preprocessor", pre2),
    ("classifier", RandomForestClassifier(random_state=42))
])

model.fit(x2_train, y2_train)
y2_pred = model.predict(x2_test)

# The target has more than two classes, so precision/recall/F1 are averaged
# across classes, weighted by each class's support in the test split.
print("Accuracy:", accuracy_score(y2_test, y2_pred))
print("Precision:", precision_score(y2_test, y2_pred, average='weighted'))
print("Recall:", recall_score(y2_test, y2_pred, average='weighted'))
print("F1 Score:", f1_score(y2_test, y2_pred, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(y2_test, y2_pred))


Accuracy: 0.35
Precision: 0.36329786907393763
Recall: 0.35
F1 Score: 0.3522788999201187
Confusion Matrix:
 [[26 24 29]
 [17 25 21]
 [16 23 19]]


In [74]:
# Display the loaded credit DataFrame to inspect its columns and a sample of rows.
credit_df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.20,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...,...
995,59,Male,Divorced,High School,Employed,0.74,1285.0,8,3530000,12.99,48,Auto Loan
996,64,Male,Divorced,Bachelor,Unemployed,0.77,1857.0,2,1377000,18.02,60,Home Loan
997,63,Female,Single,Master,Self-Employed,0.18,2628.0,10,2443000,18.95,12,Personal Loan
998,51,Female,Married,PhD,Self-Employed,0.32,1142.0,3,1301000,1.80,24,Auto Loan
