## Bagging

In [7]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

In [8]:
# Read the credit-card CSV from the working directory and preview the first rows.
DATA_PATH = "creditcarddata.csv"
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Gender,Age,HouseTypeID,ContactAvaliabilityID,HomeCountry,AccountNo,CardExpiryDate,TransactionAmount,TransactionCountry,LargePurchase,ProductID,CIF,TransactionCurrencyCode,PotentialFraud
0,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1,0
1,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1,0
2,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1,0
3,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1,0
4,0,56,1,0,1,1109976,1811,1.0354,1,0,3,11020290,1,0


In [9]:
df.shape  # (rows, columns) of the loaded frame

(2266, 14)

In [10]:
# Separate the label column from the predictors: y is the PotentialFraud
# column, X is every other column of df.
target_col = 'PotentialFraud'
y = df[target_col]
X = df.drop(columns=[target_col])

In [11]:
from collections import Counter

# Tally how many rows carry each label value to see the class balance.
class_counts = Counter(y)
class_counts

Counter({0: 2013, 1: 253})

In [12]:
X  # visual check of the feature matrix (label column removed); the notebook shows a truncated view

Unnamed: 0,Gender,Age,HouseTypeID,ContactAvaliabilityID,HomeCountry,AccountNo,CardExpiryDate,TransactionAmount,TransactionCountry,LargePurchase,ProductID,CIF,TransactionCurrencyCode
0,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1
1,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1
2,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1
3,0,56,1,0,1,1109976,1811,0.0062,1,0,3,11020290,1
4,0,56,1,0,1,1109976,1811,1.0354,1,0,3,11020290,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2261,0,44,1,2,1,7995391,2109,0.0000,1,0,3,11329257,1
2262,0,4,0,0,1,7995399,2109,12.4000,1,0,2,11324926,1
2263,0,4,0,0,1,7995399,2109,12.4000,1,0,2,11324926,1
2264,0,4,0,0,1,7995399,2109,12.4000,1,0,2,11324926,1


In [13]:
# Hold out 30% of the rows for evaluation, stratified on the label so the
# train and test parts keep the same class ratio. The split is seeded so the
# partition (and every score computed on it further down) is identical on
# each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Bagging model: a forest of 100 trees with a fixed seed.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [14]:
# Per-class precision / recall / F1 for the predictions currently held in y_pred.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96       604
           1       0.68      0.58      0.62        76

    accuracy                           0.92       680
   macro avg       0.81      0.77      0.79       680
weighted avg       0.92      0.92      0.92       680



## Boosting

In [15]:
from xgboost import XGBClassifier

# Boosting model: 200 trees of depth 4 with learning rate 0.1, fitted on the
# same training split as the other models in this notebook.
xgb = XGBClassifier(max_depth=4, learning_rate=0.1, n_estimators=200).fit(X_train, y_train)

# Overwrites y_pred with the XGBoost predictions for the held-out rows.
y_pred = xgb.predict(X_test)

In [16]:
# Confusion matrix for the predictions currently in y_pred
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[575  29]
 [ 40  36]]


In [17]:
# Per-class precision / recall / F1 for the predictions currently held in y_pred.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.93      0.95      0.94       604
           1       0.55      0.47      0.51        76

    accuracy                           0.90       680
   macro avg       0.74      0.71      0.73       680
weighted avg       0.89      0.90      0.90       680



## Stacking

In [18]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier

from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
import pandas as pd

In [19]:
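# # Load and preprocess data
# NOTE: disabled. This cell targets a different dataset (lending_loan.csv);
# the stacking model below is fit on the split created in the Bagging section.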
# df = pd.read_csv("lending_loan.csv")
# X = df[['loan_amnt','int_rate','annual_inc','dti']]
# y = df['loan_status'].apply(lambda x: 1 if x=='Charged Off' else 0)

In [20]:
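# Disabled: no new split is made here; X_train / X_test / y_train / y_test
# from the Bagging section are reused.
#X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)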

In [21]:
# Define base learners for the stacking ensemble as (name, estimator) pairs.
# Both learners are seeded so that re-running the notebook reproduces the
# same stacked model; the random forest already had a seed, gradient
# boosting now uses the same one.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42))
]

In [22]:
# Stacked ensemble: the base learners in `estimators` feed a logistic
# regression meta-learner, using 5-fold cross-validation (cv=5).
meta_learner = LogisticRegression()
stack = StackingClassifier(estimators=estimators, final_estimator=meta_learner, cv=5)

In [23]:
# Fit the stacked model on the training split, then score the held-out rows.
# y_pred is overwritten with the stacking predictions.
y_pred = stack.fit(X_train, y_train).predict(X_test)

stack_report = classification_report(y_test, y_pred)
print(stack_report)

              precision    recall  f1-score   support

           0       0.93      0.98      0.95       604
           1       0.69      0.38      0.49        76

    accuracy                           0.91       680
   macro avg       0.81      0.68      0.72       680
weighted avg       0.90      0.91      0.90       680

