In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

data = pd.read_csv("/content/credit_card_default (1).csv")


# Encoding categorical variables (if needed)
label_encoder = LabelEncoder()
data['SEX'] = label_encoder.fit_transform(data['SEX'])
data['EDUCATION'] = label_encoder.fit_transform(data['EDUCATION'])
data['MARRIAGE'] = label_encoder.fit_transform(data['MARRIAGE'])

# Scaling numerical features
scaler = StandardScaler()
data[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
      'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
      'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']] = scaler.fit_transform(data[['LIMIT_BAL', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
                                                                                                               'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
                                                                                                               'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']])

# Splitting the dataset into training and testing sets
X = data.drop(columns=['ID', 'default payment next month'])
y = data['default payment next month']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the preprocessed dataset
print("Preprocessed dataset:")
print(X_train.head())


Preprocessed dataset:
       LIMIT_BAL  SEX  EDUCATION  MARRIAGE       AGE     PAY_1     PAY_2  \
21753  -0.674276    1          2         1 -1.246020  0.014861  0.111736   
251    -1.059646    0          2         1 -0.812074  0.014861  0.111736   
22941   0.096463    1          5         0  0.923707  0.014861  0.111736   
618    -0.828424    0          1         1 -1.137534  0.014861  0.111736   
17090  -0.288907    1          2         1 -1.137534  0.014861  0.111736   

          PAY_3     PAY_4     PAY_5  ...  BILL_AMT3  BILL_AMT4  BILL_AMT5  \
21753  0.138865  0.188746  0.234917  ...   0.451458   0.473608  -0.010994   
251    0.138865  0.188746  0.234917  ...  -0.257514  -0.279923  -0.301177   
22941 -0.696663 -0.666599 -0.647565  ...  -0.665672  -0.672497  -0.549877   
618    0.138865  0.188746  0.234917  ...  -0.122284  -0.056332  -0.011389   
17090  0.138865  0.188746  0.234917  ...   0.976533   1.158992   1.324798   

       BILL_AMT6  PAY_AMT1  PAY_AMT2  PAY_AMT3  PAY_AMT4  

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier

# Algorithm Implementation
# k-Nearest Neighbors
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_score = knn.score(X_test, y_test)
print("kNN Accuracy:", knn_score)

# Support Vector Machine
svm = SVC()
svm.fit(X_train, y_train)
svm_score = svm.score(X_test, y_test)
print("SVM Accuracy:", svm_score)

# Gradient Boosting
gradient_boosting = GradientBoostingClassifier()
gradient_boosting.fit(X_train, y_train)
gradient_boosting_score = gradient_boosting.score(X_test, y_test)
print("Gradient Boosting Accuracy:", gradient_boosting_score)

# AdaBoost
ada_boost = AdaBoostClassifier()
ada_boost.fit(X_train, y_train)
ada_boost_score = ada_boost.score(X_test, y_test)
print("AdaBoost Accuracy:", ada_boost_score)

# XGBoost
xgboost = XGBClassifier()
xgboost.fit(X_train, y_train)
xgboost_score = xgboost.score(X_test, y_test)
print("XGBoost Accuracy:", xgboost_score)


kNN Accuracy: 0.7988333333333333
SVM Accuracy: 0.8195
Gradient Boosting Accuracy: 0.821
AdaBoost Accuracy: 0.8165
XGBoost Accuracy: 0.8166666666666667


In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Define the parameter distribution
param_dist = {
    'n_estimators': randint(50, 150),
    'learning_rate': [0.05, 0.1, 0.5],
    'max_depth': randint(3, 8)
}


# Create RandomizedSearchCV object with fewer iterations
random_search = RandomizedSearchCV(GradientBoostingClassifier(), param_distributions=param_dist, n_iter=5, cv=5)
random_search.fit(X_train, y_train)


# Get best parameters and score
best_params = random_search.best_params_
best_score = random_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 60}
Best Score: 0.822125
