In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
df = pd.read_csv("v1_train.csv")

In [2]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 21 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   Age                       100000 non-null  float64
 1   Monthly_Inhand_Salary     100000 non-null  float64
 2   Num_Bank_Accounts         100000 non-null  int64  
 3   Num_Credit_Card           100000 non-null  int64  
 4   Interest_Rate             100000 non-null  int64  
 5   Num_of_Loan               100000 non-null  int64  
 6   Delay_from_due_date       100000 non-null  int64  
 7   Num_of_Delayed_Payment    100000 non-null  float64
 8   Changed_Credit_Limit      100000 non-null  float64
 9   Num_Credit_Inquiries      100000 non-null  float64
 10  Credit_Mix                100000 non-null  int64  
 11  Outstanding_Debt          100000 non-null  float64
 12  Credit_Utilization_Ratio  100000 non-null  float64
 13  Credit_History_Age        100000 non-null  in

In [3]:
df["Credit_Score"].unique()

array([3, 2, 1], dtype=int64)

In [4]:
# Split the dataset into training and test sets
X = df.drop("Credit_Score", axis=1)
y = df["Credit_Score"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the models
models = [
    ("Logistic Regression", LogisticRegression()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Neural Network", MLPClassifier()),
    ("Gradient Boosting Machines", GradientBoostingClassifier()),   
    ("LightGBM", LGBMClassifier()),
    ("Decision Trees", DecisionTreeClassifier()),
    ("CatBoost", CatBoostClassifier(logging_level='Silent')),
    ("Random Forest", RandomForestClassifier()),
]

# Train and evaluate each model
for name, model in models:
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")


Training Logistic Regression...
Logistic Regression Accuracy: 0.5733
Training Naive Bayes...
Naive Bayes Accuracy: 0.6309
Training K-Nearest Neighbors...
K-Nearest Neighbors Accuracy: 0.6514
Training Neural Network...
Neural Network Accuracy: 0.6197
Training Gradient Boosting Machines...
Gradient Boosting Machines Accuracy: 0.6974
Training LightGBM...
LightGBM Accuracy: 0.7256
Training Decision Trees...
Decision Trees Accuracy: 0.6917
Training CatBoost...
CatBoost Accuracy: 0.7473
Training Random Forest...
Random Forest Accuracy: 0.7889


In [5]:
# Load the dataset
df = pd.read_csv("v2_train.csv")

In [6]:
# Split the dataset into training and test sets
X = df.drop("Credit_Score", axis=1)
y = df["Credit_Score"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the models
models = [
    ("Logistic Regression", LogisticRegression()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Neural Network", MLPClassifier()),
    ("Gradient Boosting Machines", GradientBoostingClassifier()),   
    ("LightGBM", LGBMClassifier()),
    ("Decision Trees", DecisionTreeClassifier()),
    ("CatBoost", CatBoostClassifier(logging_level='Silent')),
    ("Random Forest", RandomForestClassifier()),
]

# Train and evaluate each model
for name, model in models:
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")

Training Logistic Regression...
Logistic Regression Accuracy: 0.5838
Training Naive Bayes...
Naive Bayes Accuracy: 0.6302
Training K-Nearest Neighbors...
K-Nearest Neighbors Accuracy: 0.6472
Training Neural Network...
Neural Network Accuracy: 0.5820
Training Gradient Boosting Machines...
Gradient Boosting Machines Accuracy: 0.6968
Training LightGBM...
LightGBM Accuracy: 0.7249
Training Decision Trees...
Decision Trees Accuracy: 0.6955
Training CatBoost...
CatBoost Accuracy: 0.7467
Training Random Forest...
Random Forest Accuracy: 0.7941


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv("v2_train.csv")

# Split the dataset into training and test sets
X = df.drop("Credit_Score", axis=1)
y = df["Credit_Score"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the models
models = [
    ("Logistic Regression", LogisticRegression()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("SVM", SVC()),
    ("Linear SVM", LinearSVC()),
    ("Decision Trees", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Gradient Boosting Machines", GradientBoostingClassifier()),   
    ("AdaBoost", AdaBoostClassifier()),
    ("Neural Network", MLPClassifier()),
    ("XGBoost", XGBClassifier()),
    ("LightGBM", LGBMClassifier()),
    ("CatBoost", CatBoostClassifier(logging_level='Silent')),
]

# Train and evaluate each model
for name, model in models:
    print(f"Training {name}...")
    # Create a pipeline for each model to scale the features and fit the model
    pipeline = Pipeline(steps=[("scaler", StandardScaler()), ("model", model)])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")


Training Logistic Regression...
Logistic Regression Accuracy: 0.6333
Training Naive Bayes...
Naive Bayes Accuracy: 0.6304
Training K-Nearest Neighbors...
K-Nearest Neighbors Accuracy: 0.7356
Training SVM...
