In [None]:
import pandas as pd
import numpy as np


In [None]:
# Read both CSV splits from the current working directory (test first, then train).
test, train = (pd.read_csv(csv_name) for csv_name in ('test.csv', 'train.csv'))

In [None]:
# Overwrite every missing value in both frames with the sentinel -999 (in place).
# NOTE(review): the sentinel is an int, so a string column that contained
# missing values ends up holding both str and int entries.
for frame in (train, test):
    frame.fillna(-999, inplace=True)



In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Label-encode every object-dtype column, one encoder per column. The encoder
# is fitted on the union of the train and test values so that both frames
# share the same integer codes.
for col in train.columns:
    if train[col].dtype != 'object':
        continue

    # Cast to str first: the earlier fillna(-999) can leave an int sentinel
    # inside an otherwise-string column, and LabelEncoder refuses input that
    # mixes strings and numbers.
    train_values = train[col].astype(str)

    le = LabelEncoder()
    if col in test.columns:
        test_values = test[col].astype(str)
        le.fit(list(train_values.values) + list(test_values.values))
        test[col] = le.transform(test_values.values)
    else:
        # Column exists only in train (for example a string-typed target):
        # fit on train alone instead of failing with KeyError on test[col].
        le.fit(train_values.values)

    train[col] = le.transform(train_values.values)

In [None]:
# Show the training frame as it stands after sentinel filling and label encoding.
train

In [None]:
# Show the test frame as it stands after sentinel filling and label encoding.
test

In [None]:
def add_cyclical_month(frame, period=12):
    """Add ``month_sin`` and ``month_cos`` columns to ``frame`` in place.

    The numeric ``month`` column is projected onto the unit circle so that
    values exactly one ``period`` apart receive identical features.

    Args:
        frame: DataFrame holding a numeric ``month`` column.
        period: Number of steps in one full cycle (12 by default).

    Returns:
        None. ``frame`` is modified in place.
    """
    # One vectorized pass instead of a Python-level lambda per row.
    angle = 2 * np.pi * frame['month'] / period
    frame['month_sin'] = np.sin(angle)
    frame['month_cos'] = np.cos(angle)


# NOTE(review): if 'month' was originally a string column, the LabelEncoder
# loop earlier in this notebook replaced it with codes in sorted-label order,
# which is not calendar order - confirm the codes are meaningful as a cycle.
# Rows whose month was missing carry the -999 sentinel here as well.
for frame in (train, test):
    add_cyclical_month(frame)

In [None]:
# Signed log transform of 'balance': sign(x) * log1p(|x|).
#
# Plain log1p(x) is NaN for x < -1 and -inf at x == -1, and that range includes
# the -999 sentinel written by the earlier fillna. The signed form is identical
# to log1p(x) for every x >= 0, stays finite and order-preserving for negative
# values, and maps the -999 sentinel to about -6.91 instead of NaN.
#
# The column is overwritten, so re-running this cell transforms it a second
# time; restart and run all cells to get back to a clean state.
for frame in (train, test):
    balance = frame['balance']
    frame['balance'] = np.sign(balance) * np.log1p(balance.abs())

In [None]:
# Interaction feature: element-wise product of the (transformed) balance and age.
for frame in (train, test):
    frame['balance_age'] = frame['balance'].mul(frame['age'])

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [None]:
# --- Target and feature matrices -------------------------------------------
# 'id' is dropped from both frames; 'y' is the target and only lives in train.
y = train['y']
X = train.drop(columns=['id', 'y'])
X_test = test.drop(columns=['id'])

# Turn +/-inf into NaN, then every NaN into the -999 sentinel, for both matrices.
for features in (X, X_test):
    features.replace([np.inf, -np.inf], np.nan, inplace=True)
    features.fillna(-999, inplace=True)

# --- Hold-out split ----------------------------------------------------------
# 20% of the rows are kept for validation; the seed fixes the partition.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Baseline gradient-boosted model ---------------------------------------
# NOTE(review): eval_metric is set, but fit() below is given no eval_set -
# confirm whether the metric is actually used anywhere.
xgb_params = dict(
    n_estimators=1000,
    learning_rate=0.06,
    max_depth=6,
    use_label_encoder=False,
    eval_metric='auc',
    random_state=42,
)
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# --- Hold-out evaluation ---------------------------------------------------
y_pred = model.predict(X_valid)
accuracy = accuracy_score(y_valid, y_pred)
print(f'Validation Accuracy: {accuracy:.4f}')

In [None]:
from sklearn.model_selection import GridSearchCV

# Base estimator for the search; the grid below supplies the tuned parameters.
model = xgb.XGBClassifier(
    use_label_encoder=False,
    eval_metric='auc',
    random_state=42
)

# 3 x 3 x 4 = 36 candidate settings, each evaluated with 3-fold CV.
param_grid = {
    'n_estimators': [100, 500, 1000],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5, 6],
}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    verbose=1,
    n_jobs=-1
)

# The search only sees the training split; X_valid stays untouched for scoring.
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)
best_model = grid_search.best_estimator_

# Score the refitted best model on the hold-out validation split. This is the
# validation set, not the test set, so it is named and labelled accordingly.
y_valid_pred = best_model.predict(X_valid)
print(f'Validation Accuracy: {accuracy_score(y_valid, y_valid_pred):.4f}')

# Predictions for the real test set, aligned row-for-row with test['id'] so
# the submission template below has matching lengths.
y_test_pred = best_model.predict(X_test)

# submission = pd.DataFrame({
#     'id': test['id'],
#     'y': y_test_pred
# })

# submission = pd.DataFrame({
#     'id': test['id'],
#     'y': y_test_pred
# })

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Neural-network baseline with default hyper-parameters.
# The feature matrix mixes columns on very different scales and contains the
# -999 missing-value sentinel; MLP training is sensitive to feature scale, so
# the inputs are standardised first. Keeping the scaler inside the pipeline
# means it is fitted on X_train only and then re-applied to X_valid.
model = make_pipeline(StandardScaler(), MLPClassifier(random_state=42))
model.fit(X_train, y_train)
y_pred_nn = model.predict(X_valid)

print(f'Neural Network Validation Accuracy: {accuracy_score(y_valid, y_pred_nn):.4f}')

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# 3 x 2 x 3 x 2 = 36 MLP settings, each evaluated with 3-fold CV.
param_grid_nn = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01],
}

mlp = MLPClassifier(random_state=42)

# MLP training is sensitive to feature scale and the features include the -999
# sentinel, so standardise first. The scaler sits inside the pipeline so that
# it is refitted on the training part of every CV fold (no leakage from the
# held-out fold into the scaling statistics).
mlp_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', mlp),
])

grid_search_nn = GridSearchCV(
    estimator=mlp_pipeline,
    # Pipeline parameters are addressed as '<step>__<param>'; as a result the
    # keys reported in best_params_ carry the 'mlp__' prefix.
    param_grid={f'mlp__{name}': values for name, values in param_grid_nn.items()},
    scoring='accuracy',
    cv=3,
    verbose=1,
    n_jobs=-1
)
grid_search_nn.fit(X_train, y_train)

print("Best parameters for Neural Network found: ", grid_search_nn.best_params_)
print("Best cross-validation score for Neural Network: ", grid_search_nn.best_score_)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest baseline: 100 trees with a fixed seed, scored on the hold-out split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_valid)
rf_accuracy = accuracy_score(y_valid, y_pred_rf)
print(f'Random Forest Validation Accuracy: {rf_accuracy:.4f}')