In [1]:
import numpy as np, pandas as pd, seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [2]:
X = pd.read_csv('train.csv', index_col='id')
X_test = pd.read_csv('test.csv', index_col='id')

X.dropna(axis=0, subset=['Response'], inplace=True)
y = X.Response              
X.drop(['Response'], axis=1, inplace=True)

X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2,
                                                                random_state=0)

low_cardinality_cols = [cname for cname in X_train.columns if X_train[cname].nunique() < 10 and 
                        X_train[cname].dtype == "object"]

numeric_cols = [cname for cname in X_train.columns if X_train[cname].dtype in ['int64', 'float64']]

my_cols = low_cardinality_cols + numeric_cols
X_train = X_train[my_cols].copy()
X_valid = X_valid[my_cols].copy()
X_test = X_test[my_cols].copy()
X_train.shape

(304887, 10)

In [3]:
from sklearn.preprocessing import LabelEncoder

cols = ['Gender', 'Vehicle_Age','Vehicle_Damage']

label = LabelEncoder()
for col in cols:
    X_train[col] = label.fit_transform(X_train[col])
    X_valid[col] = label.transform(X_valid[col])
    X_test[col] = label.transform(X_test[col])

In [4]:
X_train.drop(['Region_Code', 'Vintage'], axis = 1, inplace = True)
X_valid.drop(['Region_Code', 'Vintage'], axis = 1, inplace = True)
X_test.drop(['Region_Code', 'Vintage'], axis = 1, inplace = True)

In [5]:
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV

my_model = XGBClassifier(n_estimators=900, learning_rate = 0.05)
my_model.fit(X_train,y_train)
    
preds_valid = my_model.predict(X_valid)
valid_score = metrics.roc_auc_score(y_valid, preds_valid)
print(f"Validation AUC score: {valid_score:.8f}")

Validation AUC score: 0.50214640


In [6]:
for i in range(len(preds_valid)):
    if preds_valid[i]>0.5:
        preds_valid[i]=1
    else:
        preds_valid[i]=0
valid_score = metrics.roc_auc_score(y_valid, preds_valid)
print(f"Validation AUC score: {valid_score:.8f}")

Validation AUC score: 0.50214640


In [6]:
preds_test = my_model.predict(X_test)

output = pd.DataFrame({'id': X_test.index,
                       'Response': preds_test})
output.to_csv('sample_submission_iA3afxn.csv', index=False)