# Importing Modules and Datasets

(do scaling and feature selection using chi2 possibly)

In [1]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, OneHotEncoder, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.metrics import classification_report, f1_score
from sklearn.naive_bayes import GaussianNB, CategoricalNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, NuSVC
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel, RFE
from imblearn.over_sampling import SMOTE

In [2]:
# test data
true_data = pd.read_csv('../input/marketing-strategy-personalised-offer/test_data.csv')

# train data raw
train_data = pd.read_csv('../input/marketing-strategy-personalised-offer/train_data.csv')

y_train_all = train_data.pop('Offer Accepted')

# Data Pre-Processing

In [3]:
true_data.shape, train_data.shape, y_train_all.shape

((5305, 30), (12379, 30), (12379,))

In [4]:
# replacing missing and unnecessary values 

all_col = [i for i in train_data.columns if i not in ["car","restuarant_opposite_direction_house","travelled_more_than_5mins_for_offer"]]

transformer1 = ColumnTransformer(
    [
        ("drop_cols", "drop", ["car","restuarant_opposite_direction_house","travelled_more_than_5mins_for_offer"]),
        ("imputer", SimpleImputer(strategy="most_frequent"), all_col)
    ],
    remainder="passthrough"
)

train_data1 = pd.DataFrame(transformer1.fit_transform(train_data), columns=all_col)
true_data1 = pd.DataFrame(transformer1.transform(true_data), columns=all_col)

In [5]:
# column names

no_enc = ['travelled_more_than_15mins_for_offer','Prefer western over chinese','travelled_more_than_25mins_for_offer',
              'restuarant_same_direction_house','Cooks regularly','is foodie','has Children','Prefer home food','visit restaurant with rating (avg)']
ord_enc = ['offer expiration','income_range','no_visited_Cold drinks','no_visited_bars','no_Take-aways',
           'Restaur_spend_less_than20','Restaur_spend_greater_than20', 'age','restaurant type','Qualification',
          'Customer type','Marital Status','temperature','Travel Time']

# ord_enc variables

income_list = np.array(['Less than ₹12500', '₹12500 - ₹24999', '₹25000 - ₹37499', 
               '₹37500 - ₹49999', '₹50000 - ₹62499', '₹62500 - ₹74999', 
                '₹75000 - ₹87499', '₹87500 - ₹99999',  '₹100000 or More'])
no_list = np.array(['never', 'less1', '1~3', '4~8', 'gt8'])
offer_list = np.array(['10hours', '2days'])
age_list = np.array(['below21','21', '26', '31', '36', '41', '46','50plus'])
resto_list = np.array(['Cold drinks','Take-away restaurant', 'Restaurant with pub',
              '2 star restaurant','4 star restaurant'])
quali_list = np.array(['Some High School','High School Graduate', 'Some college - no degree','Associates degree',
              'Bachelors degree','Graduate degree (Masters or Doctorate)'])
custo_list = np.array(['Individual', 'With Colleagues', 'With Kids', 'With Family'])
marital_list = np.array([ 'Single', 'Unmarried partner','Married partner', 'Divorced', 'Widowed'])
temp_list = np.array([40, 67, 89])
travel_list = np.array([7, 10, 14, 18, 22])

In [6]:
# ordinal encoding both test and training data

income_list_oe = OrdinalEncoder(categories=[income_list],dtype=np.int64)
no_list_oe = OrdinalEncoder(categories=[no_list]*5,dtype=np.int64)
offer_list_oe = OrdinalEncoder(categories=[offer_list],dtype=np.int64)
age_list_oe = OrdinalEncoder(categories=[age_list],dtype=np.int64)
resto_list_oe = OrdinalEncoder(categories=[resto_list],dtype=np.int64)
quali_list_oe = OrdinalEncoder(categories=[quali_list],dtype=np.int64)
custo_list_oe = OrdinalEncoder(categories=[custo_list],dtype=np.int64)
marital_list_oe = OrdinalEncoder(categories=[marital_list],dtype=np.int64)
temp_list_oe = OrdinalEncoder(categories=[temp_list],dtype=np.int64)
travel_list_oe = OrdinalEncoder(categories=[travel_list],dtype=np.int64)
one_hot = OneHotEncoder(sparse=False,drop='first', dtype=np.int64)

transformer2 = ColumnTransformer(
    [
        ('offer_list_oe',offer_list_oe,['offer expiration']),
        ('income_list_oe',income_list_oe,['income_range']),
        ('no_list_oe',no_list_oe,['no_visited_Cold drinks','no_visited_bars','no_Take-aways','Restaur_spend_less_than20','Restaur_spend_greater_than20']),
        ('age_list_oe',age_list_oe,['age']),
        ('resto_list_oe',resto_list_oe,['restaurant type']),
        ('quali_list_oe', quali_list_oe,['Qualification']),
        ('custo_list_oe',custo_list_oe,['Customer type']),
        ('marital_list_oe',marital_list_oe,['Marital Status']),
        ('temp_list_oe',temp_list_oe,['temperature']),
        ('travel_list_oe',travel_list_oe,['Travel Time'])
    ],
    remainder="drop"
)

ord_enc_data = pd.DataFrame(transformer2.fit_transform(train_data1), columns=ord_enc)
true_ord_enc_data = pd.DataFrame(transformer2.fit_transform(true_data1), columns=ord_enc)

In [7]:
# OneHotEncoding both test and train_data

transformer3 = ColumnTransformer(
    [
        ('one_hot1',one_hot,['Job/Job Industry']),
        ('one_hot2',one_hot,['Climate']),
        ('one_hot3',one_hot,['drop location']),
        ('one_hot4',one_hot,['gender'])
    ],
    remainder="drop"
)

one_hot_data = pd.DataFrame(transformer3.fit_transform(train_data1), columns=[name.split("__")[1] for name in transformer3.get_feature_names_out()])
one_hot_list = [name.split("__")[1] for name in transformer3.get_feature_names_out()]
true_one_hot_data = pd.DataFrame(transformer3.transform(true_data1), columns=one_hot_list)

In [8]:
# final full training data

X_train_full = train_data1[no_enc].astype('int64')
X_train_full[one_hot_list] = one_hot_data
X_train_full[ord_enc] = ord_enc_data

# final full test data

X_true = true_data1[no_enc].astype('int64')
X_true[one_hot_list] = true_one_hot_data
X_true[ord_enc] = true_ord_enc_data

# final full label

le = LabelEncoder()
y_train_full = le.fit_transform(y_train_all)

In [9]:
X_train_full.shape, X_true.shape, y_train_full.shape

((12379, 52), (5305, 52), (12379,))

In [10]:
sm = SMOTE(random_state=32)
X_res, y_res = sm.fit_resample(X_train_full, y_train_full)
X_res.shape, y_res.shape

((13988, 52), (13988,))

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.19, random_state=32)

# Model Building

### Dummy Model

In [12]:
# dummy_model = DummyClassifier(strategy="most_frequent")
# dummy_model.fit(X_train, y_train)
# dummy_model.fit(X_train, y_train)
# print(classification_report(y_test, dummy_model.predict(X_test)))
# print(f1_score(y_test, dummy_model.predict(X_test), average='micro'))

### Logistic Regression Model

In [13]:
# Logisitc Regression, Lasso

# model = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#                    intercept_scaling=1, l1_ratio=None, max_iter=1000,
#                    multi_class='auto', n_jobs=None, penalty='l2',
#                    random_state=32, solver='lbfgs', tol=0.0001, verbose=0,
#                    warm_start=False)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [14]:
# Logisitc Regression, Ridge

# model = SGDClassifier(random_state=32, loss="log", penalty="l2")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [15]:
# Logisitc Regression, Elasticnet

# model = SGDClassifier(random_state=32, loss="log", penalty="elasticnet")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [16]:
# Perceptron Regression, Lasso

# model = SGDClassifier(random_state=32, loss="perceptron", penalty="l1")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [17]:
# Polynomial features (degree=2, including interaction) + Logistic Regression + Elasticnet

# polyn = PolynomialFeatures(2)

# train_data_poly = polyn.fit_transform(X_train_full)
# feature_names = [i for i in polyn.get_feature_names_out()]
# train_df_poly = pd.DataFrame(data=train_data_poly, columns=feature_names)

# X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(train_df_poly, y_train_full, test_size=0.3, random_state=32)

# model = SGDClassifier(random_state=32, loss="log", penalty="elasticnet")
# model.fit(X_train_poly, y_train_poly)
# preds = model.predict(X_test_poly)
# print(classification_report(y_test_poly, preds))

In [18]:
# Logistic Regression hyper-parameter tuning

# param_grid = {
#     "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000],
#     "learning_rate": ["optimal", "invscaling", "adaptive"],
#     "eta0": [1, 10, 100],
#     "penalty": ["l1", "l2", "elasticnet"]
# }
# model = SGDClassifier(random_state=32, loss="log")
# search = GridSearchCV(model, param_grid=param_grid, scoring="f1_micro", refit=True, cv=5, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [19]:
# lr_model = SGDClassifier(random_state=32,loss="log",penalty="l1",alpha=0.001,eta0=1,learning_rate="adaptive")

# lr_model.fit(X_train, y_train)
# print(classification_report(y_test, lr_model.predict(X_test)))
# print(f1_score(y_test, lr_model.predict(X_test), average='micro'))

### NaiveBayes

In [20]:
# Gaussian Naive Bayes

# model = GaussianNB()
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [21]:
# Categorical Naive Bayes

# model = CategoricalNB()
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [22]:
# HyperParameter Tuning

# param_grid = {'alpha':[0.01,0.1,1,10,100],
#              'fit_prior':[True, False]}

# model = CategoricalNB()

# search = GridSearchCV(model, param_grid=param_grid, cv=5, scoring="f1_micro", refit=True, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [23]:
# nb_model = CategoricalNB(alpha=100)
# nb_model.fit(X_train, y_train)
# print(classification_report(y_test, nb_model.predict(X_test)))
# print(f1_score(y_test, nb_model.predict(X_test), average='micro'))

### KNeighborsClassifier Clustering Model

In [24]:
# Weights = inverse of distance

# model = KNeighborsClassifier(weights="distance")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [25]:
# Metric = Manhattan distance
# Weight = inverse of distance

# model = KNeighborsClassifier(weights="distance", p=1)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [26]:
# Metric = Manhattan distance
# Weight = inverse of distance
# Algorithm = BallTree

# model = KNeighborsClassifier(weights="distance", p=1, algorithm="ball_tree")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [27]:
## Seems like changing algorithm only just affects time taken
### HPT for the `leaf_size` parameter

# param_grid = {
#     'n_neighbors': [5,7,9],
#     "leaf_size": [1, 2, 3, 5, 10],
#     'weights': ['uniform','distance'],
#     'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
# }
# model = KNeighborsClassifier()
# search = GridSearchCV(model, param_grid=param_grid, cv=3, scoring="f1_micro", refit=True, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [28]:
# Metric = Manhattan distance
# Weight = inverse of distance

# kn_model = KNeighborsClassifier(algorithm='kd_tree', leaf_size=2, 
#                      metric_params=None, n_jobs=-1, n_neighbors=7,
#                      weights='uniform')

# kn_model.fit(X_train, y_train)
# print(classification_report(y_test, kn_model.predict(X_test)))
# print(f1_score(y_test, kn_model.predict(X_test), average='micro'))

### SVM Model

In [29]:
# RBF kernel

# model = SVC(random_state=32)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [30]:
# sigmoid kernel

# model = SVC(random_state=32, kernel='sigmoid')
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [31]:
# Linear kernel

# model = SVC(random_state=32, kernel="linear")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [32]:
# Poly kernel, degree = 2

# model = SVC(random_state=32, degree=2, kernel="poly")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [33]:
# Poly kernel, degree = 3

# model = SVC(random_state=32, degree=3, kernel="poly")
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [34]:
# NuSVC 

# model = NuSVC(random_state=32)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [35]:
# HPT for rbf kernel (which performed best)

# param_grid = {
#     'nu': [0.3, 0.5, 0.8],
#     'kernel': ['rbf','linear','poly']
# }
# model = NuSVC(random_state=32)
# search = GridSearchCV(model, param_grid=param_grid, cv=3, scoring="f1_micro", refit=True, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [36]:
# model = NuSVC(random_state=32, kernel='rbf', nu=0.8)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [37]:
# HPT for rbf kernel (which performed best)

# param_grid = {
#     "C": [0.01, 0.1, 1, 10, 100],
#     "gamma": ["scale", 0.01, 0.1],
# }
# model = SVC(random_state=32)
# search = GridSearchCV(model, param_grid=param_grid, cv=3, scoring="f1_micro", refit=True, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [38]:
#tuned model

# sv_model = SVC(random_state=32, C=10, gamma=0.01)

# sv_model.fit(X_train, y_train)
# print(classification_report(y_test, sv_model.predict(X_test)))
# print(f1_score(y_test, sv_model.predict(X_test), average='micro'))

### CART Models

In [39]:
# With PCA (seems to reduce the score)

# pca = PCA(random_state=32)
# reduced_transform_train_df = pca.fit_transform(X_train_full[ord_enc+one_hot_list])

# print(pca.explained_variance_ratio_[:5])

# reduced_transform_train_df = pd.DataFrame(data = reduced_transform_train_df[:,:4], columns = ["PC1", "PC2", "PC3", "PC4"])
# for col in no_enc:
#     reduced_transform_train_df[col] = X_train_full[col]
# reduced_X_train_com = reduced_transform_train_df.copy()
# reduced_y_train_com = y_train_full
# reduced_X_train, reduced_X_valid, reduced_y_train, reduced_y_valid = train_test_split(reduced_X_train_com, reduced_y_train_com, test_size=0.3, random_state=32)

# model = DecisionTreeClassifier(class_weight="balanced", random_state=32, max_depth=5, ccp_alpha=0.001)
# cross_val_score(model, reduced_X_train_com, reduced_y_train_com, cv=5, scoring="f1_micro").mean()

In [40]:
# Without PCA

# model = DecisionTreeClassifier(class_weight="balanced", random_state=32, ccp_alpha=0.001, max_depth=8)
# cross_val_score(model, X_train_full, y_train_full, cv=5, scoring="f1_micro").mean()

In [41]:
# Grid search without PCA

# param_grid = {
#     "ccp_alpha": [0.0001, 0.001, 0.01],
#     "max_depth": [5, 10, 15],
#     "min_samples_split": [15, 20, 25],
#     "min_samples_leaf": [3,5, 10],
#     "max_features": ["auto", "log2", None]
# }
# model = DecisionTreeClassifier(class_weight="balanced", random_state=32)
# search = GridSearchCV(model, param_grid=param_grid, scoring="f1_micro", refit=True, cv=5, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [42]:
# tuned model

# tree_model = DecisionTreeClassifier(class_weight="balanced",random_state=32,ccp_alpha=0.0001,max_depth=5,
#                                     max_features=None,min_samples_leaf=15,min_samples_split=5)

# tree_model.fit(X_train, y_train)

# print(classification_report(y_test, tree_model.predict(X_test)))
# print(f1_score(y_test, tree_model.predict(X_test), average='micro'))

### Bagging

In [43]:
# model = ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
#                      criterion='gini', max_depth=None, max_features='auto',
#                      max_leaf_nodes=None, max_samples=None,
#                      min_impurity_decrease=0.0,
#                      min_samples_leaf=1, min_samples_split=2,
#                      min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
#                      oob_score=False, random_state=32, verbose=0,
#                      warm_start=False)

# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [44]:
# HPT

# param_grid = {
#     "min_samples_split": [2, 5, 10],
#     "max_samples": [0.5, 0.9],
#     "n_estimators": [100, 200],
#     "min_samples_leaf": [10, 20],
#     "max_depth": [10, 20],
#     "ccp_alpha": [0.0001, 0.001]
# }
# model = RandomForestClassifier(max_features=None,random_state=32,class_weight="balanced",
#                                min_samples_leaf=15,n_estimators=100)
# search = GridSearchCV(model, param_grid, scoring="f1_micro", refit=True, cv=3, verbose=3)
# search.fit(X_train_full, y_train_full)
# search.best_params_

In [45]:
# tuned model

# rf_model = RandomForestClassifier(ccp_alpha=0.0001,
#                                   max_depth=20,
#                                   max_samples=0.5,
#                                   min_samples_leaf=10, min_samples_split=2,
#                                   n_estimators=200,random_state=32)

# rf_model.fit(X_train, y_train)

# print(classification_report(y_test, rf_model.predict(X_test)))
# print(f1_score(y_test, rf_model.predict(X_test), average='micro'))

### Boosting

In [46]:
# Tuning of parameters related to DecisionTree

# param_grid = {
#     "min_child_weight": [0.3, 0.5, 1],
#     "max_depth": [2, 3, 4],
#     "subsample": [0.5, 0.75, 1],
#     "lambda": [1, 3, 5],
#      "alpha": [3,5],
#      "eta": [0.01, 0.05, 0.1, 1],
# }
# model = XGBClassifier(random_state=32)

# search = GridSearchCV(model,param_grid,scoring="f1_micro",refit=True,cv=3, verbose=3)

# search.fit(X_train_full, y_train_full)
# print(search.best_params_)
# print(search.best_score_)

In [47]:
# Tuning of parameters related to boosting

# param_grid = {
#      "lambda": [1, 3, 5],
#      "alpha": [3,5],
#      "eta": [0.01, 0.05, 0.1, 1],
# }
# model = XGBClassifier(random_state=32,max_depth=3,min_child_weight=0.5,subsample=1)

# search = GridSearchCV(model,param_grid,scoring="f1_micro",refit=True,cv=3, verbose=3)

# search.fit(X_train_full, y_train_full)

# print(search.best_params_)
# print(search.best_score_)

In [48]:
# model = LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
#                            solver='svd', store_covariance=False, tol=0.0001)

# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))
# print(f1_score(y_test, model.predict(X_test), average='micro'))

In [49]:
# model = AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
#                    n_estimators=50, random_state=32)
# model.fit(X_train, y_train)
# print(classification_report(y_test, model.predict(X_test)))

In [50]:
# gb_model = GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
#                            learning_rate=0.1, loss='deviance', max_depth=3,
#                            max_features=None, max_leaf_nodes=None,
#                            min_impurity_decrease=0.0,
#                            min_samples_leaf=1, min_samples_split=2,
#                            min_weight_fraction_leaf=0.0, n_estimators=100,
#                            n_iter_no_change=None,
#                            random_state=32, subsample=1.0, tol=0.0001,
#                            validation_fraction=0.1, verbose=0,
#                            warm_start=False)

# gb_model.fit(X_train, y_train)
# print(classification_report(y_test, gb_model.predict(X_test)))
# print(f1_score(y_test, gb_model.predict(X_test), average='micro'))

In [51]:
kwargs = {"lambda": 1}
xgb_model = XGBClassifier(random_state=32,max_depth=5,min_child_weight=0.5,subsample=0.75,alpha=3,eta=0.1,**kwargs)

xgb_model.fit(X_train, y_train)

print(classification_report(y_test, xgb_model.predict(X_test)))
print(f1_score(y_test, xgb_model.predict(X_test), average='micro'))

              precision    recall  f1-score   support

           0       0.64      0.60      0.62      1338
           1       0.62      0.66      0.64      1320

    accuracy                           0.63      2658
   macro avg       0.63      0.63      0.63      2658
weighted avg       0.63      0.63      0.63      2658

0.6316779533483823


In [52]:
selector = RFE(xgb_model, step=1, verbose=3, importance_getter='auto')
selector.fit(X_train_full,y_train_full)
X_train_rfe = selector.transform(X_res)
X_true_rfe = selector.transform(X_true)
X_train, X_test, y_train, y_test = train_test_split(X_train_rfe, y_res, test_size=0.2, random_state=32)

Fitting estimator with 52 features.
Fitting estimator with 51 features.
Fitting estimator with 50 features.
Fitting estimator with 49 features.
Fitting estimator with 48 features.
Fitting estimator with 47 features.
Fitting estimator with 46 features.
Fitting estimator with 45 features.
Fitting estimator with 44 features.
Fitting estimator with 43 features.
Fitting estimator with 42 features.
Fitting estimator with 41 features.
Fitting estimator with 40 features.
Fitting estimator with 39 features.
Fitting estimator with 38 features.
Fitting estimator with 37 features.
Fitting estimator with 36 features.
Fitting estimator with 35 features.
Fitting estimator with 34 features.
Fitting estimator with 33 features.
Fitting estimator with 32 features.
Fitting estimator with 31 features.
Fitting estimator with 30 features.
Fitting estimator with 29 features.
Fitting estimator with 28 features.
Fitting estimator with 27 features.


In [53]:
kwargs = {"lambda": 1}
xgb_model = XGBClassifier(random_state=32,max_depth=5,min_child_weight=0.5,subsample=0.75,alpha=3,eta=0.1,**kwargs)

xgb_model.fit(X_train, y_train)

print(classification_report(y_test, xgb_model.predict(X_test)))
print(f1_score(y_test, xgb_model.predict(X_test), average='micro'))

              precision    recall  f1-score   support

           0       0.64      0.62      0.63      1406
           1       0.63      0.64      0.64      1392

    accuracy                           0.63      2798
   macro avg       0.63      0.63      0.63      2798
weighted avg       0.63      0.63      0.63      2798

0.6333095067905646


### MultiLayerPerceptron

In [54]:
# HPT

# param_grid = {
#     "hidden_layer_sizes": [(10), (30), (30,30)],
#     "max_iter": [ 450, 500, 1000],
#     "solver": ["sgd", "adam"],
#     "activation": ["relu", "softmax"],
#     "alpha": [0.01, 0.1],
# }
# model = MLPClassifier(random_state=32, learning_rate="adaptive")
# search = GridSearchCV(model, param_grid, scoring="f1_micro", refit=True, cv=3, verbose=3)
# search.fit(X_train_full, y_train_full)
# print(search.best_params_)
# print(search.best_score_)

In [55]:
# mlp_model = MLPClassifier(random_state=32,learning_rate="adaptive",hidden_layer_sizes=(30),max_iter=450,solver="adam",
#                              activation="relu",alpha=0.1)

# mlp_model.fit(X_train, y_train)
# print(classification_report(y_test, mlp_model.predict(X_test)))

### Comparing various models _(based on F1 macro score)_
- XGBoost: 0.61
- RandomForestClassifier: 0.61
- MLPClassifier: 0.59
- NaiveBayes: 0.59
- SVM: 0.58
- DecisionTree: 0.57
- KNeighbors Clustering: 0.57
- Logistic Regression: 0.53
- Dummy Classifier: 0.36

# Submission


In [56]:
# fit model on whole data first !!!!!!!!!!!!!!!!!!!!!!!

xgb_model.fit(X_train_full, y_train_full)


# prediction; do check that the model is correct !!!!!!!!!!!!!!!!!!!!!!!

preds = [int(i) for i in xgb_model.predict(X_true)]
submission = pd.DataFrame({"id": np.arange(0,len(preds)), "Offer Accepted": le.inverse_transform(preds)})
submission.to_csv("submission.csv", index=False)

In [57]:
len(preds)

5305

In [58]:
submission.head()

Unnamed: 0,id,Offer Accepted
0,0,No
1,1,Yes
2,2,No
3,3,No
4,4,Yes
