In [16]:
import pandas as pd
import numpy as np
import seaborn as sns

In [32]:
# Notebook-wide pandas display limits: up to 500 columns and 50 rows per frame.
display_limits = {
    'display.max_columns': 500,
    'display.max_rows': 50,
}
for option_name, limit in display_limits.items():
    pd.set_option(option_name, limit)

In [18]:
# Load the training set from the local data directory (path is relative to the notebook's working directory).
df_train = pd.read_csv('./data/training_set.csv')

In [27]:
# Load the test set from the local data directory (path is relative to the notebook's working directory).
df_test = pd.read_csv('./data/test_set.csv')

### Comparing train and test headers

In [28]:
# Columns that are present in the training set but missing from the test set.
df_train.columns.difference(df_test.columns)


Index(['confidence', 'cookie_id', 'date', 'label', 'user_id', 'visit_number'], dtype='object')

### Checking each feature's correlation with the label in the train df

In [47]:
# Correlation of every numeric column with `label`, sorted from the highest
# value down (NaN entries end up last).
# Non-numeric columns are filtered out explicitly before calling corr():
# the frame still contains the unparsed `date` text column, and pandas >= 2.0
# raises on such columns instead of silently skipping them.
label_corr = (
    df_train
    .select_dtypes(include=['number', 'bool'])
    .corr()['label']
    .sort_values(ascending=False)
)

In [69]:
# Display the per-column correlation with `label` (sorted in descending order).
label_corr

label                                                             1.000000
page_views_account_dashboard                                      0.190346
Visits_c_list_order                                               0.131289
page_views_search_filter_fuel                                     0.127931
entry_page_messaging_conversation                                 0.126534
visit_time                                                        0.123500
page_views_editorial_news                                         0.092834
Visits_c_detail_save_favorite                                     0.089356
page_views_editorial_videos                                       0.084963
entry_page_detail                                                 0.083484
Visits_c_list_save_favorite                                       0.082068
page_views_detail                                                 0.079863
page_views_my_saved_ads                                           0.075950
page_views_detail_fullscr

### A NaN in a correlation means that the relevant column always has the same value. Because of this, it cannot help us in our prediction, and therefore we will drop all the features whose correlation with the label is NaN

In [71]:
# Remove every column whose correlation with `label` came out as NaN.
# `errors='ignore'` keeps this cell safe to re-run: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
# The result is rebound to `df_train` rather than mutating the frame in place.
nan_corr_columns = label_corr[label_corr.isna()].index.tolist()
df_train = df_train.drop(columns=nan_corr_columns, errors='ignore')

In [72]:
# Inspect the training frame after the NaN-correlation columns have been dropped.
df_train

Unnamed: 0,date,cookie_id,visit_number,user_id,device_Mobile_Phone,channel_Display,channel_Email,channel_Push_Notifications,entry_page_account_dashboard,entry_page_alert_results,entry_page_complete_register,entry_page_detail,entry_page_detail_error,entry_page_detail_fullscreen,entry_page_editorial_generic_detail,entry_page_editorial_videos,entry_page_form_from_cross_contact_call,entry_page_form_from_cross_contact_viewed,entry_page_list,entry_page_menu_open,entry_page_messaging_conversation,entry_page_my_ads,entry_page_my_alerts,entry_page_my_messaging_inbox,entry_page_my_saved_ads,entry_page_need_user_account,entry_page_onboarding_buyer_alerts,entry_page_onboarding_buyer_chat,entry_page_order,entry_page_professional_stock,entry_page_recommend_your_friends,entry_page_saved_searches,entry_page_search_filter,entry_page_search_filter_brand,entry_page_search_filter_transmission,entry_page_search_filter_version,visit_time,ad_insertion,login,register,alerts,e1_ConfirmationEmail_1,e1_ConfirmationEmail_2,e2_CallPhone_1,e2_CallPhone_2,e11_FirstSentMessage_1,e11_FirstSentMessage_2,Visits_c_cross_contact_confirmation_email,Visits_c_detail_edit_my_ad,Visits_c_detail_freespee_error,Visits_c_detail_icon_discount_applied,Visits_c_detail_icon_funded,Visits_c_detail_icon_taxes_included,Visits_c_detail_icon_warranty,Visits_c_detail_remove_favorite,Visits_c_detail_save_favorite,Visits_c_detail_see_vehicle_history,Visits_c_detail_share_ad,Visits_c_detail_view_conversation,Visits_c_link_desktop_professional,Visits_c_list_create_alert_without_filters,Visits_c_list_order,Visits_c_list_remove_favorite,Visits_c_list_save_favorite,Visits_c_menu_my_favorites,Visits_c_menu_new_search,Visits_c_messaging_conversation_view_ad,Visits_c_messaging_conversation_view_publisher,Visits_c_my_ads_renew,Visits_c_my_alerts_disable_alert_emails,Visits_c_my_alerts_disable_alert_push_notif,Visits_c_my_alerts_enable_alert_emails,Visits_c_my_alerts_enable_alert_push_notif,Visits_c_my_alerts_go_to_filters,Visits_c
_my_alerts_go_to_filters_from_empty_page,Visits_c_my_alerts_remove_alert,Visits_c_my_messaging_inbox_delete_conversation,Visits_c_my_saved_ads_remove_favorite,Visits_c_phone_entry_first_not_now_phone_entry_phone,Visits_c_saved_searches_create_alert,Visits_c_search_filter_recent_search,Visits_c_tab_bar_my_ads,Visits_c_tab_bar_my_messages,Visits_c_tab_bar_new_search,Visits_c_tab_bar_saved_searches,Visits_c_tab_bar_sell_car,page_views_account_dashboard,page_views_ad_deletion_confirmation,page_views_ad_deletion_error,page_views_ad_insertion_error,page_views_ad_modification_error,page_views_ad_modification_km,page_views_alert_results,page_views_buying_safely,page_views_c_my_saved_ads_call_phone,page_views_c_my_saved_ads_contact_messaging,page_views_carousel,page_views_detail,page_views_detail_car_sold,page_views_detail_error,page_views_detail_fullscreen,page_views_edit_my_account_confirmation,page_views_edit_my_account_error,page_views_editorial_generic_detail,page_views_editorial_news,page_views_editorial_search,page_views_editorial_tests,page_views_editorial_videos,page_views_editorial_videos_detail,page_views_feedback,page_views_form_detail,page_views_form_error,page_views_form_from_cross_contact_call,page_views_form_from_cross_contact_recommended,page_views_form_from_cross_contact_viewed,page_views_list,page_views_login_error,page_views_menu_open,page_views_messaging_conversation,page_views_my_ads,page_views_my_alerts,page_views_my_messaging_inbox,page_views_my_saved_ads,page_views_need_user_account,page_views_notifications_configuration,page_views_onboarding_seller_chat,page_views_pay_ad,page_views_privacy_configuration,page_views_professional_stock,page_views_recommend_your_friends,page_views_register_error,page_views_report_an_announcer_confirmation,page_views_saved_searches,page_views_search_filter,page_views_search_filter_car_body,page_views_search_filter_car_doors,page_views_search_filter_car_places,page_views_search_filter_color,page_views_search_filter_fuel,p
age_views_search_filter_region_level2,page_views_search_filter_transmission,page_views_search_filter_user_role_id,page_views_search_filter_version,page_views_send_buyer_phone_to_seller,label,confidence
0,2019-02-03,14890,42,8498,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,5,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0.958484
1,2019-02-03,187498,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,2,0.996770
2,2019-01-31,54206,143,1617,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,1,1,0,15,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0.946318
3,2019-01-16,54381,70,1715,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,515,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0.963148
4,2019-01-19,14891,15,8499,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0.997309
5,2019-02-06,180467,12,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0.951895
6,2019-01-21,162404,20,76377,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0.924831
7,2019-01-15,44649,248,25375,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0.926503
8,2019-02-05,273155,6,120271,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,479,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0.908805
9,2019-01-31,112003,6,56090,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0.928742
