# Preparación de datos

In [16]:
# Cargar los paquetes y librerías necesarios 

import os
import warnings

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix, make_scorer

# NOTE(review): with no category or module given, this filter suppresses every warning
# raised for the rest of the session; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [17]:
# Load the project dataset from the CSV file into the dataframe `df`.
# The file location can be overridden with the BASE_PROYECTO_CSV environment variable so
# the notebook can run on another machine; when it is not set, the original path is used.
DATA_PATH = os.environ.get(
    "BASE_PROYECTO_CSV",
    "/Users/Lalis/Desktop/Proyecto analitica 2/base_proyecto.csv",
)
df = pd.read_csv(DATA_PATH, sep=",", header=0, decimal=".")
df.head()  # show the "head" -- first 5 rows of the data; note, these are rows 0...4

Unnamed: 0,DemoReqPg_CallClicks_evt_count,air_purifier_page_top,bounces,checkout_page_top,client_id,contactus_top,converted_in_7days,country,customer_service_amc_login_top,customer_service_request_login_top,...,visited_customer_service_amc_login,visited_customer_service_request_login,visited_demo_page,visited_offer_page,visited_security_solutions_page,visited_storelocator,visited_successbookdemo,visited_vacuum_cleaner_page,visited_water_purifier_page,water_purifier_page_top
0,0,0,0,0,1000148488.154624,0,0,i,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,1000446930.154546,0,0,d,0,0,...,0,0,1,0,0,0,0,0,0,0
2,0,0,0,0,1000984214.15461,0,0,i,0,0,...,0,0,0,0,0,0,0,0,1,142
3,0,0,1,0,1001576926.154625,0,0,d,0,0,...,0,0,0,1,0,0,0,0,1,107
4,0,0,1,0,1002130272.15414,0,0,d,0,0,...,0,0,0,0,0,0,0,0,0,0


Como primer paso de limpieza se eliminaron los datos que presentaban incongruencias, como los casos en que la variable objetivo tenía valores de 2 o 3. Al ser tan pocos, se entiende que pudieron deberse a un error humano a la hora de diligenciar los datos. Adicionalmente, se observó que en esos mismos casos, donde la variable objetivo tenía valores incongruentes, faltaban diferentes datos.

In [18]:
df.info() # Check the dtype and non-null count of each column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 709244 entries, 0 to 709243
Data columns (total 45 columns):
 #   Column                                  Non-Null Count   Dtype 
---  ------                                  --------------   ----- 
 0   DemoReqPg_CallClicks_evt_count          709244 non-null  int64 
 1   air_purifier_page_top                   709244 non-null  int64 
 2   bounces                                 709244 non-null  int64 
 3   checkout_page_top                       709244 non-null  int64 
 4   client_id                               709244 non-null  object
 5   contactus_top                           709244 non-null  int64 
 6   converted_in_7days                      709244 non-null  int64 
 7   country                                 709244 non-null  object
 8   customer_service_amc_login_top          709244 non-null  int64 
 9   customer_service_request_login_top      709244 non-null  int64 
 10  date                                    709244 non-null 

In [19]:
# Data cleaning -- cast the columns that should be categorical in a single pass
categorical_cols = ['country', 'date', 'device', 'region', 'sourceMedium']
df = df.astype({col: 'category' for col in categorical_cols})

# Remove the client_id column from the frame
df = df.drop(columns='client_id')

In [20]:
# Impute the non-numeric columns (everything that is not int64/float64) with the
# 'constant' strategy; no fill_value is given, so the imputer's default constant is used.
# In the df.info() output recorded after this cell these columns appear as `object`.
non_numeric_cols = df.select_dtypes(exclude=['int64', 'float64']).columns
imputer = SimpleImputer(missing_values=np.nan, strategy='constant')
df[non_numeric_cols] = imputer.fit_transform(df[non_numeric_cols])

# Impute the int64/float64 columns with the column median.
# In the df.info() output recorded after this cell these columns appear as float64.
# NOTE(review): the target `converted_in_7days` is numeric, so it is imputed here too, and
# both imputers are fitted on the full dataset before the train/test split -- confirm
# this is intended.
numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

In [21]:
# Sanity check after imputation: dimensions first, then the per-column dtype / non-null summary.
# A bare `df.shape` on a non-final line is evaluated but never displayed, so print it.
print(df.shape)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 709244 entries, 0 to 709243
Data columns (total 44 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   DemoReqPg_CallClicks_evt_count          709244 non-null  float64
 1   air_purifier_page_top                   709244 non-null  float64
 2   bounces                                 709244 non-null  float64
 3   checkout_page_top                       709244 non-null  float64
 4   contactus_top                           709244 non-null  float64
 5   converted_in_7days                      709244 non-null  float64
 6   country                                 709244 non-null  object 
 7   customer_service_amc_login_top          709244 non-null  float64
 8   customer_service_request_login_top      709244 non-null  float64
 9   date                                    709244 non-null  object 
 10  demo_page_top                           7092

In [22]:
# Data cleaning -- one-hot encode every non-numeric column, dropping the first level of each
non_numeric_cols = df.select_dtypes(exclude=['int64', 'float64']).columns
df = pd.get_dummies(df, columns=non_numeric_cols, drop_first=True)

# Remove the limit on the number of columns pandas displays
pd.set_option('display.max_columns', None)

# Preview the encoded frame. NOTE: the split output recorded in this notebook reports
# 754 feature columns, i.e. 755 columns here once the target is included.
df.head()

Unnamed: 0,DemoReqPg_CallClicks_evt_count,air_purifier_page_top,bounces,checkout_page_top,contactus_top,converted_in_7days,customer_service_amc_login_top,customer_service_request_login_top,demo_page_top,dsls,fired_DemoReqPg_CallClicks_evt,fired_help_me_buy_evt,fired_phone_clicks_evt,goal4Completions,help_me_buy_evt_count,newUser,offer_page_top,pageviews,paid,phone_clicks_evt_count,security_solutions_page_top,sessionDuration,sessions,storelocator_top,successbookdemo_top,vacuum_cleaner_page_top,visited_air_purifier_page,visited_checkout_page,visited_contactus,visited_customer_service_amc_login,visited_customer_service_request_login,visited_demo_page,visited_offer_page,visited_security_solutions_page,visited_storelocator,visited_successbookdemo,visited_vacuum_cleaner_page,visited_water_purifier_page,water_purifier_page_top,country_i,date_2019-01-02,date_2019-01-03,date_2019-01-04,date_2019-01-05,date_2019-01-06,date_2019-01-07,date_2019-01-08,date_2019-01-09,date_2019-01-10,date_2019-01-11,date_2019-01-12,date_2019-01-13,date_2019-01-14,date_2019-01-15,date_2019-01-16,date_2019-01-17,date_2019-01-18,date_2019-01-19,date_2019-01-20,date_2019-01-21,date_2019-01-22,date_2019-01-23,date_2019-01-24,date_2019-01-25,date_2019-01-26,date_2019-01-27,date_2019-01-28,date_2019-01-29,date_2019-01-30,date_2019-01-31,date_2019-02-01,date_2019-02-02,date_2019-02-03,date_2019-02-04,date_2019-02-05,date_2019-02-06,date_2019-02-07,date_2019-02-08,date_2019-02-09,date_2019-02-10,date_2019-02-11,date_2019-02-12,date_2019-02-13,date_2019-02-14,date_2019-02-15,date_2019-02-16,date_2019-02-17,date_2019-02-18,date_2019-02-19,date_2019-02-20,date_2019-02-21,date_2019-02-22,date_2019-02-23,date_2019-02-24,date_2019-02-25,date_2019-02-26,date_2019-02-27,date_2019-02-28,date_2019-03-01,device_mobile,device_tablet,region_(not set),region_Abu Dhabi,region_Addis Ababa,region_Ajman,region_Akershus,region_Al Ahmadi Governorate,region_Al Asimah Governate,region_Al Batinah North Governorate,region_Al 
Farwaniyah Governorate,region_Al Jahra Governorate,region_Al Khor,region_Al Madinah Province,region_Al Qassim,region_Alabama,region_Alaska,region_Alberta,region_Algiers Province,region_Almaty Province,region_Amman Governorate,region_Andalusia,region_Andaman and Nicobar Islands,region_Andhra Pradesh,region_Arizona,region_Arkansas,region_Aseer Province,region_Assam,region_Aswan Governorate,region_Attica,region_Auckland,region_Australian Capital Territory,region_Auvergne-Rhone-Alpes,region_Baden-Wurttemberg,region_Baghdad Governorate,region_Balearic Islands,region_Bali,region_Banaadir,region_Bangkok,region_Barisal Division,region_Basel City,region_Basra Governorate,region_Bavaria,region_Beijing,region_Berlin,region_Bicol,region_Bihar,region_Binh Thuan Province,region_Braga,region_Brandenburg,region_British Columbia,region_Brunei-Muara District,region_Brussels,region_Bucharest,region_Budapest,region_Buenos Aires,region_Buskerud,region_Caicos Islands,region_Cairo Governorate,region_Calabarzon,region_California,region_Canton of Bern,region_Canton of Neuchatel,region_Capital Governorate,region_Capital Municipality,region_Capital Region of Denmark,region_Catalonia,region_Center District,region_Central Denmark Region,region_Central Development Region,region_Central Division,region_Central Governorate,region_Central Java,region_Central Region,region_Central Visayas,region_Centre,region_Chandigarh,region_Chhattisgarh,region_Chiba Prefecture,region_Chittagong Division,region_Chon Buri,region_Chuy Province,region_City of Zagreb,region_Cluj County,region_Colorado,region_Conakry,region_Connecticut,region_Cordillera Administrative Region,region_County Cork,region_County Dublin,region_County Galway,region_County Kerry,region_County Kildare,region_County Laois,region_County Limerick,region_Da Nang,region_Dadra and Nagar Haveli,region_Dakahlia Governorate,region_Dakar Region,region_Dalarna County,region_Davao Region,region_Delaware,region_Delhi,region_Dhaka Division,region_District 
of Columbia,region_Dnipropetrovsk Oblast,region_Doha,region_Dubai,region_Dublin City,region_East Java,region_East Kalimantan,region_Eastern Development Region,region_Eastern Province,region_Edo,region_England,region_Erbil Governorate,region_Far-Western Development Region,region_Faro District,region_Federal Territory of Kuala Lumpur,region_Flanders,region_Flevoland,region_Florida,region_Fujairah,region_Fujian,region_Gauteng,region_Gelderland,region_Geneva,region_Georgia,region_Gilgit-Baltistan,region_Giza Governorate,region_Goa,region_Grand Est,region_Greater Accra Region,region_Guangdong,region_Guatemala Department,region_Gujarat,region_Gwangju,region_Gyeonggi-do,region_Hai Phong,region_Haifa District,region_Hamburg,region_Hanoi,region_Harare Province,region_Harju County,region_Haryana,region_Hauts-de-France,region_Hawaii,region_Hawalli Governorate,region_Hebei,region_Hesse,region_Himachal Pradesh,region_Ho Chi Minh City,region_Hordaland,region_Hsinchu County,region_Hyogo Prefecture,region_Ida-Viru County,region_Idaho,region_Ile-de-France,region_Illinois,region_Ilocos Region,region_Indiana,region_Iowa,region_Islamabad Capital Territory,region_Istanbul,region_Jakarta,region_Jammu and Kashmir,region_Jazan,region_Jerusalem District,region_Jharkhand,region_Jiangsu,region_Johor,region_Kabul,region_Kanagawa Prefecture,region_Kansas,region_Karbala Governorate,region_Karnataka,region_Kemerovo Oblast,region_Kentucky,region_Kerala,region_Kharkiv Oblast,region_Khomas Region,region_Khulna Division,region_Khyber Pakhtunkhwa,region_Kiambu County,region_Kigali City,region_Klaipeda County,region_Kronoberg County,region_KwaZulu-Natal,region_Kyiv city,region_La Pampa,region_Laghouat Province,region_Lagos,region_Lakshadweep,region_Lazio,region_Lesser Poland Voivodeship,region_Limassol,region_Limburg,region_Lisbon,region_Ljubljana,region_Lombardy,region_Louisiana,region_Lower Saxony,region_Luanda Province,region_Lusaka Province,region_Madhya 
Pradesh,region_Madrid,region_Maharashtra,region_Makkah Province,region_Mandalay Region,region_Mangystau Province,region_Manipur,region_Manitoba,region_Maryland,region_Masovian Voivodeship,region_Massachusetts,region_Mecklenburg-Vorpommern,region_Meghalaya,region_Metro Manila,region_Metropolitan City of Milan,region_Mexico City,region_Michigan,region_Mid-Western Development Region,region_Minnesota,region_Minsk Region,region_Mississippi,region_Missouri,region_Miyazaki Prefecture,region_Mizoram,region_Mombasa County,region_Montana,region_Morobe Province,region_Moscow,region_Muharraq Governorate,region_Muscat Governorate,region_Mymensingh Division,region_Nagaland,region_Nagano Prefecture,region_Nairobi County,region_Nakhon Pathom,region_Naypyidaw Union Territory,region_Nebraska,region_Negeri Sembilan,region_Nevada,region_New Hampshire,region_New Jersey,region_New South Wales,region_New York,region_Newfoundland and Labrador,region_Nicosia,region_Nidwalden,region_North Brabant,region_North Carolina,region_North Dakota,region_North Holland,region_North Ras Al Khaimah,region_North Rhine-Westphalia,region_North Sumatra,region_Northern Borders Province,region_Northern Ireland,region_Northern Mindanao,region_Northern Province,region_Nova Scotia,region_Nuevo Leon,region_Obwalden,region_Odisha,region_Ogun State,region_Ohio,region_Oklahoma,region_Ontario,region_Oregon,region_Oslo,region_Ouest Department,region_Oyo,region_Pardubice Region,region_Pembroke Parish,region_Penang,region_Pennsylvania,region_Perak,region_Phnom Penh,region_Phra Nakhon Si Ayutthaya,region_Piedmont,region_Plaines Wilhems District,region_Port Louis District,region_Prague,region_Pristina District,region_Puducherry,region_Punjab,region_Quebec,region_Queensland,region_Rajasthan,region_Rajshahi Division,region_Rangpur Division,region_Ras al Khaimah,region_Rayong,region_Red Sea Governorate,region_Region Zealand,region_Region of Southern Denmark,region_Rhineland-Palatinate,region_Rhode 
Island,region_Riga,region_Rivers,region_Riyadh Province,region_Rogaland,region_Saint Petersburg,region_Saitama Prefecture,region_Sakhalin Oblast,region_Samut Prakan,region_Sarawak,region_Saskatchewan,region_Saxony,region_Schleswig-Holstein,region_Scotland,region_Selangor,region_Seoul,region_Sharjah,region_Siem Reap Province,region_Sikkim,region_Silesian Voivodeship,region_Sindh,region_Skane County,region_Sofia City Province,region_Sohag Governorate,region_South Australia,region_South Carolina,region_South Governorate,region_South Holland,region_South Moravian Region,region_South Sumatra,region_South-East District,region_Southern Governorate,region_St. Gallen,region_St. James Parish,region_State of Mato Grosso,region_State of Rio de Janeiro,region_State of Santa Catarina,region_State of Sao Paulo,region_Stockholm County,region_Styria,region_Suez Governorate,region_Sylhet Division,region_Tabuk Province,region_Taichung City,region_Taipei City,region_Tamil Nadu,region_Tanga Region,region_Taoyuan County,region_Taranaki,region_Tatarstan,region_Tehran,region_Tel Aviv District,region_Telangana,region_Tennessee,region_Texas,region_Thai Binh,region_Thuringia,region_Tokyo,region_Tra Vinh,region_Tripoli District,region_Tripura,region_Udon Thani,region_Ulaanbaatar,region_Umm Salal Municipality,region_Utah,region_Utrecht,region_Uttar Pradesh,region_Uttarakhand,region_Uusimaa,region_Vallee du Bandama District,region_Varna,region_Vastra Gotaland County,region_Vermont,region_Vestfold,region_Victoria,region_Vienna,region_Virginia,region_Wales,region_Walloon Region,region_Washington,region_Wellington,region_West Bengal,region_West Coast,region_West Java,region_Western Area,region_Western Australia,region_Western Cape,region_Western Development Region,region_Western Province,region_Western Visayas,region_Wisconsin,region_Wyoming,region_Yangon Region,region_Yucatan,region_Zabaykalsky Krai,region_Zaire Province,region_Zakarpats'ka oblast,region_Zurich,sourceMedium_10.85.52.152 / 
referral,sourceMedium_127.0.0.1:50495 / referral,sourceMedium_172.17.0.40:3000 / referral,sourceMedium_172.24.0.151:1000 / referral,sourceMedium_1800customercare.com / referral,sourceMedium_192.168.0.1 / referral,sourceMedium_192.168.12.138:81 / referral,sourceMedium_192.168.17.237:197 / referral,sourceMedium_Blog / All-Gud-Things,sourceMedium_Facebook / Social,sourceMedium_Google / (not set),sourceMedium_Google / Search,sourceMedium_Google / cpc,sourceMedium_GrabOn / (not set),sourceMedium_GrabOn / GrabOn,sourceMedium_Quora / (not set),sourceMedium_Social / PayTM-Offer,sourceMedium_YouTube / video,sourceMedium_aaditrienterprise.in / referral,sourceMedium_abc-directory.com / referral,sourceMedium_acs-safekey.americanexpress.com / referral,sourceMedium_acs2.onlinesbi.com / referral,sourceMedium_adfs.contiwan.com / referral,sourceMedium_adfs.flowserve.com / referral,sourceMedium_adguard.com / referral,sourceMedium_adjetter.com / referral,sourceMedium_allindiajobs.in / referral,sourceMedium_alljobassam.com / referral,sourceMedium_amazon.in / referral,sourceMedium_amp-reddit-com.cdn.ampproject.org / referral,sourceMedium_analytics.google.com / referral,sourceMedium_announcements.myefl.com / referral,sourceMedium_applications.wes.org / referral,sourceMedium_aquaguardonthego.in / referral,sourceMedium_ascentgroup.org / referral,sourceMedium_ask / organic,sourceMedium_au.search.yahoo.com / referral,sourceMedium_avg / organic,sourceMedium_babylon / organic,sourceMedium_bestwaterpurifiers.in / referral,sourceMedium_bing / organic,sourceMedium_bitmotion-tab.com / referral,sourceMedium_biztools.corp.google.com / referral,sourceMedium_brandkathas.blogspot.com / referral,sourceMedium_broomberg-in.cdn.ampproject.org / referral,sourceMedium_ca.search.yahoo.com / referral,sourceMedium_cleanindiashow.com / referral,sourceMedium_clickserve.dartsearch.net / referral,sourceMedium_cn.bing.com / referral,sourceMedium_companies.naukri.com / 
referral,sourceMedium_contents-india.internet.apps.samsung.com / referral,sourceMedium_conzumr.com / referral,sourceMedium_couponzguru.com / referral,sourceMedium_courier9.com / referral,sourceMedium_cp.yojnaindia.com:2096 / referral,sourceMedium_cpcthrissur.com / referral,sourceMedium_cse.google.com / referral,sourceMedium_customer-carenumber.com / referral,sourceMedium_customercarecontacts-com.cdn.ampproject.org / referral,sourceMedium_customercaredb.in / referral,sourceMedium_customercarehelp.in / referral,sourceMedium_customercareinfo.in / referral,sourceMedium_customercarephonenumber.in / referral,sourceMedium_customerkarts.com / referral,sourceMedium_de.search.yahoo.com / referral,sourceMedium_dealsunny.com / referral,sourceMedium_demo.draquaguard.co.in / referral,sourceMedium_dev.manavit.com / referral,sourceMedium_dialtous.com / referral,sourceMedium_discretesearch.com / referral,sourceMedium_docs.google.com / referral,sourceMedium_dps.fisglobal.com / referral,sourceMedium_draeroguard.com / referral,sourceMedium_draquaguard.co.in / referral,sourceMedium_dropprice.co.in / referral,sourceMedium_duckduckgo / organic,sourceMedium_duckduckgo.com / referral,sourceMedium_ecosia.org / referral,sourceMedium_ecustomer.in / referral,sourceMedium_efl-dev.iksuladev.com / referral,sourceMedium_email.iitm.ac.in / referral,sourceMedium_en.m.wikipedia.org / referral,sourceMedium_en.wikipedia.org / referral,sourceMedium_enow.com / referral,sourceMedium_ess.adp.in / referral,sourceMedium_eurekacsd.com / referral,sourceMedium_eurekaforbesreferralprogram.com / referral,sourceMedium_eurosmile.in / referral,sourceMedium_facebook / social,sourceMedium_facebook.com / referral,sourceMedium_facebook_shopsection / social,sourceMedium_fed.apps.technip.com / referral,sourceMedium_firefightingequipments.in / referral,sourceMedium_forbesfacility.com / referral,sourceMedium_forbeshealthconditioners.com / referral,sourceMedium_forbespro.blogspot.com / 
referral,sourceMedium_forbesprowatersolutions.com / referral,sourceMedium_fssfedpitc.ge.com / referral,sourceMedium_g.results.supply / referral,sourceMedium_gaintheory / amc_emailer,sourceMedium_gaintheory / amc_sms,sourceMedium_gateway.zscalertwo.net / referral,sourceMedium_gladmin.intermesh.net / referral,sourceMedium_goldenpeacockaward.com / referral,sourceMedium_googcampanarlyCnZ1XwfDDwaAld3EALw_wcB / (not set),sourceMedium_google / (not set),sourceMedium_google / cpc,sourceMedium_google / organic,sourceMedium_google.co.in / referral,sourceMedium_google.com / referral,sourceMedium_googleads.g.doubleclick.net / referral,sourceMedium_googleadservices.com / referral,sourceMedium_googleapis.com / referral,sourceMedium_goto.google.com / referral,sourceMedium_gpo.iitb.ac.in / referral,sourceMedium_grabon / (not set),sourceMedium_greateastern.in / referral,sourceMedium_groupcareershaper.com / referral,sourceMedium_gstatic.com / referral,sourceMedium_healthyhomes / blog,sourceMedium_healthyhomes.in / referral,sourceMedium_helplinenumber-net.cdn.ampproject.org / referral,sourceMedium_helplinenumber.net / referral,sourceMedium_hiyo.com / referral,sourceMedium_home.myplaycity.com / referral,sourceMedium_homesessive.com / referral,sourceMedium_huaweimobilewifi.com / referral,sourceMedium_iksula.kapdesk.com / referral,sourceMedium_ilovebargain.com / referral,sourceMedium_images.google.com / referral,sourceMedium_in.downloadsearch.cnet.com / referral,sourceMedium_in.linkedin.com / referral,sourceMedium_in.search.yahoo.com / referral,sourceMedium_in.yhs4.search.yahoo.com / referral,sourceMedium_in.zapmetasearch.com / referral,sourceMedium_inbrowserapp.com / referral,sourceMedium_indiacustomercare.com / referral,sourceMedium_info.com / referral,sourceMedium_info.dogpile.com / referral,sourceMedium_inmumm28.tcs.com / referral,sourceMedium_instagram.com / referral,sourceMedium_int.search.tb.ask.com / referral,sourceMedium_internet-start.net / referral,sourceMedium_izito.co.in / 
referral,sourceMedium_japanelectronics.com.bd / referral,sourceMedium_jobs.nenow.in / referral,sourceMedium_justdial.com / referral,sourceMedium_karnatakaone.gov.in / referral,sourceMedium_kitchenarena.in / referral,sourceMedium_l.facebook.com / referral,sourceMedium_l.wl.co / referral,sourceMedium_lastpass.com / referral,sourceMedium_lavasoft.gosearchresults.com / referral,sourceMedium_linkedin / social,sourceMedium_linkedin.com / referral,sourceMedium_linkspurt.com / referral,sourceMedium_livemobilesearch.com / referral,sourceMedium_lm.facebook.com / referral,sourceMedium_lnkd.in / referral,sourceMedium_localhost / referral,sourceMedium_localhost:4067 / referral,sourceMedium_login.ext.hpe.com / referral,sourceMedium_login.zscloud.net / referral,sourceMedium_ludhianaonline.com / referral,sourceMedium_m.facebook.com / referral,sourceMedium_m.indiacustomercare.com / referral,sourceMedium_m.youtube.com / referral,sourceMedium_m1.hs9.in:2096 / referral,sourceMedium_mail.autobahntrucking.com / referral,sourceMedium_mail.bagnetwork.in / referral,sourceMedium_mail.bhel.in / referral,sourceMedium_mail.cognizant.com / referral,sourceMedium_mail.google.com / referral,sourceMedium_mail.gov.in / referral,sourceMedium_mail.reb.mozaic.in / referral,sourceMedium_mail.tropmet.res.in / referral,sourceMedium_mail.yahoo.com / referral,sourceMedium_merchants.google.com / referral,sourceMedium_mg.mail.yahoo.com / referral,sourceMedium_msn.com / referral,sourceMedium_myactivity.google.com / referral,sourceMedium_mycheaptraffic.com / referral,sourceMedium_myprivatesearch.com / referral,sourceMedium_naukri.com / referral,sourceMedium_new.internet-start.net / referral,sourceMedium_newtab.club / referral,sourceMedium_nileshpawar.com / referral,sourceMedium_np.reddit.com / referral,sourceMedium_onlinemlmcommunity.com / referral,sourceMedium_outlook.live.com / referral,sourceMedium_ph.search.yahoo.com / referral,sourceMedium_pipecandy.com / referral,sourceMedium_pr1.netcoresmartech.com / 
referral,sourceMedium_premium.aegislisa.net / referral,sourceMedium_prod.uhrs.playmsn.com / referral,sourceMedium_prodx.in / referral,sourceMedium_prolinks.rediffmailpro.com / referral,sourceMedium_quora.com / referral,sourceMedium_r.search.aol.com / referral,sourceMedium_reddit.com / referral,sourceMedium_rediffmail.com / referral,sourceMedium_reliableresources.in / referral,sourceMedium_republictv / video,sourceMedium_results.searchlock.com / referral,sourceMedium_s.zlsite.com / referral,sourceMedium_search.1and1.com / referral,sourceMedium_search.facemojikeyboard.com / referral,sourceMedium_search.gmx.net / referral,sourceMedium_search.handy-tab.com / referral,sourceMedium_search.mysearch.com / referral,sourceMedium_searchencrypt.com / referral,sourceMedium_searchingdog.com / referral,sourceMedium_searchini.com / referral,sourceMedium_searchprivacy.co / referral,sourceMedium_secure4.arcot.com / referral,sourceMedium_securegw.paytm.in / referral,sourceMedium_secureonline.idbibank.com / referral,sourceMedium_servicecentrelocator.in / referral,sourceMedium_shapoorjipallonji.com / referral,sourceMedium_sharptronics.in / referral,sourceMedium_shop.cafecoffeeday.com / referral,sourceMedium_shopclues.com / referral,sourceMedium_silver-search.com / referral,sourceMedium_sinnfpa01.amers.ibechtel.com:6080 / referral,sourceMedium_speedial.com / referral,sourceMedium_start.mysearchdial.com / referral,sourceMedium_supermario.corp.google.com / referral,sourceMedium_surf.bluetouch.hk / referral,sourceMedium_t.co / referral,sourceMedium_t.justdial.com / referral,sourceMedium_team-bhp.com / referral,sourceMedium_tools_aws2.dataweave.co / referral,sourceMedium_translate.googleusercontent.com / referral,sourceMedium_twitter / social,sourceMedium_uk.downloadsearch.cnet.com / referral,sourceMedium_uk.search.yahoo.com / referral,sourceMedium_us.search.yahoo.com / referral,sourceMedium_varmas.in / referral,sourceMedium_water--purifiers-com.cdn.ampproject.org / 
referral,sourceMedium_water-purifier-systems.blogspot.com / referral,sourceMedium_water-purifiers.com / referral,sourceMedium_waterpurifier.org / referral,sourceMedium_waterpurifierdealers.com / referral,sourceMedium_web.facebook.com / referral,sourceMedium_web.telegram.org / referral,sourceMedium_weberp2.intermesh.net / referral,sourceMedium_webmail.tatadocomo.com / referral,sourceMedium_webmail.vsnl.com / referral,sourceMedium_webmail2.iitk.ac.in / referral,sourceMedium_windowssearch.com / referral,sourceMedium_www-customercarephonenumber-in.cdn.ampproject.org / referral,sourceMedium_xaxis / video,sourceMedium_yahoo / organic,sourceMedium_yopmail.com / referral,sourceMedium_youtube / social,sourceMedium_youtube / video,sourceMedium_youtube.com / referral,sourceMedium_zapmeta.co.in / referral,sourceMedium_zapr / video,sourceMedium_zelect.in / referral
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,91.0,1.0,0.0,0.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1.0,0.0,0.0,141.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,142.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,999.0,10.0,1.0,0.0,0.0,1542.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,107.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1761.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [23]:
# Separate the target column from the feature matrix
target_col = 'converted_in_7days'
X, y = df.drop(columns=target_col), df[target_col]

In [24]:
# Seed shared by the stochastic steps of this cell
SEED = 77300

# Keep seeding NumPy's global generator for any later code that draws from it
np.random.seed(SEED)

# Randomly split the data: 80% for training and 20% for testing.
# IMPORTANT: the split is stratified on y (converted_in_7days), so the proportion of
# converted and non-converted rows is preserved in both partitions.
# random_state is passed explicitly so the split does not depend on the state of the
# global random number generator (e.g. on which cells were run before this one).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=SEED
)

# Check the results
print(X_train.shape)
print(y_train.shape)

print(X_test.shape)
print(y_test.shape)


(567395, 754)
(567395,)
(141849, 754)
(141849,)


In [25]:
# Primero, definimos un conjunto de funciones para calcular las métricas del modelo

# Curva ROC
def plot_roc(y_test, y_pred):
    """Draw the ROC curve for a binary classifier and show it.

    Parameters
    ----------
    y_test : array-like
        True labels. Label 1 is treated as the positive class (pos_label=1).
    y_pred : array-like
        Scores for the positive class; forwarded to roc_curve as the score
        argument (presumably predicted probabilities; hard 0/1 labels would
        give a curve with a single intermediate point).

    Returns
    -------
    None
        The figure is rendered with plt.show(); the AUC appears in the legend.
    """
    # roc_curve returns (false positive rate, true positive rate, thresholds);
    # the thresholds are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=1, drop_intermediate=False)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (AUC = %0.2f)' % roc_auc)
    # Diagonal reference line: a classifier with no discriminative power
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    # Slight padding so the curve is not clipped at the axes
    plt.xlim([-0.001, 1.001])
    plt.ylim([-0.001, 1.001])
    # The x axis is fpr, i.e. 1 - specificity = false POSITIVE rate
    plt.xlabel('1-Specificity (False Positive Rate)')
    plt.ylabel('Sensitivity (True Positive Rate)')
    plt.title('ROC curve')
    plt.legend(loc="lower right")
    plt.show()

# Matriz de Confusión: cm[0,0], cm[0,1], cm[1,0], cm[1,1]: tn, fp, fn, tp

# Sensitivity
def custom_sensitivity_score(y_test, y_pred):
    """Sensitivity (true positive rate): TP / (TP + FN).

    Both counts come from the second row of the confusion matrix, laid out
    as [[tn, fp], [fn, tp]].
    """
    matrix = confusion_matrix(y_test, y_pred)
    false_neg, true_pos = matrix[1, 0], matrix[1, 1]
    return true_pos / (true_pos + false_neg)

# Specificity
def custom_specificity_score(y_test, y_pred):
    """Specificity (true negative rate): TN / (TN + FP).

    Both counts come from the first row of the confusion matrix, laid out
    as [[tn, fp], [fn, tp]].
    """
    matrix = confusion_matrix(y_test, y_pred)
    true_neg, false_pos = matrix[0, 0], matrix[0, 1]
    return true_neg / (true_neg + false_pos)

# Positive Predictive Value
def custom_ppv_score(y_test, y_pred):
    """Positive predictive value (precision): TP / (TP + FP).

    Both counts come from the second column of the confusion matrix, laid
    out as [[tn, fp], [fn, tp]].
    """
    matrix = confusion_matrix(y_test, y_pred)
    false_pos, true_pos = matrix[0, 1], matrix[1, 1]
    return true_pos / (true_pos + false_pos)

# Negative Predictive Value
def custom_npv_score(y_test, y_pred):
    """Negative predictive value: TN / (TN + FN).

    Both counts come from the first column of the confusion matrix, laid
    out as [[tn, fp], [fn, tp]].
    """
    matrix = confusion_matrix(y_test, y_pred)
    true_neg, false_neg = matrix[0, 0], matrix[1, 0]
    return true_neg / (true_neg + false_neg)

# Accuracy
def custom_accuracy_score(y_test, y_pred):
    """Accuracy: (TN + TP) / (TN + TP + FN + FP).

    Uses the four cells of the confusion matrix, laid out as
    [[tn, fp], [fn, tp]]: the diagonal holds the correct predictions and
    the off-diagonal cells hold the errors.
    """
    matrix = confusion_matrix(y_test, y_pred)
    hits = matrix[0, 0] + matrix[1, 1]
    misses = matrix[1, 0] + matrix[0, 1]
    return hits / (hits + misses)

In [None]:
from sklearn import svm
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

# Pipeline steps: rescale the features with MinMaxScaler, then fit an SVM.
# probability=True makes the fitted SVC expose predict_proba.
svm_estimators = [
    ('normalize', MinMaxScaler()),
    ('svm', svm.SVC(probability=True)),
]

# Assemble the steps into the model used below, named Classifier_SVM
Classifier_SVM = Pipeline(steps=svm_estimators, verbose=False)

# Train the whole pipeline on the training partition
Classifier_SVM.fit(X_train, y_train)

In [None]:
# Score the test partition with the fitted pipeline. Only the second column of
# predict_proba is kept: the estimated probability of the second class in
# classes_ (presumably label 1, i.e. converted -- confirm against classes_).
y_pred_prob = Classifier_SVM.predict_proba(X_test)[:, 1]

# Turn probabilities into hard 0/1 labels: strictly above the cut-off -> 1.
# NOTE(review): how the 0.6073 cut-off was chosen is not shown in this cell.
class_threshold = 0.6073
y_pred = (y_pred_prob > class_threshold).astype(int)

# Display the raw probabilities as the cell output
y_pred_prob

In [None]:
### REFERENCES ON RFE (Recursive Feature Elimination)
### https://towardsdatascience.com/feature-selection-in-python-recursive-feature-elimination-19f1c39b8d15
### https://www.kite.com/python/docs/sklearn.feature_selection.RFE

from sklearn.svm import LinearSVC
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler

# RFE ranks features through the estimator's coef_ / feature_importances_.
# The Classifier_SVM pipeline (MinMaxScaler + default-kernel SVC) exposes neither,
# so passing it to RFE fails at fit time. Use the linear SVM imported above,
# which provides coef_.
# Min-max scaling is computed per column, so scaling once up front is equivalent
# to rescaling inside every elimination round.
X_train_scaled = MinMaxScaler().fit_transform(X_train)

# create the RFE model for the linear SVM classifier and select attributes.
# dual=False is the formulation scikit-learn recommends when n_samples > n_features.
# NOTE(review): step=1 refits the model once per removed feature; raise `step`
# if the run time is prohibitive on the full training set.
rfeSVM = RFE(estimator=LinearSVC(dual=False), n_features_to_select=20, step=1)
rfeSVM.fit(X_train_scaled, y_train)

# ranking_ has one entry per column of X_train; rank 1 marks a selected feature
ranking = rfeSVM.ranking_

# Which variables remain in the model? The 20 selected features (rank 1) come
# first, followed by the next-best ones, up to 30 rows.
pd.DataFrame({'feature': X_train.columns, 'ranking': ranking}).sort_values('ranking').head(30)