### Analysing and predicting click activity by one user

> Clicked - Target variable; whether the user will click the ad or not.<br>
> Names, emails, Country - High-cardinality categorical columns; need to be removed or replaced.<br>
> Time Spent on Site - Minutes spent by the user in one session.<br>
> Salary - Salary of user<br>


In [None]:
import pandas as pd
import pycountry_convert as pc

import seaborn as sns
sns.set_theme(style='whitegrid')
import matplotlib.pyplot as plt


from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [None]:
# Load the raw ad-click dataset; the file is decoded as Latin-1.
df_orig = pd.read_csv(
    'Facebook_Ads_2.csv',
    encoding='latin1',
)
# Preview the first five rows as the cell output.
df_orig.head(n=5)

### Use Continent instead of the high-cardinality Country column, and re-map unidentified countries manually



In [None]:
# Count the distinct values in every column to see which ones are high-cardinality.
df_orig.nunique()

In [None]:
def country_to_continent(country_name):
    """Translate a country name into the name of its continent.

    The lookup chains three ``pycountry_convert`` helpers:
    country name -> alpha-2 code -> continent code -> continent name.

    Args:
        country_name: Country name as it appears in the dataset.

    Returns:
        The continent name, or the sentinel string ``"Unidentified"`` when any
        step of the lookup fails, so those rows can be re-mapped afterwards.
    """
    try:
        alpha2 = pc.country_name_to_country_alpha2(country_name)
        continent_code = pc.country_alpha2_to_continent_code(alpha2)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except Exception:
        # Best-effort lookup: every ordinary failure maps to the sentinel.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate, so a long ``apply`` can still be stopped.
        return "Unidentified"

# Work on a deep copy so the raw frame stays untouched, then derive a
# continent for every row from its country name.
df = df_orig.copy(deep=True)
df['Continent'] = df.Country.apply(country_to_continent)
# Show the enriched frame as the cell output.
df


In [None]:
# Number of distinct countries that ended up in each continent bucket
# (including the "Unidentified" sentinel bucket).
df.groupby('Continent')[['Country']].nunique()

Replace Unidentified Continents by manually mapping the countries

In [None]:
# Distinct country names whose continent lookup fell back to "Unidentified",
# in order of first appearance in the frame.
list_countries = df.loc[df['Continent'] == 'Unidentified', 'Country'].unique().tolist()
list_countries

In [None]:
# Hand-assigned continents for the unresolved countries, listed in the same
# order as ``list_countries``.
# NOTE(review): the pairing is purely positional -- if the data or its row
# order changes, these labels silently stop lining up with the countries, and
# ``zip`` truncates to the shorter list. Confirm both lists still match.
list_continents = [
    'North America',
    'Asia',
    'Africa',
    'North America',
    'Antarctica',
    'Africa',
    'North America',
    'Antarctica',
    'North America',
    'Africa',
    'Europe',
    'Antarctica',
    'Antarctica',
    'Asia',
]
dict_to_map_countries = {
    country: continent
    for country, continent in zip(list_countries, list_continents)
}

def map_country_continent(X):
    """Resolve the continent for one ``(country, continent)`` pair.

    Args:
        X: Indexable pair where ``X[0]`` is the country name and ``X[1]`` is
            the continent found so far (possibly ``'Unidentified'``).

    Returns:
        ``X[1]`` unchanged when it is already a real continent; otherwise the
        manual mapping from ``dict_to_map_countries`` for ``X[0]``, falling
        back to ``'Unidentified'`` when the country has no manual entry.
    """
    country, continent = X[0], X[1]
    if continent != 'Unidentified':
        return continent
    # ``country`` is a scalar here, so a plain dict lookup is used; the former
    # ``X[0].map(...)`` call only exists on pandas objects and raised
    # AttributeError. The input pair is no longer modified in place.
    return dict_to_map_countries.get(country, continent)

# Overwrite only the "Unidentified" rows with the hand-made country ->
# continent mapping; every other row keeps the continent it already has.
# Unidentified countries missing from the mapping become NaN.
df['Continent'] = df['Continent'].mask(
    df['Continent'] == 'Unidentified',
    df['Country'].map(dict_to_map_countries),
)
# Re-check how many distinct countries fall under each continent.
df.groupby('Continent')[['Country']].nunique()

Now we have all 7 continents identified. We can use this variable instead of the high-cardinality Country variable.

### EDA

In [None]:
# Preview the first rows of the working frame, which now includes Continent.
df.head()

In [None]:
# Missing values in data: plot the boolean null mask of the frame as a
# heatmap, so any missing entry shows up as a contrasting cell.
sns.heatmap(df.isna())

In [None]:
# One tall figure with five stacked panels: Salary and Time Spent on Site
# histograms (overall, then split by Clicked), and click counts per continent.
plt.figure(figsize=(12, 20))

plt.subplot(5, 1, 1)
sns.histplot(x='Salary', data=df)

plt.subplot(5, 1, 2)
sns.histplot(x='Salary', hue='Clicked', data=df)

plt.subplot(5, 1, 3)
sns.histplot(x='Time Spent on Site', data=df)

plt.subplot(5, 1, 4)
sns.histplot(x='Time Spent on Site', hue='Clicked', data=df)

plt.subplot(5, 1, 5)
sns.countplot(x='Continent', hue='Clicked', data=df)


In [None]:
# Salary against time on site, coloured by the click outcome, to see how the
# two numeric features relate to the target.
plt.figure(figsize=(10, 8))
sns.scatterplot(
    x='Time Spent on Site',
    y='Salary',
    hue='Clicked',
    data=df,
)

### Prepare final dataset

In [None]:
# Prepare Final Processed data

# One-hot encode the continent; drop_first=True omits the first category so
# the dummy columns are not perfectly collinear.
continents = pd.get_dummies(df['Continent'], drop_first=True)

# Remove the identifier-like columns and the raw Continent column, then
# attach the dummy columns (aligned on the shared row index).
processed_data = df.drop(columns=['Country', 'Names', 'emails', 'Continent'])
processed_data = processed_data.join(continents)

processed_data

In [None]:
# Feature matrix: every processed column except the target.
X = processed_data.drop(columns='Clicked').to_numpy()
# Target as a column vector of shape (n_samples, 1).
y = processed_data['Clicked'].to_numpy().reshape(-1, 1)

# Rescale every feature to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later, so test-set minima/maxima influence the training features.
min_max = MinMaxScaler()
X = min_max.fit(X).transform(X)
X.shape, y.shape

### Model - Logistic regression

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)
# Shapes of the four resulting arrays.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Fit a logistic-regression classifier and predict on both splits.
lr = LogisticRegression(random_state=0)
# y_train is a column vector (it was reshaped to (-1, 1) earlier), while
# scikit-learn estimators expect a 1-D target; ravel() flattens it so fit()
# does not have to convert it and emit a DataConversionWarning.
lr.fit(X_train, y_train.ravel())
y_pred_train = lr.predict(X_train)
y_pred_test = lr.predict(X_test)

In [None]:
# Confusion matrices for both splits.
# scikit-learn's signature is confusion_matrix(y_true, y_pred): the true
# labels go first, so rows are the actual classes and columns the predicted
# ones. (Passing them the other way round transposes the matrix.)
plt.figure(figsize=(10, 10))

plt.subplot(211)
plt.title('Train set')
# fmt='d' prints the cell counts as plain integers.
sns.heatmap(confusion_matrix(y_train, y_pred_train), annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.subplot(212)
plt.title('Test set')
sns.heatmap(confusion_matrix(y_test, y_pred_test), annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Actual')


In [None]:
# Per-class precision/recall/F1 on the training split.
# classification_report takes (y_true, y_pred); with the arguments swapped,
# precision and recall are exchanged and support counts the predictions.
print("Train set\n")
print(classification_report(y_train, y_pred_train))

In [None]:
# Per-class precision/recall/F1 on the held-out test split.
# classification_report takes (y_true, y_pred); with the arguments swapped,
# precision and recall are exchanged and support counts the predictions.
print("Test set\n")
print(classification_report(y_test, y_pred_test))