### Import Relevant packages

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

### Read the dataset

In [17]:
# Load the raw records from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer='./dataset.csv')

### Compute click through rate

In [18]:
# Click-through rate = click-through events per impression.
# Rows with zero impressions have no defined rate: mask the zero denominators
# so the result is NaN (skipped by `corr()`) rather than +/-inf, which would
# turn every correlation involving this column into NaN.
df['click_through_rates'] = (
    df['click-through-event'] / df['impression'].where(df['impression'] != 0)
)

### Replace categorical values with dummy variables

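Since region and season are categorical variables with more than two levels, we replace them with dummy (one-hot) variables in order to compute their correlation with click_through_rates. Note that the last dummy column of each variable is dropped, so that level does not appear in the correlation tables below.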

In [23]:
# One-hot encode each categorical column, then discard the last dummy column
# of each encoding; that level is therefore absent from the frames built below.
df_dummies_region = pd.get_dummies(df['region'])
df_dummies_region = df_dummies_region.drop(columns=df_dummies_region.columns[-1])

df_dummies_season = pd.get_dummies(df['season'])
df_dummies_season = df_dummies_season.drop(columns=df_dummies_season.columns[-1])

# Base frame: everything from `df` except the raw categorical columns, the
# identifier/metadata columns and the two inputs of the click-through rate.
df_new = df.drop(columns=[
    'region',
    'first_key',
    'version',
    'season',
    'vertical',
    'click-through-event',
    'first_dropped',
    'impression',
])

# Attach each set of dummy columns side by side with the base frame.
df_region = pd.concat((df_new, df_dummies_region), axis='columns')
df_season = pd.concat((df_new, df_dummies_season), axis='columns')

### Correlation between region and click through rate

In [24]:
# Pairwise Pearson correlation between the click-through rate and each region dummy.
df_region.corr(method='pearson')

Unnamed: 0,click_through_rates,Africa,Americas,Asia,Europe,Oceania
click_through_rates,1.0,-0.020965,0.105448,-0.037462,-0.141266,0.024231
Africa,-0.020965,1.0,-0.062936,-0.077779,-0.083496,-0.00585
Americas,0.105448,-0.062936,1.0,-0.175069,-0.187938,-0.013168
Asia,-0.037462,-0.077779,-0.175069,1.0,-0.23226,-0.016273
Europe,-0.141266,-0.083496,-0.187938,-0.23226,1.0,-0.017469
Oceania,0.024231,-0.00585,-0.013168,-0.016273,-0.017469,1.0


### Correlation graph between region and click through rate

In [29]:
# Shade the region correlation matrix with the diverging PRGn colormap.
# The colour scale is pinned to the full correlation range [-1, 1] and applied
# to the whole table (axis=None) so that 0 sits at the colormap's midpoint and
# equal values get equal colours in every column; the default normalises each
# column separately, which makes colours incomparable across columns.
df_region.corr().style.background_gradient(cmap='PRGn', axis=None, vmin=-1, vmax=1)

Unnamed: 0,click_through_rates,Africa,Americas,Asia,Europe,Oceania
click_through_rates,1.0,-0.020965,0.105448,-0.037462,-0.141266,0.024231
Africa,-0.020965,1.0,-0.062936,-0.077779,-0.083496,-0.00585
Americas,0.105448,-0.062936,1.0,-0.175069,-0.187938,-0.013168
Asia,-0.037462,-0.077779,-0.175069,1.0,-0.23226,-0.016273
Europe,-0.141266,-0.083496,-0.187938,-0.23226,1.0,-0.017469
Oceania,0.024231,-0.00585,-0.013168,-0.016273,-0.017469,1.0


### Correlation between season and click through rate

In [25]:
# Pairwise Pearson correlation between the click-through rate and each season dummy.
df_season.corr(method='pearson')

Unnamed: 0,click_through_rates,Autumn,Hiver,Spring,Summer
click_through_rates,1.0,-0.04582,-0.073163,0.016066,0.068927
Autumn,-0.04582,1.0,-0.138082,-0.236966,-0.378739
Hiver,-0.073163,-0.138082,1.0,-0.147519,-0.235778
Spring,0.016066,-0.236966,-0.147519,1.0,-0.404622
Summer,0.068927,-0.378739,-0.235778,-0.404622,1.0


### Correlation graph between season and click through rate

In [31]:
# Shade the season correlation matrix with the diverging PRGn colormap.
# The colour scale is pinned to the full correlation range [-1, 1] and applied
# to the whole table (axis=None) so that 0 sits at the colormap's midpoint and
# equal values get equal colours in every column; the default normalises each
# column separately, which makes colours incomparable across columns.
df_season.corr().style.background_gradient(cmap='PRGn', axis=None, vmin=-1, vmax=1)

Unnamed: 0,click_through_rates,Autumn,Hiver,Spring,Summer
click_through_rates,1.0,-0.04582,-0.073163,0.016066,0.068927
Autumn,-0.04582,1.0,-0.138082,-0.236966,-0.378739
Hiver,-0.073163,-0.138082,1.0,-0.147519,-0.235778
Spring,0.016066,-0.236966,-0.147519,1.0,-0.404622
Summer,0.068927,-0.378739,-0.235778,-0.404622,1.0
