#### About
> Marketing campaign's conversion rate analysis

Dataset - https://www.kaggle.com/datasets/loveall/clicks-conversion-tracking

In [1]:
#importing modules
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
df_original = pd.read_csv('/home/suraj/ClickUp/Mar-Apr/data/KAG_conversion_data.csv')

In [3]:
df = df_original
df

Unnamed: 0,ad_id,xyz_campaign_id,fb_campaign_id,age,gender,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion
0,708746,916,103916,30-34,M,15,7350,1,1.430000,2,1
1,708749,916,103917,30-34,M,16,17861,2,1.820000,2,0
2,708771,916,103920,30-34,M,20,693,0,0.000000,1,0
3,708815,916,103928,30-34,M,28,4259,1,1.250000,1,0
4,708818,916,103928,30-34,M,28,4133,1,1.290000,1,1
...,...,...,...,...,...,...,...,...,...,...,...
1138,1314410,1178,179977,45-49,F,109,1129773,252,358.189997,13,2
1139,1314411,1178,179978,45-49,F,110,637549,120,173.880003,3,0
1140,1314412,1178,179979,45-49,F,111,151531,28,40.289999,2,0
1141,1314414,1178,179981,45-49,F,113,790253,135,198.710001,8,2


In [4]:
df.columns

Index(['ad_id', 'xyz_campaign_id', 'fb_campaign_id', 'age', 'gender',
       'interest', 'Impressions', 'Clicks', 'Spent', 'Total_Conversion',
       'Approved_Conversion'],
      dtype='object')

#### 1. Data pre-processing.

In [5]:
#drop unneccessary columns
df = df.drop(['ad_id','xyz_campaign_id','fb_campaign_id'], axis=1)

In [6]:
df

Unnamed: 0,age,gender,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion
0,30-34,M,15,7350,1,1.430000,2,1
1,30-34,M,16,17861,2,1.820000,2,0
2,30-34,M,20,693,0,0.000000,1,0
3,30-34,M,28,4259,1,1.250000,1,0
4,30-34,M,28,4133,1,1.290000,1,1
...,...,...,...,...,...,...,...,...
1138,45-49,F,109,1129773,252,358.189997,13,2
1139,45-49,F,110,637549,120,173.880003,3,0
1140,45-49,F,111,151531,28,40.289999,2,0
1141,45-49,F,113,790253,135,198.710001,8,2


In [7]:
# Replace any missing values with the mean of the column
df.fillna(df.mean(), inplace=True)


  


In [8]:
df

Unnamed: 0,age,gender,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion
0,30-34,M,15,7350,1,1.430000,2,1
1,30-34,M,16,17861,2,1.820000,2,0
2,30-34,M,20,693,0,0.000000,1,0
3,30-34,M,28,4259,1,1.250000,1,0
4,30-34,M,28,4133,1,1.290000,1,1
...,...,...,...,...,...,...,...,...
1138,45-49,F,109,1129773,252,358.189997,13,2
1139,45-49,F,110,637549,120,173.880003,3,0
1140,45-49,F,111,151531,28,40.289999,2,0
1141,45-49,F,113,790253,135,198.710001,8,2


In [9]:
# Convert categorical variables to numerical using one-hot encoding
df = pd.get_dummies(df, columns=['age', 'gender'])


In [10]:
df

Unnamed: 0,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion,age_30-34,age_35-39,age_40-44,age_45-49,gender_F,gender_M
0,15,7350,1,1.430000,2,1,1,0,0,0,0,1
1,16,17861,2,1.820000,2,0,1,0,0,0,0,1
2,20,693,0,0.000000,1,0,1,0,0,0,0,1
3,28,4259,1,1.250000,1,0,1,0,0,0,0,1
4,28,4133,1,1.290000,1,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1138,109,1129773,252,358.189997,13,2,0,0,0,1,1,0
1139,110,637549,120,173.880003,3,0,0,0,0,1,1,0
1140,111,151531,28,40.289999,2,0,0,0,0,1,1,0
1141,113,790253,135,198.710001,8,2,0,0,0,1,1,0


In [12]:
# Normalize the numerical features
numerical_features = ['Impressions', 'Clicks', 'Spent', 'Total_Conversion', 'Approved_Conversion','interest']
for feature in numerical_features:
    df[feature] = (df[feature] - df[feature].mean()) / df[feature].std()

df


Unnamed: 0,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion,age_30-34,age_35-39,age_40-44,age_45-49,gender_F,gender_M
0,-0.659184,-0.573542,-0.569323,-0.574520,-0.190839,0.032222,1,0,0,0,0,1
1,-0.622081,-0.539935,-0.551746,-0.570033,-0.190839,-0.543248,1,0,0,0,0,1
2,-0.473670,-0.594826,-0.586901,-0.590974,-0.413874,-0.543248,1,0,0,0,0,1
3,-0.176847,-0.583425,-0.569323,-0.576592,-0.413874,-0.543248,1,0,0,0,0,1
4,-0.176847,-0.583827,-0.569323,-0.576131,-0.413874,0.032222,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1138,2.828481,3.015201,3.842511,3.530490,2.262551,0.607693,0,0,0,1,1,0
1139,2.865584,1.441405,1.522343,1.409752,0.032197,-0.543248,0,0,0,1,1,0
1140,2.902687,-0.112549,-0.094744,-0.127383,-0.190839,-0.543248,0,0,0,1,1,0
1141,2.976892,1.929648,1.785998,1.695455,1.147374,0.607693,0,0,0,1,1,0


#### 2. Feature Engineering

In [13]:
# Compute the click-through rate (CTR)
df['CTR'] = df['Clicks'] / df['Impressions']

In [14]:
df

Unnamed: 0,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion,age_30-34,age_35-39,age_40-44,age_45-49,gender_F,gender_M,CTR
0,-0.659184,-0.573542,-0.569323,-0.574520,-0.190839,0.032222,1,0,0,0,0,1,0.992645
1,-0.622081,-0.539935,-0.551746,-0.570033,-0.190839,-0.543248,1,0,0,0,0,1,1.021876
2,-0.473670,-0.594826,-0.586901,-0.590974,-0.413874,-0.543248,1,0,0,0,0,1,0.986676
3,-0.176847,-0.583425,-0.569323,-0.576592,-0.413874,-0.543248,1,0,0,0,0,1,0.975831
4,-0.176847,-0.583827,-0.569323,-0.576131,-0.413874,0.032222,1,0,0,0,0,1,0.975157
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,2.828481,3.015201,3.842511,3.530490,2.262551,0.607693,0,0,0,1,1,0,1.274380
1139,2.865584,1.441405,1.522343,1.409752,0.032197,-0.543248,0,0,0,1,1,0,1.056152
1140,2.902687,-0.112549,-0.094744,-0.127383,-0.190839,-0.543248,0,0,0,1,1,0,0.841798
1141,2.976892,1.929648,1.785998,1.695455,1.147374,0.607693,0,0,0,1,1,0,0.925557


In [15]:
# Compute the cost-per-click (CPC)
df['CPC'] = df['Spent'] / df['Clicks']

# Compute the cost-per-impression (CPI)
df['CPI'] = df['Spent'] / df['Impressions']


In [16]:
df

Unnamed: 0,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion,age_30-34,age_35-39,age_40-44,age_45-49,gender_F,gender_M,CTR,CPC,CPI
0,-0.659184,-0.573542,-0.569323,-0.574520,-0.190839,0.032222,1,0,0,0,0,1,0.992645,1.009128,1.001706
1,-0.622081,-0.539935,-0.551746,-0.570033,-0.190839,-0.543248,1,0,0,0,0,1,1.021876,1.033143,1.055744
2,-0.473670,-0.594826,-0.586901,-0.590974,-0.413874,-0.543248,1,0,0,0,0,1,0.986676,1.006941,0.993525
3,-0.176847,-0.583425,-0.569323,-0.576592,-0.413874,-0.543248,1,0,0,0,0,1,0.975831,1.012766,0.988288
4,-0.176847,-0.583827,-0.569323,-0.576131,-0.413874,0.032222,1,0,0,0,0,1,0.975157,1.011958,0.986818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,2.828481,3.015201,3.842511,3.530490,2.262551,0.607693,0,0,0,1,1,0,1.274380,0.918798,1.170897
1139,2.865584,1.441405,1.522343,1.409752,0.032197,-0.543248,0,0,0,1,1,0,1.056152,0.926041,0.978041
1140,2.902687,-0.112549,-0.094744,-0.127383,-0.190839,-0.543248,0,0,0,1,1,0,0.841798,1.344501,1.131799
1141,2.976892,1.929648,1.785998,1.695455,1.147374,0.607693,0,0,0,1,1,0,0.925557,0.949304,0.878635
