In [None]:
# This R environment comes with many helpful analytics packages installed
# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats
# For example, here's a helpful package to load

library(tidyverse) # metapackage of all tidyverse packages

# Input data lives in the read-only "../input/" directory.
# Running this cell (Run button or Shift+Enter) lists everything found under it.

list.files("../input")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
library(ggridges)

# Exploratory Analysis

In [None]:
# Load the conversion-tracking CSV into `ads`; every later cell reads from it.
ads <- readr::read_csv(
  file = "/kaggle/input/clicks-conversion-tracking/KAG_conversion_data.csv"
)

In [None]:
# Preview the first rows of the freshly loaded data.
ads %>% head()

In [None]:
# Five-number summary (plus mean) of the numeric performance columns.
ads %>%
  select(Impressions, Clicks, Spent, Total_Conversion, Approved_Conversion) %>%
  summary()

In [None]:
# Summary statistics laid out as a table: one row per metric, one column per
# statistic. The figures are typed in by hand rather than computed from `ads`.
# NOTE(review): presumably transcribed from the summary() output — re-check
# them whenever the input data changes.
summary_table <- data.frame(
  row.names = c(
    "Impressions", "Clicks", "Spent",
    "Total_Conversion", "Approved_Conversion"
  ),
  Min = c(87, 0, 0, 0, 0),
  Q1 = c(6504, 1, 1.48, 1, 0),
  Median = c(51509, 8, 12.37, 1, 1),
  Mean = c(186732, 33.39, 51.36, 2.856, 0.944),
  Q3 = c(221769, 37.50, 60.02, 3, 1),
  Max = c(3052003, 421, 639.95, 60, 21)
)

# Display the table.
summary_table

## Distribution

In [None]:
# Age Distribution
# One bar per `age` value; the row count is printed just above each bar.
ads %>%
  ggplot(aes(x = age)) +
  geom_bar(fill = "light blue") +
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
  ggtitle("Age Distribution")

In [None]:
# Gender Distribution
# One bar per `gender` value; the row count is printed just above each bar.
ads %>%
  ggplot(aes(x = gender)) +
  geom_bar(fill = "orange") +
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
  ggtitle("Gender Distribution")

In [None]:
# Age + Gender Distribution
# Bars are counted per `age` and stacked by `gender`. The text layer does not
# map `fill`, so each label is the total count for that age value.
ggplot(data = ads, mapping = aes(x = age)) +
  geom_bar(aes(fill = gender)) +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(title = "Age + Gender Distribution")

In [None]:
# Gender + Age Distribution
# Bars are counted per `gender` and stacked by `age`. The text layer does not
# map `fill`, so each label is the total count for that gender value.
# A title is added so this chart matches the other distribution bar charts.
ggplot(data = ads, mapping = aes(x = gender)) +
  geom_bar(aes(fill = age)) +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(title = "Gender + Age Distribution")

In [None]:
# Interest Distribution
# `interest` is converted to a factor inline so every distinct value gets its
# own bar; the row count is printed just above each bar.
ads %>%
  ggplot(aes(x = as.factor(interest))) +
  geom_bar(width = 0.5) +
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
  ggtitle("Interest Distribution")

In [None]:
# Store `interest` as a factor from here on.
ads <- ads %>%
  mutate(interest = as.factor(interest))

In [None]:
# Impression Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = Impressions)) +
  geom_histogram(bins = 30) +
  labs(title = "Impression Distribution")

In [None]:
# Store the campaign id as a factor; the ridgeline plots and the models below
# use it as a grouping variable.
ads[["xyz_campaign_id"]] <- as.factor(ads[["xyz_campaign_id"]])

In [None]:
# Ridgeline densities of Impressions, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(Impressions, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

In [None]:
# Clicks Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = Clicks)) +
  geom_histogram(bins = 30) +
  labs(title = "Clicks Distribution")

In [None]:
# Ridgeline densities of Clicks, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(Clicks, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

In [None]:
# Spent Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = Spent)) +
  geom_histogram(bins = 30) +
  labs(title = "Spent Distribution")

In [None]:
# Ridgeline densities of Spent, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(Spent, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

In [None]:
# Cost per click: amount spent divided by the number of clicks, row by row.
# Rows with zero clicks produce non-finite values (division by zero).
ads <- ads %>%
  mutate(CPC = Spent / Clicks)

In [None]:
# CPC Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = CPC)) +
  geom_histogram(bins = 30) +
  labs(title = "CPC Distribution")

In [None]:
# Ridgeline densities of CPC, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(CPC, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

In [None]:
# Conversion Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = Total_Conversion)) +
  geom_histogram(bins = 30) +
  labs(title = "Conversion Distribution")

In [None]:
# Ridgeline densities of Total_Conversion, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(Total_Conversion, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

In [None]:
# Conversion Rate Distribution
# `conv_rate` is never created in the visible part of this notebook, so the
# original plot fails with "object 'conv_rate' not found". It is derived here
# without modifying `ads`.
# NOTE(review): assumed to mean Total_Conversion / Clicks — confirm the
# intended definition.
ads %>%
  mutate(conv_rate = Total_Conversion / Clicks) %>%
  # Zero-click rows give NaN or Inf, which cannot be binned.
  filter(is.finite(conv_rate)) %>%
  ggplot(mapping = aes(x = conv_rate)) +
  # `bins`, not `width`, controls histogram binning; 30 is ggplot2's default.
  geom_histogram(bins = 30) +
  labs(title = "Conversion Rate Distribution")

In [None]:
# Approved Conversion Distribution
# `width` is not a binning argument of geom_histogram(): stat_bin() has
# deprecated it, so depending on the ggplot2 version it is rejected or has no
# effect on the bins. ggplot2's default of 30 bins is requested explicitly.
ggplot(data = ads, mapping = aes(x = Approved_Conversion)) +
  geom_histogram(bins = 30) +
  labs(title = "Approved Conversion Distribution")

In [None]:
# Ridgeline densities of Approved_Conversion, one ridge per xyz_campaign_id.
# The legend is hidden because the fill colour repeats the y axis.
ads %>%
  ggplot(aes(Approved_Conversion, xyz_campaign_id, fill = xyz_campaign_id)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none")

## Aggregation

In [None]:
# Per-campaign totals plus ratio metrics derived from those totals.
# Inside summarise(), a column that has just been assigned refers to its new
# aggregated value, so the ratios use the campaign totals directly; wrapping
# them in sum() again would be a no-op.
#   CTR: clicks per impression, expressed in percent
#   CPC: spend per click
#   CPA: spend per approved conversion
ads_grouped <- ads %>%
  group_by(xyz_campaign_id) %>%
  summarise(
    Impressions = sum(Impressions),
    Clicks = sum(Clicks),
    Spent = sum(Spent),
    Total_Conversion = sum(Total_Conversion),
    Approved_Conversion = sum(Approved_Conversion),
    CTR = Clicks / Impressions * 100,
    CPC = Spent / Clicks,
    CPA = Spent / Approved_Conversion
  )

In [None]:
# Display the per-campaign aggregate table.
ads_grouped

In [None]:
# Row-level cost per approved conversion.
# Rows with zero approved conversions produce non-finite values.
ads <- ads %>%
  mutate(CPA = Spent / Approved_Conversion)

In [None]:
# Inverse of CPA: approved conversions per unit of spend, row by row.
# Rows with zero spend produce non-finite values.
ads <- ads %>%
  mutate(vCPA = Approved_Conversion / Spent)

In [None]:
# Preview the first rows again, now including the derived columns.
head(ads, n = 6)

In [None]:
# Row-level rates used as model responses below.
#   CTR: clicks per impression (a fraction here, whereas the per-campaign CTR
#        in `ads_grouped` is multiplied by 100)
#   CR:  approved conversions per click (non-finite when Clicks is 0)
ads <- ads %>%
  mutate(
    CTR = Clicks / Impressions,
    CR = Approved_Conversion / Clicks
  )

In [None]:
# Binomial GLM (logit link) of click-through rate on campaign.
# CTR is a proportion (Clicks / Impressions). For a proportion response the
# binomial family takes the number of trials from `weights`; without it every
# row counts as one trial, glm() warns about non-integer successes and the
# standard errors ignore how many impressions back each rate.
abCTRlogit <- glm(
  CTR ~ xyz_campaign_id,
  data = ads,
  family = binomial(link = "logit"),
  weights = Impressions
)

In [None]:
# Some rows report more approved conversions than clicks, for reasons that are
# unclear; keep only rows where approved conversions do not exceed clicks.
ads_new <- ads[with(ads, Approved_Conversion <= Clicks), ]

In [None]:
# Coefficient table and fit statistics for the logit CTR model.
abCTRlogit %>% summary()

In [None]:
# Binomial GLM (probit link) of click-through rate on campaign.
# CTR is a proportion (Clicks / Impressions). For a proportion response the
# binomial family takes the number of trials from `weights`; without it every
# row counts as one trial, glm() warns about non-integer successes and the
# standard errors ignore how many impressions back each rate.
abCTRprobit <- glm(
  CTR ~ xyz_campaign_id,
  data = ads,
  family = binomial(link = "probit"),
  weights = Impressions
)
summary(abCTRprobit)

In [None]:
# Binomial GLM (complementary log-log link) of click-through rate on campaign.
# CTR is a proportion (Clicks / Impressions). For a proportion response the
# binomial family takes the number of trials from `weights`; without it every
# row counts as one trial, glm() warns about non-integer successes and the
# standard errors ignore how many impressions back each rate.
abCTRcloglog <- glm(
  CTR ~ xyz_campaign_id,
  data = ads,
  family = binomial(link = "cloglog"),
  weights = Impressions
)
summary(abCTRcloglog)

In [None]:
# Binomial GLM (logit link) of conversion rate on campaign, fitted on the
# filtered rows in `ads_new`.
# CR is a proportion (Approved_Conversion / Clicks), so the number of trials
# behind each rate is passed through `weights`; without it every row counts as
# one trial and glm() warns about non-integer successes.
# Rows with zero clicks have a non-finite CR and are dropped by the default
# NA handling.
abCRlogit <- glm(
  CR ~ xyz_campaign_id,
  data = ads_new,
  family = binomial(link = "logit"),
  weights = Clicks
)
summary(abCRlogit)

In [None]:
# Binomial GLM (probit link) of conversion rate on campaign, fitted on the
# filtered rows in `ads_new`.
# CR is a proportion (Approved_Conversion / Clicks), so the number of trials
# behind each rate is passed through `weights`; without it every row counts as
# one trial and glm() warns about non-integer successes.
# Rows with zero clicks have a non-finite CR and are dropped by the default
# NA handling.
abCRprobit <- glm(
  CR ~ xyz_campaign_id,
  data = ads_new,
  family = binomial(link = "probit"),
  weights = Clicks
)
summary(abCRprobit)

In [None]:
# Binomial GLM (complementary log-log link) of conversion rate on campaign,
# fitted on the filtered rows in `ads_new`.
# CR is a proportion (Approved_Conversion / Clicks), so the number of trials
# behind each rate is passed through `weights`; without it every row counts as
# one trial and glm() warns about non-integer successes.
# Rows with zero clicks have a non-finite CR and are dropped by the default
# NA handling.
abCRcloglog <- glm(
  CR ~ xyz_campaign_id,
  data = ads_new,
  family = binomial(link = "cloglog"),
  weights = Clicks
)
summary(abCRcloglog)

In [None]:
library(lme4)

In [None]:
# Mixed-effects binomial model of conversion rate with a random intercept per
# fb_campaign_id, fitted on the filtered rows in `ads_new`.
# CR is a proportion (Approved_Conversion / Clicks), so the number of trials
# behind each rate is supplied through `weights`.
# The optimizer settings are passed as the named `control` argument instead of
# relying on positional matching.
# NOTE(review): the predictors are on very different scales; if convergence
# warnings appear, consider rescaling them.
rCR <- glmer(
  CR ~ Impressions + Clicks + CPC + Spent + (1 | fb_campaign_id),
  data = ads_new,
  family = binomial,
  weights = Clicks,
  control = glmerControl(
    optimizer = "bobyqa",
    optCtrl = list(maxfun = 100000)
  )
)
summary(rCR)

In [None]:
# Binomial GLM (logit link) of conversion rate on campaign, with gender and
# age effects nested within campaign, fitted on the filtered rows in `ads_new`.
# CR is a proportion (Approved_Conversion / Clicks), so the number of trials
# behind each rate is passed through `weights`; without it every row counts as
# one trial and glm() warns about non-integer successes.
abCRlogit2 <- glm(
  CR ~ xyz_campaign_id + xyz_campaign_id:gender + xyz_campaign_id:age,
  data = ads_new,
  family = binomial(link = "logit"),
  weights = Clicks
)
summary(abCRlogit2)