In [1]:
#data stuff
import time
import pandas as pd
import numpy as np
import datetime as dt

#regression stuff
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm

#graph stuff
import seaborn as sns
from plotly.subplots import make_subplots
from plotly import graph_objects as go
import plotly
import matplotlib.pyplot as plt

%matplotlib inline
pd.set_option('display.max_columns', 500)
pd.options.mode.chained_assignment = None  # default='warn'


#import functions from other file that we want
from utilities import *

# 0.1 Read in data

In [2]:
#read in clickthru data from vw dataset
q = '''SELECT 
     account
    , attr_window
    , campaign
    , _match
    , campaign_objective
    , Year
    , Quarter
    , Month
    , week
    , week_start
    , date
    , marketing_initiative
    , marketing_segment
    , marketing_subinitiative
    , channel
    , platform
    , site
    , sum(case when sor_prod = 'All (Core All, NPV)' then spend else 0 end) spend
    , sum(case when sor_prod = 'All (Core All, NPV)' then clicks else 0 end) clicks
    , sum(case when sor_prod = 'All (Core All, NPV)' then impressions else 0 end) impressions
    , sum(case
            when sor_prod = 'Core' then conversions else null end) digi_ada_conversions
    , sum(case
            when sor_prod = 'Crosswords' then conversions else null end) games_conversions
    , sum(case
            when sor_prod = 'EDU' then conversions else null end) edu_conversions
    , sum(case
            when sor_prod = 'Home Delivery' then conversions else null end) hd_conversions
    , sum(case
            when sor_prod = 'CK' then conversions else null end) ck_conversions     
    , sum(case
            when sor_prod = 'Core All (Core, HD, EDU)' then conversions else null end) core_conversions
    , sum(case
            when sor_prod = 'All (Core All, NPV)' then conversions else null end) all_conversions
FROM `nyt-mkt-prd.paid_media_data.placement_daily_vw`
WHERE date >= '2020-01-01' and attr_window in('Media Reported - Click Thru')
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17
order by date, campaign, _match
    '''
start_time = time.time() 

clickthru_df = pd.read_gbq(q,
                 project_id ='nyt-bigquery-beta-workspace',
                 dialect='standard')

print(f'time took: {str(round(time.time() - start_time, 2))}')

time took: 203.82


In [3]:
clickthru_df.head()

Unnamed: 0,account,attr_window,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions,games_conversions,edu_conversions,hd_conversions,ck_conversions,core_conversions,all_conversions
0,Audience Engagement 1,Media Reported - Click Thru,Book Review Live 2020,BRAND ASSET - B&W,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Facebook,Facebook,94.010001,142.0,26617.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Audience Engagement 1,Media Reported - Click Thru,Book Review Live 2020,QUOTE GRAPHIC,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Facebook,Facebook,27.900001,56.0,18624.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Audience Engagement 1,Media Reported - Click Thru,Book Review Live 2020,SINGLE SPEAKER ASSET - NICHOLAS,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Facebook,Facebook,2.95,8.0,3747.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,UAC_Crosswords,Media Reported - Click Thru,Crosswords UAC - US - Android,CROSSWORDS UAC - US - ANDROID,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,App-Install,X-UNKNOWN-X,Games,Display,Google Ads,UAC,888.334961,1117.0,110080.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,UAC_Crosswords,Media Reported - Click Thru,Crosswords UAC - US - iOS,CROSSWORDS UAC - US - IOS,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,App-Install,X-UNKNOWN-X,Games,Display,Google Ads,UAC,65.69326,63.0,1903.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
#read in viewthru data from vw dataset
q = '''SELECT 
     account
    , attr_window
    , campaign
    , _match
    , campaign_objective
    , Year
    , Quarter
    , Month
    , week
    , week_start
    , date
    , marketing_initiative
    , marketing_segment
    , marketing_subinitiative
    , channel
    , platform
    , site
    , sum(case when sor_prod = 'All (Core All, NPV)' then spend else 0 end) spend
    , sum(case when sor_prod = 'All (Core All, NPV)' then clicks else 0 end) clicks
    , sum(case when sor_prod = 'All (Core All, NPV)' then impressions else 0 end) impressions
    , sum(case
            when sor_prod = 'Core' then conversions else null end) digi_ada_conversions
    , sum(case
            when sor_prod = 'Crosswords' then conversions else null end) games_conversions
    , sum(case
            when sor_prod = 'EDU' then conversions else null end) edu_conversions
    , sum(case
            when sor_prod = 'Home Delivery' then conversions else null end) hd_conversions
    , sum(case
            when sor_prod = 'CK' then conversions else null end) ck_conversions     
    , sum(case
            when sor_prod = 'Core All (Core, HD, EDU)' then conversions else null end) core_conversions
    , sum(case
            when sor_prod = 'All (Core All, NPV)' then conversions else null end) all_conversions
FROM `nyt-mkt-prd.paid_media_data.placement_daily_vw`
WHERE date >= '2020-01-01' and attr_window in('Media Reported - View Thru')
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13,14,15,16,17
order by date, campaign, _match
    '''
start_time = time.time() 

viewthru_df = pd.read_gbq(q,
                 project_id ='nyt-bigquery-beta-workspace',
                 dialect='standard')

print(f'time took: {str(round(time.time() - start_time, 2))}')

time took: 192.1


In [5]:
#rename columns in the separate dataframes to identify clickthru and viewthru
#first clickthru
for col in clickthru_df.columns:
    if 'conversions' in col: 
        clickthru_df[col+'_ct'] = clickthru_df[col]
        clickthru_df.drop(col, axis=1, inplace=True)
        
#now viewthru
for col in viewthru_df.columns:
    if 'conversions' in col: 
        viewthru_df[col+'_vt'] = viewthru_df[col]
        viewthru_df.drop(col, axis=1, inplace=True)

In [6]:
#now merge the dataframes 
#create a truncated version of viewthru DF to make the merge more efficient
vt_merge = viewthru_df[['account','campaign','_match','date','marketing_initiative','marketing_segment'] + [col for col in viewthru_df.columns if 'conversions' in col]]

#merge with clickthru df
df = clickthru_df.merge(vt_merge, how='left', on=[col for col in vt_merge.columns if 'conversions' not in col])

In [7]:
df.shape

(874583, 34)

In [8]:
df[df.duplicated()]

Unnamed: 0,account,attr_window,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt


# 0.2 Basic Data Cleaning (Lucas)

In [9]:
#some Twigeo campaigns don't get recognized as Games campaigns in PMD, manually correct those
df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (
          (df['campaign'].str.contains('game')) | 
          (df['campaign'].str.contains('xwd')) | 
          (df['campaign'].str.contains('cross'))
       ), 'marketing_subinitiative'] = 'Games (former: Crosswords)'

In [10]:
df['marketing_subinitiative'].value_counts()

Core                          597189
Games                          81441
Cooking                        56076
X-UNKNOWN-X                    55647
Home Delivery                  48379
EDU                            28179
Games (former: Crosswords)      4371
Cooking Gift                     943
Audio                            908
Games Gift                       881
Sway                             569
Name: marketing_subinitiative, dtype: int64

In [11]:
#could probably be an easier way to do this with the week, date, month cols

#group by bigger cols using the dt module 
#apply() -> so it reads better in graphs 

#create date-based aggregation columns for easier grouping
df['year_month_cal'] = df['date'].dt.to_period('M').apply(lambda r: r.start_time)
df['year_week_monday'] = df['date'].dt.to_period('W').apply(lambda r: r.start_time)
df['year_quarter'] = df['date'].dt.to_period('Q').apply(lambda r: r.start_time)



In [12]:
#create a composite channel-marketing-initiative column for easier grouping by combos of channel and marketing_initiative 

df['channel_mkt_init'] = df['channel'] + " - " + df['marketing_initiative']

In [13]:
#info about cols -> nunique, unique, value_counts
#isna().sum(), .mean() (% of col that's null) -> are there null


df['marketing_initiative'].value_counts()

Business As Usual               327387
Sale                            226016
Content/Audience Development    209271
X-UNKNOWN-X                      47719
One Day Sale                     16743
App Download                     13609
App-Install                      11321
Branding                          9760
Brand                             5677
Gifting                           4324
Testing                           1224
Engagement                        1144
Retention                          104
December Holiday Cooking           104
Thanksgiving Cooking               104
Non-Specific                        54
Audience Content                    22
Name: marketing_initiative, dtype: int64

In [14]:
#~ means exclude
#exclude Brand, Branding, Retention, App Download, and App Install initatives bc not relevant to Chrome cookie loss
df = df[~df['marketing_initiative'].isin(['Brand','Branding','Retention','App Download', 'App-Install'])]

In [15]:
df['marketing_initiative'].value_counts()

Business As Usual               327387
Sale                            226016
Content/Audience Development    209271
X-UNKNOWN-X                      47719
One Day Sale                     16743
Gifting                           4324
Testing                           1224
Engagement                        1144
Thanksgiving Cooking               104
December Holiday Cooking           104
Non-Specific                        54
Audience Content                    22
Name: marketing_initiative, dtype: int64

# 0.3 Intro Data Cleaning Numbers

In [16]:
df.nunique()

account                        16
attr_window                     1
campaign                      232
_match                      18642
campaign_objective             47
Year                            2
Quarter                         4
Month                          12
week                           53
week_start                     80
date                          553
marketing_initiative           12
marketing_segment              62
marketing_subinitiative        10
channel                         8
platform                       15
site                           22
spend                      535652
clicks                       6490
impressions                 98339
digi_ada_conversions_ct       495
games_conversions_ct           81
edu_conversions_ct             38
hd_conversions_ct              79
ck_conversions_ct             203
core_conversions_ct           529
all_conversions_ct            608
digi_ada_conversions_vt       259
games_conversions_vt           22
edu_conversion

In [17]:
#get rid of attr_window (bc it isn't needed anymore just says click thru)
df = df.drop(['attr_window'], axis=1)

In [18]:
#add total conversions column
df['total_conversions'] = df['all_conversions_ct'] + df['all_conversions_vt']

In [19]:
#group low-volume channels 
df['new_channel'] = df['channel']
df.loc[~df['channel'].isin(['Social','Display','Paid Search','Native']), 'new_channel'] = 'Low-Volume Channels'

In [20]:
#group low-volume sites
df['new_site'] = df['site']
df.loc[~df['site'].isin(['The Trade Desk','DBM_US','Snap','Facebook','Twitter','Google AdWords (DSA)','Bing Ads', 'Google AdWords']), 'new_site'] = 'Low-Volume Sites'

In [21]:
#group low-volume products
df['new_product'] = df['marketing_subinitiative']
df.loc[~df['marketing_subinitiative'].isin(['Home Delivery','Core','Games','Cooking']), 'new_product'] = 'Low-Volume Products'

In [22]:
#check to see if the above groupings work
df['channel'].value_counts()
df['new_channel'].value_counts()
df['site'].value_counts()
df['new_site'].value_counts()
df['marketing_subinitiative'].value_counts()
df['new_product'].value_counts()

Core                   574806
Games                   78861
Low-Volume Products     77701
Cooking                 54365
Home Delivery           48379
Name: new_product, dtype: int64

## Null Values

In [23]:
#check to see how many null values there are for each column

#matches up across vt and ct -> only null values are in conversion columns
for col in df.columns:
   print(col + " " + str(df[col].isna().sum()))

account 0
campaign 0
_match 0
campaign_objective 0
Year 0
Quarter 0
Month 0
week 0
week_start 0
date 0
marketing_initiative 0
marketing_segment 0
marketing_subinitiative 0
channel 0
platform 0
site 0
spend 0
clicks 0
impressions 0
digi_ada_conversions_ct 4893
games_conversions_ct 17663
edu_conversions_ct 17780
hd_conversions_ct 18128
ck_conversions_ct 16053
core_conversions_ct 2008
all_conversions_ct 0
digi_ada_conversions_vt 4893
games_conversions_vt 17663
edu_conversions_vt 17780
hd_conversions_vt 18128
ck_conversions_vt 16053
core_conversions_vt 2008
all_conversions_vt 0
year_month_cal 0
year_week_monday 0
year_quarter 0
channel_mkt_init 0
total_conversions 0
new_channel 0
new_site 0
new_product 0


In [24]:
#replace all NaNs to 0 
df = df.fillna(0)
for col in df.columns:
   print(col + " " + str(df[col].isna().sum()))

account 0
campaign 0
_match 0
campaign_objective 0
Year 0
Quarter 0
Month 0
week 0
week_start 0
date 0
marketing_initiative 0
marketing_segment 0
marketing_subinitiative 0
channel 0
platform 0
site 0
spend 0
clicks 0
impressions 0
digi_ada_conversions_ct 0
games_conversions_ct 0
edu_conversions_ct 0
hd_conversions_ct 0
ck_conversions_ct 0
core_conversions_ct 0
all_conversions_ct 0
digi_ada_conversions_vt 0
games_conversions_vt 0
edu_conversions_vt 0
hd_conversions_vt 0
ck_conversions_vt 0
core_conversions_vt 0
all_conversions_vt 0
year_month_cal 0
year_week_monday 0
year_quarter 0
channel_mkt_init 0
total_conversions 0
new_channel 0
new_site 0
new_product 0


# 0.4 Investigate Spend -> Spend = 0 but clicks, impressions, or conversions != 0

In [25]:
#BRIEF

#pick a channel (Display) -> campaign names where there is no spend but there are impressions, etc.
#look to see if there are a lot of situations (acquisition focused campaign) -> but if it's not acuqistion 
spend_df = df.loc[(df['spend'] == 0)]
spend_df.shape

(240197, 41)

In [26]:
spend_df.value_counts('channel')

channel
Paid Search    139749
X-UNKNOWN-X     59176
Display         26232
Native          10927
Social           3679
Video             263
Other             171
dtype: int64

In [27]:
all_spend_df = spend_conv_total_df = df.loc[(df['spend'] == 0) &(df['impressions'] == 0) & (df['clicks'] == 0) 
                               & (df['total_conversions'] == 0)]
spend_conv_total_df.shape

(193238, 41)

In [28]:
spend_imp_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0)]
spend_imp_df.shape

(25210, 41)

In [29]:
spend_imp_df.value_counts('channel')

channel
Display        10755
Paid Search     7761
Native          6170
Social           345
Other            168
Video             11
dtype: int64

In [30]:
spend_clicks_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0) & (df['clicks'] != 0)]
spend_clicks_df.shape

(5578, 41)

In [31]:
spend_conv_ct_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0) & (df['clicks'] != 0) 
                               & (df['all_conversions_ct'] != 0)]
spend_conv_ct_df.shape

(34, 41)

In [32]:
spend_conv_vt_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0) & (df['clicks'] != 0) 
                               & (df['all_conversions_vt'] != 0)]
spend_conv_vt_df.shape

(1473, 41)

In [33]:
spend_conv_vt_df.value_counts('channel')

channel
Display    1282
Native      167
Other        19
Social        5
dtype: int64

In [34]:
spend_conv_total_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0) & (df['clicks'] != 0) 
                               & (df['total_conversions'] != 0)]
spend_conv_total_df.shape

(1487, 41)

In [35]:
spend_conv_total_df[['all_conversions_ct', 'all_conversions_vt','total_conversions']].sum()

all_conversions_ct      38.0
all_conversions_vt    3032.0
total_conversions     3070.0
dtype: float64

In [36]:
spend_conv_total_df.groupby('channel')[['all_conversions_ct', 'all_conversions_vt','total_conversions']].sum()

Unnamed: 0_level_0,all_conversions_ct,all_conversions_vt,total_conversions
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Display,26.0,2588.0,2614.0
Native,4.0,254.0,258.0
Other,1.0,102.0,103.0
Social,7.0,88.0,95.0


In [37]:
spend_both_conv_df = df.loc[(df['spend'] == 0) &(df['impressions'] != 0) & (df['clicks'] != 0) 
                               & (df['all_conversions_vt'] != 0) & (df['all_conversions_ct'] != 0)]
spend_both_conv_df.shape

(20, 41)

In [38]:
#look at date before row (21357204)
spend_both_conv_df.sort_values(by='date')

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
154432,New York Times Subscriptions,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Core,Social,Twitter,Twitter,0.0,36.0,2222.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2020-02-01,2020-02-10,2020-01-01,Social - Content/Audience Development,3.0,Social,Twitter,Core
412323,New York Times,NYTI_SAL_COREXXX_COUSA_DB_NA_WTCP,NYTI_SAL_COREXXX_COUSA_DB_NA_WTCP_LPRET_30XXXX...,Website Transaction - CPA,2020,3,9,35,2020-08-30,2020-09-03 00:00:00+00:00,Sale,Retargeting (LP visitors),Core,Native,Native,DBM_US,0.0,1.0,5.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2020-09-01,2020-08-31,2020-07-01,Native - Sale,2.0,Native,DBM_US,Core
538439,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-11 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,805.0,486713.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,10.0,0.0,0.0,0.0,0.0,10.0,10.0,2020-11-01,2020-11-09,2020-10-01,Other - Business As Usual,11.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
727976,New York Times,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP_1BEHA_20030D...,Website Transaction - CPA,2021,1,3,12,2021-03-21,2021-03-25 00:00:00+00:00,Sale,"Behavioral (1P: MC, ATC, site visitors, anons)",Core,Display,theTradeDesk,The Trade Desk,0.0,43.0,40125.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,2.0,3.0,2021-03-01,2021-03-22,2021-01-01,Display - Sale,4.0,Display,The Trade Desk,Core
744099,New York Times,NYTI_SAL_COREXXX_COUSA_TR_NA_WTCP,NYTI_SAL_COREXXX_COUSA_TR_NA_WTCP_LPRET_20XXXX...,Website Transaction - CPA,2021,2,4,14,2021-04-04,2021-04-05 00:00:00+00:00,Sale,Retargeting (LP visitors),Core,Native,Native,The Trade Desk,0.0,4.0,2427.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2021-04-01,2021-04-05,2021-04-01,Native - Sale,2.0,Native,The Trade Desk,Core
744101,New York Times,NYTI_SAL_COREXXX_COUSA_TR_NA_WTCP,NYTI_SAL_COREXXX_COUSA_TR_NA_WTCP_LPRET_20XXXX...,Website Transaction - CPA,2021,2,4,14,2021-04-04,2021-04-05 00:00:00+00:00,Sale,Retargeting (LP visitors),Core,Native,Native,The Trade Desk,0.0,5.0,3014.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2021-04-01,2021-04-05,2021-04-01,Native - Sale,2.0,Native,The Trade Desk,Core
781537,New York Times,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP_REGIS_40XXXX...,Website Transaction - CPA,2021,2,4,17,2021-04-25,2021-04-30 00:00:00+00:00,Sale,"1P (regis, formers)",Core,Display,theTradeDesk,The Trade Desk,0.0,12.0,14968.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,1.0,3.0,4.0,2021-04-01,2021-04-26,2021-04-01,Display - Sale,5.0,Display,The Trade Desk,Core
781538,New York Times,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP_REGIS_40XXXX...,Website Transaction - CPA,2021,2,4,17,2021-04-25,2021-04-30 00:00:00+00:00,Sale,"1P (regis, formers)",Core,Display,theTradeDesk,The Trade Desk,0.0,3.0,4129.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2021-04-01,2021-04-26,2021-04-01,Display - Sale,2.0,Display,The Trade Desk,Core
805128,New York Times,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP_3BEHA_00XXXX...,Website Transaction - CPA,2021,2,5,20,2021-05-16,2021-05-18 00:00:00+00:00,Sale,"Behavioral (2P, 3P)",Core,Display,theTradeDesk,The Trade Desk,0.0,100.0,95962.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,2.0,3.0,2021-05-01,2021-05-17,2021-04-01,Display - Sale,4.0,Display,The Trade Desk,Core


In [39]:
#check dates before
spend_check_1_df = df[(df['date'] == '2020-02-14') & (df['campaign'] == 'X-UNKNOWN-X')]
spend_check_1_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product


In [40]:
spend_check_4_df = df[(df['date'] == '2020-02-16') & (df['campaign'] == 'X-UNKNOWN-X')]
spend_check_4_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
156144,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,1958.0,338830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,35.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,35.0,Social,Twitter,Low-Volume Products


In [41]:
spend_check_5_df = df[(df['campaign'] == 'X-UNKNOWN-X') & (df['date'] >= '2020-02-14') & (df['date'] < '2020-03-01')]
spend_check_5_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
156144,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,1958.0,338830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,35.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,35.0,Social,Twitter,Low-Volume Products
159623,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-17 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
163066,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-18 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,1.0,Social,Twitter,Low-Volume Products
166561,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-19 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
170676,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-20 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
174408,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-21 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
178091,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-22 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
181759,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-23 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
185437,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-24 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-24,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products


In [42]:
spend_check_2_df = spend_check_1_df[(spend_check_1_df['spend'] == 0) & 
                 (spend_check_1_df['impressions'] != 0)]
spend_check_2_df.shape

(0, 41)

In [43]:
spend_check_3_df = df[(df['date'] == '2020-02-15') & (df['campaign'] == 'NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX')]
spend_check_3_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
150984,New York Times Subscriptions,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Core,Social,Twitter,Twitter,2540.020508,4550.0,287935.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Social - Content/Audience Development,1.0,Social,Twitter,Core


In [44]:
spend_check_7_df = df[(df['date'] <= '2020-11-10') & (df['campaign'] == '2020_HS_CORE_BRA_SWAY_NA_NA_Reddit')]
spend_check_7_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
528412,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-04 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-02,2020-10-01,Other - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
529836,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,903.0,505700.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,3.0,2020-11-01,2020-11-02,2020-10-01,Other - Business As Usual,3.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
531317,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-06 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,940.0,495556.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,4.0,4.0,2020-11-01,2020-11-02,2020-10-01,Other - Business As Usual,4.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
533042,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-07 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,817.0,434960.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,8.0,8.0,2020-11-01,2020-11-02,2020-10-01,Other - Business As Usual,8.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
534401,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-08 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,995.0,502342.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,2.0,5.0,7.0,2020-11-01,2020-11-02,2020-10-01,Other - Business As Usual,7.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
535729,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-09 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,931.0,496037.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,1.0,1.0,8.0,9.0,2020-11-01,2020-11-09,2020-10-01,Other - Business As Usual,9.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
537125,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-10 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,738.0,427494.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,1.0,0.0,0.0,8.0,8.0,2020-11-01,2020-11-09,2020-10-01,Other - Business As Usual,8.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products


In [45]:
spend_df[spend_df['impressions'] > 10000].shape

(1308, 41)

In [46]:
spend_df[spend_df['impressions'] > 100000].shape

(42, 41)

In [47]:
mill_df = spend_df[spend_df['impressions'] > 1000000]
mill_df

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
395812,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,3,8,33,2020-08-16,2020-08-19 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,85197.0,21478108.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-08-01,2020-08-17,2020-07-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products


In [48]:
spend_both_conv_df.sort_values(by='date').head()

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
154432,New York Times Subscriptions,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIFOODXX,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Core,Social,Twitter,Twitter,0.0,36.0,2222.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2020-02-01,2020-02-10,2020-01-01,Social - Content/Audience Development,3.0,Social,Twitter,Core
412323,New York Times,NYTI_SAL_COREXXX_COUSA_DB_NA_WTCP,NYTI_SAL_COREXXX_COUSA_DB_NA_WTCP_LPRET_30XXXX...,Website Transaction - CPA,2020,3,9,35,2020-08-30,2020-09-03 00:00:00+00:00,Sale,Retargeting (LP visitors),Core,Native,Native,DBM_US,0.0,1.0,5.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2020-09-01,2020-08-31,2020-07-01,Native - Sale,2.0,Native,DBM_US,Core
538439,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-11 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.0,805.0,486713.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,10.0,0.0,0.0,0.0,0.0,10.0,10.0,2020-11-01,2020-11-09,2020-10-01,Other - Business As Usual,11.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
727976,New York Times,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP,NYTI_SAL_COREXXX_COUSA_TR_DB_WTCP_1BEHA_20030D...,Website Transaction - CPA,2021,1,3,12,2021-03-21,2021-03-25 00:00:00+00:00,Sale,"Behavioral (1P: MC, ATC, site visitors, anons)",Core,Display,theTradeDesk,The Trade Desk,0.0,43.0,40125.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,2.0,3.0,2021-03-01,2021-03-22,2021-01-01,Display - Sale,4.0,Display,The Trade Desk,Core


In [49]:
spend_df.value_counts(['marketing_subinitiative'])

marketing_subinitiative
Core                       116957
X-UNKNOWN-X                 48331
Games                       32422
EDU                         21406
Cooking                      9635
Home Delivery                9302
Cooking Gift                  835
Audio                         592
Games Gift                    449
Sway                          268
dtype: int64

In [50]:
spend_conv_total_df.value_counts(['channel'])

channel
Display    1293
Native      168
Other        19
Social        7
dtype: int64

# 0.4 Column by Column 'X-UNKNOWN-X' Exploration 

## Account

In [51]:
df['account'].value_counts()

New York Times                  493612
NYT SA360                       235659
Growth - DR/Engagement           36487
New York Times DR                27081
In-House Subscriptions           24664
New York Times Cooking            8923
New York Times Subscriptions      5689
DR/Engagement Combined Q1.18       634
Media Team_Test Cell 2             231
New York Times Hearts              230
DISPLAY_NYT_Trueview               220
NYT Crossword - Bamboo             211
Audience Engagement 1              207
NYT Games                          195
NYTimes Events                      35
X-UNKNOWN-X                         34
Name: account, dtype: int64

In [52]:
df['account'].value_counts(normalize = True)

New York Times                  0.591781
NYT SA360                       0.282527
Growth - DR/Engagement          0.043744
New York Times DR               0.032467
In-House Subscriptions          0.029569
New York Times Cooking          0.010698
New York Times Subscriptions    0.006820
DR/Engagement Combined Q1.18    0.000760
Media Team_Test Cell 2          0.000277
New York Times Hearts           0.000276
DISPLAY_NYT_Trueview            0.000264
NYT Crossword - Bamboo          0.000253
Audience Engagement 1           0.000248
NYT Games                       0.000234
NYTimes Events                  0.000042
X-UNKNOWN-X                     0.000041
Name: account, dtype: float64

In [53]:
df[df['account'] == 'X-UNKNOWN-X']['total_conversions'].sum()

97.0

In [54]:
df[df['account'] == 'X-UNKNOWN-X']['all_conversions_vt'].sum()

85.0

In [55]:
df[df['account'] == 'X-UNKNOWN-X'].head()

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
156144,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,1958.0,338830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,35.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,35.0,Social,Twitter,Low-Volume Products
159623,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-17 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
163066,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-18 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,1.0,Social,Twitter,Low-Volume Products
166561,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-19 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products


## Campaign

In [56]:
print(df['account'].unique())

['Audience Engagement 1' 'In-House Subscriptions' 'NYT SA360'
 'New York Times' 'New York Times Subscriptions' 'NYT Crossword - Bamboo'
 'Growth - DR/Engagement' 'New York Times DR'
 'DR/Engagement Combined Q1.18' 'New York Times Hearts'
 'DISPLAY_NYT_Trueview' 'X-UNKNOWN-X' 'NYTimes Events'
 'Media Team_Test Cell 2' 'New York Times Cooking' 'NYT Games ']


In [57]:
percent_campaign_unknown_df = df['campaign'].value_counts(normalize = True)
percent_campaign_unknown_df['X-UNKNOWN-X']

4.076191206936239e-05

In [58]:
campaign_unknown = df[df['campaign'] == 'X-UNKNOWN-X']
campaign_unknown.shape

(34, 41)

In [59]:
campaign_unknown.head(10)

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
156144,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,1958.0,338830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,35.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,35.0,Social,Twitter,Low-Volume Products
159623,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-17 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
163066,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-18 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,1.0,Social,Twitter,Low-Volume Products
166561,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-19 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
170676,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-20 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
174408,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-21 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
178091,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-22 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
181759,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-23 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
185437,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-24 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-24,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products


## Match

In [60]:
percent_match_unknown_df = df['_match'].value_counts(normalize = True)
percent_match_unknown_df['X-UNKNOWN-X']

4.076191206936239e-05

In [61]:
match_unknown_df = df[df['_match'] == 'X-UNKNOWN-X']
match_unknown_df.shape

(34, 41)

**Seems like these 34 rows all have to do with Twitter and the unknown variables are throughout metadata/cols**

In [62]:
match_unknown_df.sort_values(by=['date'])

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
152694,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,88573.0,21357204.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,49.0,49.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,51.0,Social,Twitter,Low-Volume Products
156144,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,1958.0,338830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,35.0,2020-02-01,2020-02-10,2020-01-01,Social - X-UNKNOWN-X,35.0,Social,Twitter,Low-Volume Products
159623,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-17 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
163066,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-18 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,1.0,Social,Twitter,Low-Volume Products
166561,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-19 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
170676,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-20 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
174408,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-21 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,3.0,Social,Twitter,Low-Volume Products
178091,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-22 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
181759,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-23 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products
185437,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,2020,1,2,8,2020-02-23,2020-02-24 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Twitter,Twitter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-24,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Twitter,Low-Volume Products


## Campaign Objective

In [63]:
campaign_obj_df = df[df['campaign_objective'] == 'X-UNKNOWN-X']
campaign_obj_df.shape

(113930, 41)

In [64]:
df.value_counts('campaign_objective')

campaign_objective
Website Transaction - CPA    403260
Website Engagement           181087
X-UNKNOWN-X                  113930
Exact Match                   24439
Phrase Match                  23601
Broad Match Modified          23335
Prospecting                   17958
Remarketing                   14601
XXX                            9108
Subscriptions                  5824
Conquesting                    1664
Education                      1456
NonBrand Expansion             1384
Winback                        1352
eCPC Experiment                1238
ROW High                        998
Prospecting Control             917
ROW Low                         910
RSA Experiment REM              810
Control                         675
Dynamic Search Ads              617
Debates                         520
Remarketing Control             510
Exact RSA- Core                 506
Delivery                        321
Gifting                         302
Engager                         273
Remarketi

In [65]:
df['campaign_objective'].value_counts(normalize = True)

Website Transaction - CPA    0.483460
Website Engagement           0.217102
X-UNKNOWN-X                  0.136588
Exact Match                  0.029299
Phrase Match                 0.028295
Broad Match Modified         0.027976
Prospecting                  0.021529
Remarketing                  0.017505
XXX                          0.010919
Subscriptions                0.006982
Conquesting                  0.001995
Education                    0.001746
NonBrand Expansion           0.001659
Winback                      0.001621
eCPC Experiment              0.001484
ROW High                     0.001196
Prospecting Control          0.001099
ROW Low                      0.001091
RSA Experiment REM           0.000971
Control                      0.000809
Dynamic Search Ads           0.000740
Debates                      0.000623
Remarketing Control          0.000611
Exact RSA- Core              0.000607
Delivery                     0.000385
Gifting                      0.000362
Engager     

In [66]:
df[df['campaign_objective'] == 'X-UNKNOWN-X']['total_conversions'].sum()

167052.0

In [67]:
df[df['campaign_objective'] == 'X-UNKNOWN-X']['all_conversions_vt'].sum()

59360.0

In [68]:
df[df['campaign_objective'] == 'X-UNKNOWN-X']['all_conversions_ct'].sum()

107692.0

In [69]:
#seems to have unknowns for entire date range
campaign_obj_df.sort_values(by=['date'])

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
0,Audience Engagement 1,Book Review Live 2020,BRAND ASSET - B&W,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Social,Facebook,Facebook,94.010001,142.0,26617.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Social - X-UNKNOWN-X,0.0,Social,Facebook,Low-Volume Products
2436,NYT SA360,xSEM_Digital/HD/Shared,G_GSP_US_Shared,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,SA360,Google AdWords,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,X-UNKNOWN-X - X-UNKNOWN-X,0.0,Low-Volume Channels,Google AdWords,Low-Volume Products
2439,NYT SA360,xSEM_Digital/HD/Shared,G_GSP_US_Shared_YearInReview,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,SA360,Google AdWords,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,X-UNKNOWN-X - X-UNKNOWN-X,0.0,Low-Volume Channels,Google AdWords,Low-Volume Products
2441,NYT SA360,xSEM_Digital/HD/Shared,G_GSP_US_Subscribers_PA_Placeholder,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,SA360,Google AdWords,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,X-UNKNOWN-X - X-UNKNOWN-X,0.0,Low-Volume Channels,Google AdWords,Low-Volume Products
2448,NYT SA360,xSEM_Digital/HD/Shared,G_STD_US_Digital_NB_Winback_Exact,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,SA360,Google AdWords,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,X-UNKNOWN-X - Business As Usual,0.0,Low-Volume Channels,Google AdWords,Low-Volume Products
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
873109,New York Times Hearts,2020_HS_CORE_BRA_SWAY_NA_NA_Reddit,NT_AF_AUDI_BAU_SWAYSARAHCOOPER_USGM_SOC_RED_DB...,X-UNKNOWN-X,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Audio,Other,DCM,Reddit,0.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Other - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Low-Volume Products
873108,New York Times Subscriptions,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIBusEconXX,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIBUSECONX...,X-UNKNOWN-X,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Core,Social,Twitter,Twitter,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Content/Audience Development,0.0,Social,Twitter,Core
874486,DISPLAY_NYT_Trueview,NYTI_XXX_COREXXX_COUSA_AW_VI_WTCP_XXXXXXXXXX,NYTI_XXX_COREXXX_COUSA_AW_VI_WTCP_XXXXXXXXXX,X-UNKNOWN-X,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,Core,Youtube,Youtube,Adwords (Youtube),7974.505371,499.0,258390.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Youtube - X-UNKNOWN-X,0.0,Low-Volume Channels,Low-Volume Sites,Core
873736,New York Times Subscriptions,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFIScienceXX,NYTI_AUC_COREXXX_COUSA_TW_SO_WTCP_TAFISCIENCEX...,X-UNKNOWN-X,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Core,Social,Twitter,Twitter,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Content/Audience Development,0.0,Social,Twitter,Core


In [70]:
campaign_obj_df.value_counts('marketing_subinitiative')

marketing_subinitiative
X-UNKNOWN-X      48649
Games            26021
Core             18029
Cooking          15756
EDU               4888
Cooking Gift       208
Home Delivery      208
Audio              171
dtype: int64

In [71]:
campaign_obj_df.value_counts('channel')

channel
X-UNKNOWN-X    59207
Paid Search    40678
Social         13454
Display          244
Other            171
Youtube          164
Video             12
dtype: int64

In [72]:
campaign_obj_df[['marketing_subinitiative', 'channel']].loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
                                                            (df['channel'] == 'X-UNKNOWN-X')]

Unnamed: 0,marketing_subinitiative,channel
2395,X-UNKNOWN-X,X-UNKNOWN-X
2396,X-UNKNOWN-X,X-UNKNOWN-X
2397,X-UNKNOWN-X,X-UNKNOWN-X
2398,X-UNKNOWN-X,X-UNKNOWN-X
2399,X-UNKNOWN-X,X-UNKNOWN-X
...,...,...
521105,X-UNKNOWN-X,X-UNKNOWN-X
521106,X-UNKNOWN-X,X-UNKNOWN-X
521107,X-UNKNOWN-X,X-UNKNOWN-X
521108,X-UNKNOWN-X,X-UNKNOWN-X


## Year, Quarter, Month, week, week_start, Date

In [73]:
df[df['Year'] == 'X-UNKNOWN-X'].shape

(0, 41)

In [74]:
df[df['Quarter'] == 'X-UNKNOWN-X'].shape

(0, 41)

In [75]:
df[df['Month'] == 'X-UNKNOWN-X'].shape

(0, 41)

In [76]:
df[df['week'] == 'X-UNKNOWN-X'].shape

(0, 41)

In [77]:
df[df['week_start'] == 'X-UNKNOWN-X'].shape

(0, 41)

In [78]:
df[df['date'] == 'X-UNKNOWN-X'].shape

(0, 41)

## Marketing Initiative (Tactic)

In [79]:
df[df['marketing_initiative'] == 'X-UNKNOWN-X'].shape

(47719, 41)

In [80]:
#what do normal values look like
df.value_counts('marketing_initiative')

marketing_initiative
Business As Usual               327387
Sale                            226016
Content/Audience Development    209271
X-UNKNOWN-X                      47719
One Day Sale                     16743
Gifting                           4324
Testing                           1224
Engagement                        1144
December Holiday Cooking           104
Thanksgiving Cooking               104
Non-Specific                        54
Audience Content                    22
dtype: int64

In [81]:
df['marketing_initiative'].value_counts(normalize = True)

Business As Usual               0.392498
Sale                            0.270966
Content/Audience Development    0.250891
X-UNKNOWN-X                     0.057209
One Day Sale                    0.020073
Gifting                         0.005184
Testing                         0.001467
Engagement                      0.001372
Thanksgiving Cooking            0.000125
December Holiday Cooking        0.000125
Non-Specific                    0.000065
Audience Content                0.000026
Name: marketing_initiative, dtype: float64

In [82]:
df[df['marketing_initiative']== 'X-UNKNOWN-X'].shape

(47719, 41)

In [83]:
df[df['marketing_initiative'] == 'X-UNKNOWN-X']['total_conversions'].sum()

2039.0

In [84]:
df[df['marketing_initiative'] == 'X-UNKNOWN-X']['all_conversions_vt'].sum()

1945.0

In [85]:
df[df['marketing_initiative'] == 'X-UNKNOWN-X']['all_conversions_ct'].sum()

94.0

In [86]:
#didn't change anything df[df['marketing_initiative'] == 'X-UNKNOWN-X']['total_conversions'].sum()

#any campaigns with 'BAU' -> Business As Usual
df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['campaign'].str.contains('BAU')), 'marketing_initiative'] = 'Business As Usual'
df[df['marketing_initiative']== 'X-UNKNOWN-X'].shape

(47719, 41)

In [87]:
#didn't change anything


# #One Day Sale
# df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
#        (df['campaign'].str.contains('ODS')), 'marketing_initiative'] = 'One Day Sale'

# #Gifting
# #didn't change anything
# df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
#        (df['campaign'].str.contains('GFT')), 'marketing_initiative'] = 'Gifting'
       
       
# #Testing
# df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
#        (df['campaign'].str.contains('TST')), 'marketing_initiative'] = 'Testing'    

In [88]:
df[df['marketing_initiative']== 'X-UNKNOWN-X'].value_counts('channel')

channel
X-UNKNOWN-X    34528
Paid Search    12390
Social           543
Youtube          158
Display          100
dtype: int64

In [89]:
paid_search_mkt_init_df = df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Paid Search')]
paid_search_mkt_init_df.value_counts('campaign')

campaign
xNYTimes - Digital/HD/Shared    10296
NYTimes Crosswords               2080
1619_Search campaign               14
dtype: int64

In [90]:
unknown_channel_mkt_init_df = df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'X-UNKNOWN-X')]
unknown_channel_mkt_init_df.value_counts('campaign')

campaign
xSEM_Digital/HD/Shared            29640
xSEM_International Digi/Shared     3848
xSEM_EDU Domestic                   624
xSEM_EDU International              416
dtype: int64

In [91]:
social_mkt_init_df = df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Social')]
social_mkt_init_df.value_counts('campaign')

campaign
Travel Show                                     86
Verizon - Free HS Access Campaign               81
Preliminary Digital Event - V2                  41
WIRECUTTER_Q4 BRAND LIFT TEST                   36
Live At Home - June                             35
X-UNKNOWN-X                                     34
2020_NT_CORE_XXX_ProspectingDesktop_NA          24
Politics Live -                                 24
Digital Events Campaign - General               23
Book Review Live 2020                           21
2020_NT_CORE_XXX_SeasonalDesktop_NA             19
2020_NT_CORE_XXX_SiteVisitorsDesktop_NA         19
2020_NT_CORE_XXX_RonMobile_NA                   17
2020_NT_CORE_XXX_RonDesktop_NA                  17
2020_NT_CORE_XXX_ProspectingMobile_NA           16
2020_NT_CORE_XXX_SiteVisitorsMobile_NA          12
2020_NT_CORE_XXX_SeasonalMobile_NA              11
2020_NT_CORE_XXX_BooksDesktop_NA                 7
Event: The New York Times Travel Show 2020       6
Times Talks | The Seat

In [92]:
youtube_mkt_init_df = df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Youtube')]
youtube_mkt_init_df.value_counts('campaign')

campaign
NYTI_XXX_COREXXX_COUSA_AW_VI_WTCP_XXXXXXXXXX    158
dtype: int64

In [93]:
display_mkt_init_df = df.loc[(df['marketing_initiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Display')]
display_mkt_init_df.value_counts('campaign')

campaign
Test_DO NOT USE                      59
NYTI_TRK_SEARCHx_XXXXX_KS_SE_WTRF    35
Sandbox Campaign                      6
dtype: int64

## Marketing Segment

In [94]:
df[df['marketing_segment']== 'X-UNKNOWN-X'].shape

(106858, 41)

In [95]:
df['marketing_segment'].value_counts(normalize = True)

Behavioral (2P, 3P)                               0.184924
Behavioral (1P: MC, ATC, site visitors, anons)    0.149558
1P (regis, formers)                               0.146995
X-UNKNOWN-X                                       0.128110
Retargeting (LP visitors)                         0.056678
                                                    ...   
Games                                             0.000106
Website Transaction - CPA                         0.000101
Prospecting                                       0.000085
Registration                                      0.000085
Site Visitors                                     0.000001
Name: marketing_segment, Length: 62, dtype: float64

In [96]:
df[df['marketing_segment'] == 'X-UNKNOWN-X']['total_conversions'].sum()

167052.0

In [97]:
df[df['marketing_segment'] == 'X-UNKNOWN-X']['all_conversions_vt'].sum()

59360.0

In [98]:
#what do normal values look like
df.value_counts('marketing_segment')

marketing_segment
Behavioral (2P, 3P)                               154247
Behavioral (1P: MC, ATC, site visitors, anons)    124748
1P (regis, formers)                               122610
X-UNKNOWN-X                                       106858
Retargeting (LP visitors)                          47276
                                                   ...  
Games                                                 88
Website Transaction - CPA                             84
Registration                                          71
Prospecting                                           71
Site Visitors                                          1
Length: 62, dtype: int64

In [99]:
df[df['marketing_segment']== 'X-UNKNOWN-X'].value_counts('channel')

channel
X-UNKNOWN-X    58687
Paid Search    34126
Social         13454
Display          244
Other            171
Youtube          164
Video             12
dtype: int64

In [100]:
unknown_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'X-UNKNOWN-X')]
unknown_mkt_segment_df.value_counts('campaign')

campaign
xSEM_Digital/HD/Shared            32968
NYTI_BAU_CROSSWO_COUSA_AW_BR       7311
xSEM_International Digi/Shared     6032
NYTI_BAU_COOKING_COUSA_AW          3744
NYTI_BAU_COREXXX_COALL_AW_BR       2392
NYTI_BAU_CROSSWO_COALL_AW_BR       1456
NYTI_AUC_COREXXX_COUSA_AW_NB       1040
NYTI_BAU_EDUXXXX_COALL_AW_NB        936
NYTI_BAU_CROSSWO_COUSA_AW_NB        832
xSEM_EDU Domestic                   832
xSEM_EDU International              728
NYTI_BAU_COREXXX_COUSA_AW_BR        208
NYTI_BAU_COREXXX_COUSA_AW_NB        208
dtype: int64

In [101]:
paid_search_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Paid Search')]
paid_search_mkt_segment_df.value_counts('campaign')

campaign
xNYTimes - Digital/HD/Shared    13208
NYTimes Crosswords              10088
NYTI_BAU_COREXXX_COALL_BN_BR     6864
xNYTimes EDU                     2184
NYTimes Cooking                   832
NYTI_BAU_CROSSWO_COUSA_AW_BR      728
NYTI_BAU_COOKING_COUSA_AW         208
1619_Search campaign               14
dtype: int64

In [102]:
social_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Social')]
social_mkt_segment_df.value_counts('campaign')

campaign
NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX       3619
NYTI_BAU_COOKING_COUSA_FB_SO_BTCP_2019BRANDXXXX    2855
NYTI_BAU_COOKING_COUSA_FB_SO_BTCP_2020NEWXXXXXX    2640
NYTI_AUC_XXXXXXX_COUSA_FB_SO_WTCP_TAFI20_AUDT       337
2020_NT_CORE_XXX_RonDesktop_NA                      139
                                                   ... 
NYTI_XXX_COREXXX_COUSA_FB_SO_WTCP_TAFI20_TEST         2
SANDBOX_CAMPAIGN_JREYX_ALL_SO_WTCP                    2
NYTI_XXX_COREXXX_COUSA_SC_SO_WTCP_XXXXXXXXXX          1
Preliminary Digital Event                             1
NYTI_BAU_CROSSWD_COUSA_TW_SO_ATCP_AndRetargt          1
Length: 113, dtype: int64

In [103]:
display_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Display')]
display_mkt_segment_df.value_counts('campaign')

campaign
Test_DO NOT USE                                 59
NYTI_TST_COREXXX_COUSA_DB_DB_WTCP               40
NYTI_TRK_SEARCHx_XXXXX_KS_SE_WTRF               35
NYTI_BRA_COREXXX_COUSA_DR_DB_WTCP_EARTHDAYXX    25
NYTI_GFT_COREXXX_COUSA_XX_DB_WTCP               17
NYTI_BRA_COOKING_COUSA_DR_DB_WENG               14
NYTI_SAL_COREXXX_COUSA_DB_DB_WTCP               14
NYTI_AUC_COREXXX_COALL_DB_NA_WENG               11
TEST_SAL_STARKXX_COUSA_DB_DB_WTCP               11
NYTI_BRA_COREXXX_COUSA_DR_DB_BBND_DEBATESXXX     7
Sandbox Campaign                                 6
NYTI_SAL_COREXXX_COALL_DB_NA_WTCP                3
NYTI_AUC_COREXXX_COUSA_DB_NA_WENG                1
NYTI_BRA_COREXXX_COUSA_DR_VI_BBND_DEBATESXXX     1
dtype: int64

In [104]:
other_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Other')]
other_mkt_segment_df.value_counts('campaign')

campaign
2020_HS_CORE_BRA_SWAY_NA_NA_Reddit    171
dtype: int64

In [105]:
youtube_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Youtube')]
youtube_mkt_segment_df.value_counts('campaign')

campaign
NYTI_XXX_COREXXX_COUSA_AW_VI_WTCP_XXXXXXXXXX    158
NYTI_BAU_COOKING_COUSA_AW_VI_WTCP_XXXXXXXXXX      6
dtype: int64

In [106]:
video_mkt_segment_df = df.loc[(df['marketing_segment'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Video')]
video_mkt_segment_df.value_counts('campaign')

campaign
NYTI_BRA_COREXXX_COGBR_DR_VI_WENG               11
NYTI_BRA_COREXXX_COUSA_DR_VI_WTCP_GLGSXXXXXX     1
dtype: int64

## Marketing Subinitiative (Product)

In [107]:
df[df['marketing_subinitiative']== 'X-UNKNOWN-X'].shape

(48679, 41)

In [108]:
#what do normal values look like
df.value_counts('marketing_subinitiative')

marketing_subinitiative
Core             574806
Games             78861
Cooking           54365
X-UNKNOWN-X       48679
Home Delivery     48379
EDU               25891
Cooking Gift        943
Games Gift          881
Audio               738
Sway                569
dtype: int64

In [109]:
df['marketing_subinitiative'].value_counts(normalize = True)

Core             0.689123
Games            0.094545
Cooking          0.065177
X-UNKNOWN-X      0.058360
Home Delivery    0.058001
EDU              0.031040
Cooking Gift     0.001131
Games Gift       0.001056
Audio            0.000885
Sway             0.000682
Name: marketing_subinitiative, dtype: float64

In [110]:
df[df['marketing_subinitiative']== 'X-UNKNOWN-X'].value_counts('channel')

channel
X-UNKNOWN-X    36400
Paid Search    11766
Social           395
Display           88
Native            30
dtype: int64

In [111]:
unknown_mkt_subinit_df = df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'X-UNKNOWN-X')]
unknown_mkt_subinit_df.value_counts('campaign')

campaign
xSEM_Digital/HD/Shared            30992
xSEM_International Digi/Shared     5408
dtype: int64

In [112]:
paid_search_mkt_subinit_df = df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Paid Search')]
paid_search_mkt_subinit_df.value_counts('campaign')

campaign
xNYTimes - Digital/HD/Shared    11752
1619_Search campaign               14
dtype: int64

In [113]:
social_mkt_subinit_df = df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Social')]
social_mkt_subinit_df.value_counts('campaign')

campaign
Travel Show                                   86
Verizon - Free HS Access Campaign             81
Preliminary Digital Event - V2                41
WIRECUTTER_Q4 BRAND LIFT TEST                 36
Live At Home - June                           35
X-UNKNOWN-X                                   34
Politics Live -                               24
Digital Events Campaign - General             23
Book Review Live 2020                         21
Event: The New York Times Travel Show 2020     6
Times Talks | The Seattle Start-Up Engine      5
SANDBOX_CAMPAIGN_JREYX_ALL_SO_WTCP             2
Preliminary Digital Event                      1
dtype: int64

In [114]:
display_mkt_subinit_df = df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Display')]
display_mkt_subinit_df.value_counts('campaign')

campaign
Test_DO NOT USE                      59
NYTI_TRK_SEARCHx_XXXXX_KS_SE_WTRF    12
TEST_SAL_STARKXX_COUSA_DB_DB_WTCP    11
Sandbox Campaign                      6
dtype: int64

In [115]:
native_mkt_subinit_df = df.loc[(df['marketing_subinitiative'] == 'X-UNKNOWN-X') & 
       (df['channel'] == 'Native')]
native_mkt_subinit_df.value_counts('campaign')

campaign
DISPLAY_NYT_Trueview                            15
NYTI_BAU_CROSSWD_COUSA_GO_NA_WTCP_XXXXXXXXXX    15
dtype: int64

## Channel

**Look at unknowns -> meta data?**

In [116]:
df['channel'].value_counts()

Display        255366
Native         238861
Paid Search    175597
Social         104483
X-UNKNOWN-X     59207
Video             263
Other             171
Youtube           164
Name: channel, dtype: int64

In [117]:
df['channel'].value_counts(normalize = True)

Display        0.306153
Native         0.286366
Paid Search    0.210520
Social         0.125263
X-UNKNOWN-X    0.070982
Video          0.000315
Other          0.000205
Youtube        0.000197
Name: channel, dtype: float64

In [118]:
df[df['channel']== 'X-UNKNOWN-X'].shape

(59207, 41)

In [119]:
#marketing init, subinit, OR segment are unknown 
unknown_or_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
       (
          (df['marketing_initiative'] == 'X-UNKNOWN-X') | 
          (df['marketing_subinitiative'] == 'X-UNKNOWN-X') | 
          (df['marketing_segment'] == 'X-UNKNOWN-X')
       )]
unknown_or_df.shape

(58687, 41)

In [120]:
#all are unknown 
unknown_and_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
          (df['marketing_initiative'] == 'X-UNKNOWN-X') & 
          (df['marketing_subinitiative'] == 'X-UNKNOWN-X') &
          (df['marketing_segment'] == 'X-UNKNOWN-X')]
unknown_and_df.shape

(33280, 41)

In [121]:
#only mkt segment unknown
unknown_channel_mkt_segment_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
       (df['marketing_segment'] == 'X-UNKNOWN-X')]
unknown_channel_mkt_segment_df.shape

(58687, 41)

In [122]:
#only mkt initiative unknown
unknown_channel_mkt_init_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
       (df['marketing_initiative'] == 'X-UNKNOWN-X')]
unknown_channel_mkt_init_df.shape

(34528, 41)

In [123]:
#only mkt subinitiative unknown
unknown_channel_mkt_subinit_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
       (df['marketing_subinitiative'] == 'X-UNKNOWN-X')]
unknown_channel_mkt_subinit_df.shape

(36400, 41)

In [124]:
unknown_mkt_init_and_subinit_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
          (df['marketing_initiative'] == 'X-UNKNOWN-X') & 
          (df['marketing_subinitiative'] == 'X-UNKNOWN-X')]
unknown_mkt_init_and_subinit_df.shape

(33280, 41)

In [125]:
unknown_mkt_segment_and_init_df = df.loc[(df['channel'] == 'X-UNKNOWN-X') & 
          (df['marketing_segment'] == 'X-UNKNOWN-X') & 
          (df['marketing_initiative'] == 'X-UNKNOWN-X')]
unknown_mkt_segment_and_init_df.shape

(34528, 41)

In [126]:
#only clickthrough conversions recorded 
unknown_channel_df = df[df['channel']== 'X-UNKNOWN-X']
unknown_channel_df[['channel','all_conversions_ct','all_conversions_vt','total_conversions']].sum()

channel               X-UNKNOWN-XX-UNKNOWN-XX-UNKNOWN-XX-UNKNOWN-XX-...
all_conversions_ct                                                104.0
all_conversions_vt                                                  0.0
total_conversions                                                 104.0
dtype: object

In [127]:
#show the clickthrough conversions
#could relabel these as Paid Search???
unknown_channel_df[unknown_channel_df['all_conversions_ct'] > 0]

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
530516,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,132.039993,164.0,383.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-02,2020-10-01,X-UNKNOWN-X - Business As Usual,2.0,Low-Volume Channels,Google AdWords,Games
532015,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-06 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,171.389999,157.0,377.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-02,2020-10-01,X-UNKNOWN-X - Business As Usual,3.0,Low-Volume Channels,Google AdWords,Games
533734,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,44,2020-11-01,2020-11-07 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,153.619995,179.0,307.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-02,2020-10-01,X-UNKNOWN-X - Business As Usual,2.0,Low-Volume Channels,Google AdWords,Games
535078,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-08 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,227.850006,177.0,387.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-02,2020-10-01,X-UNKNOWN-X - Business As Usual,4.0,Low-Volume Channels,Google AdWords,Games
536476,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-09 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,149.839996,167.0,380.0,0.0,4.0,0.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,6.0,Low-Volume Channels,Google AdWords,Games
537809,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-10 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,190.539993,171.0,393.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,3.0,Low-Volume Channels,Google AdWords,Games
539138,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-11 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,214.369995,182.0,389.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,1.0,Low-Volume Channels,Google AdWords,Games
541859,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-13 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,160.399994,148.0,399.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,3.0,Low-Volume Channels,Google AdWords,Games
543237,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,45,2020-11-08,2020-11-14 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,204.660004,196.0,425.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,2.0,Low-Volume Channels,Google AdWords,Games
544587,NYT SA360,NYTI_BAU_CROSSWO_COUSA_AW_BR,NYTI_BAU_CROSSWO_cOUSA_AW_BR_SUBSX_RRS,X-UNKNOWN-X,2020,4,11,46,2020-11-15,2020-11-15 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Games,X-UNKNOWN-X,SA360,Google AdWords,151.300003,194.0,410.0,1.0,6.0,0.0,0.0,0.0,1.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-11-01,2020-11-09,2020-10-01,X-UNKNOWN-X - Business As Usual,7.0,Low-Volume Channels,Google AdWords,Games


## Platform

In [128]:
df['platform'].value_counts()

DCM                        246776
Native                     238861
Google Ads                 108293
SA360                      100599
Facebook                    62223
Snapchat                    36039
Bing                        26730
theTradeDesk                 7941
Twitter                      6131
AdWords (GDN, GVN, GSP)       251
Youtube                       164
LinkedIn                       69
Pinterest                      17
X-UNKNOWN-X                    14
Reddit                          4
Name: platform, dtype: int64

In [129]:
df['platform'].value_counts(normalize = True)

DCM                        0.295855
Native                     0.286366
Google Ads                 0.129830
SA360                      0.120606
Facebook                   0.074598
Snapchat                   0.043206
Bing                       0.032046
theTradeDesk               0.009520
Twitter                    0.007350
AdWords (GDN, GVN, GSP)    0.000301
Youtube                    0.000197
LinkedIn                   0.000083
Pinterest                  0.000020
X-UNKNOWN-X                0.000017
Reddit                     0.000005
Name: platform, dtype: float64

In [130]:
df[df['platform'] == 'X-UNKNOWN-X']

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
125216,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,5,2020-02-02,2020-02-08 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,34.290001,18.0,686.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-03,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
128525,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-09 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,981.390015,168.0,5435.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-03,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
131834,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-10 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,1444.390015,221.0,4658.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
135181,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-11 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,895.02002,116.0,2804.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
139368,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-12 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,461.910004,88.0,2150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
143117,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-13 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,746.669983,75.0,2168.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
146710,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-14 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,605.090027,58.0,3022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
150211,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,6,2020-02-09,2020-02-15 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,442.26001,48.0,2497.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
153665,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-16 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,501.790009,98.0,3776.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-10,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products
157115,DISPLAY_NYT_Trueview,1619_Search campaign,1619_SEARCH CAMPAIGN,X-UNKNOWN-X,2020,1,2,7,2020-02-16,2020-02-17 00:00:00+00:00,X-UNKNOWN-X,X-UNKNOWN-X,X-UNKNOWN-X,Paid Search,X-UNKNOWN-X,Adwords,781.169983,127.0,6625.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-01,2020-02-17,2020-01-01,Paid Search - X-UNKNOWN-X,0.0,Paid Search,Low-Volume Sites,Low-Volume Products


## Site

In [131]:
df['site'].value_counts()

DBM_US                                 483662
Google AdWords                         171813
Facebook                                62223
Bing Ads                                60525
Snap                                    36039
The Trade Desk                           9256
Twitter                                  6131
Google AdWords (DSA)                     3321
AdWords - Discovery                       346
GDN                                       251
Reddit                                    175
Adwords (Youtube)                         164
LinkedIn US                                69
Adwords                                    56
cnn.com                                    33
Pinterest                                  17
All4_GBR_1933                              11
Conde Nast Digital - The New Yorker         9
Bing US                                     5
TLC (Bon Appetit & Epicurious)              4
Vox                                         1
NBC Universal – NBC Entertainment 

In [132]:
df[df['site'] == 'X-UNKNOWN-X']

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product


# 0.5 Miscellaneous

In [133]:
# no longer using
unknown_data = {
    'account': [34],
    'campaign': [34],
    '_match': [34],
    'campaign_objective': [113623],
    'date': [0],
    'marketing_initiative': [47690],
    'marketing_segment': [146257],
    'marketing_subinitiative': [54373],
    'channel': [67093],
    'platform': [100]
}

In [134]:
# no longer using 
zero_data = {
    'spend': [287886],
    'clicks': [455041],
    'impressions': [257788],
    'digi_ada_conversions_ct': [848358],
    'games_conversions_ct': [866122],
    'edu_conversions_ct': [869552],
    'hd_conversions_ct': [863469], 
    'ck_conversions_ct': [866666],
    'core_conversions_ct': [839002],
    'all_conversions_ct': [837180],
    'digi_ada_conversions_vt': [743328],
    'games_conversions_vt': [850975],
    'edu_conversions_vt': [858446],
    'hd_conversions_vt': [853454],
    'ck_conversions_vt': [837689],
    'core_conversions_vt': [711931],
    'all_conversions_vt': [699470]   
}

In [135]:
#Site traffic
df['site'].value_counts()

DBM_US                                 483662
Google AdWords                         171813
Facebook                                62223
Bing Ads                                60525
Snap                                    36039
The Trade Desk                           9256
Twitter                                  6131
Google AdWords (DSA)                     3321
AdWords - Discovery                       346
GDN                                       251
Reddit                                    175
Adwords (Youtube)                         164
LinkedIn US                                69
Adwords                                    56
cnn.com                                    33
Pinterest                                  17
All4_GBR_1933                              11
Conde Nast Digital - The New Yorker         9
Bing US                                     5
TLC (Bon Appetit & Epicurious)              4
Vox                                         1
NBC Universal – NBC Entertainment 

In [136]:
df[df['site'] == 'DBM_US'].value_counts('channel')

channel
Display    246546
Native     237116
dtype: int64

In [137]:
# low volume vs high volume channels
df[df['channel'] == 'Video'][['all_conversions_vt', 'all_conversions_ct','total_conversions']].sum()

all_conversions_vt    3038.0
all_conversions_ct     151.0
total_conversions     3189.0
dtype: float64

In [138]:
df[df['channel'] == 'Video'].head(15)

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
14670,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,1,1,1,2020-01-05,2020-01-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
272024,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,2,4,16,2020-04-19,2020-04-21 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-04-01,2020-04-20,2020-04-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
286961,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,2,5,18,2020-05-03,2020-05-04 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-05-01,2020-05-04,2020-04-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
287700,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,2,5,18,2020-05-03,2020-05-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-05-01,2020-05-04,2020-04-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
288663,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,2,5,18,2020-05-03,2020-05-07 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-05-01,2020-05-04,2020-04-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
304486,New York Times,NYTI_BRA_COREXXX_COUSA_DR_VI_WTCP_GLGSXXXXXX,CONTX_000000FPM_WLCD_XXXX_BROADXXXXXXXX_XXXXX_...,X-UNKNOWN-X,2020,2,6,22,2020-05-31,2020-06-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,NBC Universal – NBC Entertainment,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-06-01,2020-06-01,2020-04-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
356872,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,3,7,27,2020-07-05,2020-07-05 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-07-01,2020-06-29,2020-07-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
372723,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,3,7,30,2020-07-26,2020-07-29 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-07-01,2020-07-27,2020-07-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
522957,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2020,4,10,43,2020-10-25,2020-10-30 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-10-01,2020-10-26,2020-10-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core
619373,New York Times,NYTI_BRA_COREXXX_COGBR_DR_VI_WENG,ADEMO_000000XXX_XXXX_XXXX_ALL4BVODABC1X_XXXXX_...,X-UNKNOWN-X,2021,1,1,1,2021-01-03,2021-01-03 00:00:00+00:00,Business As Usual,X-UNKNOWN-X,Core,Video,DCM,All4_GBR_1933,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-01-01,2020-12-28,2021-01-01,Video - Business As Usual,0.0,Low-Volume Channels,Low-Volume Sites,Core


# 0.6 Investigate why cooking has no paid search

## Switch to _vw table fixed it

In [139]:
ck_df = df[df['marketing_subinitiative']== 'Cooking']

In [140]:
ck_df.head()

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
61,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,BROAD_000000XXX_2PFB_XXXX_BROAD18XXXPRO_X1865_...,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Cooking,Social,Facebook,Facebook,2.64,0.0,1571.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,2020-01-01,2019-12-30,2020-01-01,Social - Content/Audience Development,1.0,Social,Facebook,Cooking
62,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,BROAD_000000XXX_2PFB_XXXX_BROAD18XXXPRO_X1865_...,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Cooking,Social,Facebook,Facebook,2.17,0.0,1369.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,2020-01-01,2019-12-30,2020-01-01,Social - Content/Audience Development,2.0,Social,Facebook,Cooking
63,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,BROAD_000000XXX_2PFB_XXXX_BROAD18XXXPRO_X1865_...,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Cooking,Social,Facebook,Facebook,14.45,11.0,2458.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Social - Content/Audience Development,0.0,Social,Facebook,Cooking
64,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,BROAD_000000XXX_2PFB_XXXX_BROAD18XXXPRO_X1865_...,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Cooking,Social,Facebook,Facebook,7.4,4.0,3053.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Social - Content/Audience Development,0.0,Social,Facebook,Cooking
65,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,BROAD_000000XXX_2PFB_XXXX_BROAD18XXXPRO_X1865_...,X-UNKNOWN-X,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,X-UNKNOWN-X,Cooking,Social,Facebook,Facebook,28.99,22.0,14497.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,1.0,2.0,3.0,2020-01-01,2019-12-30,2020-01-01,Social - Content/Audience Development,5.0,Social,Facebook,Cooking


In [141]:
ck_df['channel'].value_counts()

Social         33288
Paid Search     8657
Display         6672
X-UNKNOWN-X     4056
Native          1675
Video             11
Youtube            6
Name: channel, dtype: int64

In [142]:
paid_search_ck_df = ck_df[ck_df['channel'] == 'Paid Search']

In [143]:
paid_search_ck_df.shape

(8657, 41)

In [144]:
paid_search_ck_df.head()

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product
606,NYT SA360,NYTI_BAU_COOKING_COUSA_AW,NYTI_AUC_COOKING_COUSA_AW_NB_COREX_DSA,Dynamic Search Ads,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Content/Audience Development,New Users,Cooking,Paid Search,Google Ads,Google AdWords (DSA),3438.300049,22099.0,728099.0,7.0,2.0,0.0,0.0,42.0,7.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Paid Search - Content/Audience Development,51.0,Paid Search,Google AdWords (DSA),Cooking
607,NYT SA360,NYTI_BAU_COOKING_COUSA_AW,NYTI_BAU_COOKING_COUSA_AW_BR_BDLX2_BMM,Broad Match Modified,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Business As Usual,BundleX2,Cooking,Paid Search,Google Ads,Google AdWords,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Paid Search - Business As Usual,0.0,Paid Search,Google AdWords,Cooking
608,NYT SA360,NYTI_BAU_COOKING_COUSA_AW,NYTI_BAU_COOKING_COUSA_AW_BR_BDLX2_EXM,Exact Match,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Business As Usual,BundleX2,Cooking,Paid Search,Google Ads,Google AdWords,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Paid Search - Business As Usual,0.0,Paid Search,Google AdWords,Cooking
609,NYT SA360,NYTI_BAU_COOKING_COUSA_AW,NYTI_BAU_COOKING_COUSA_AW_BR_BDLX2_PRM,Phrase Match,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Business As Usual,BundleX2,Cooking,Paid Search,Google Ads,Google AdWords,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Paid Search - Business As Usual,0.0,Paid Search,Google AdWords,Cooking
610,NYT SA360,NYTI_BAU_COOKING_COUSA_AW,NYTI_BAU_COOKING_COUSA_AW_BR_BDLXX_BMM,Broad Match Modified,2020,1,1,0,2019-12-29,2020-01-01 00:00:00+00:00,Business As Usual,BundleXX,Cooking,Paid Search,Google Ads,Google AdWords,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-01,2019-12-30,2020-01-01,Paid Search - Business As Usual,0.0,Paid Search,Google AdWords,Cooking


In [145]:
# #drop cols you don't want in solution attempt
# df_copy = df_copy.drop(['account','campaign','_match','campaign_objective','Month','Quarter','week','week_start','date','marketing_initiative',
#               'marketing_segment','platform',
#              'digi_ada_conversions_ct','games_conversions_ct','edu_conversions_ct','hd_conversions_ct','ck_conversions_ct','core_conversions_ct',
#              'digi_ada_conversions_vt','games_conversions_vt','edu_conversions_vt','hd_conversions_vt','ck_conversions_vt','core_conversions_vt',
#              'year_week_monday','channel_mkt_init'], axis=1)
# df_copy.reset_index(inplace = True)

# 0.7 Create Dataframes Split By Channel

In [146]:
df['channel'].value_counts()

Display        255366
Native         238861
Paid Search    175597
Social         104483
X-UNKNOWN-X     59207
Video             263
Other             171
Youtube           164
Name: channel, dtype: int64

In [147]:
df_display = df[df['channel'] == 'Display']
df_display.shape

(255366, 41)

In [148]:
df_native = df[df['channel'] == 'Native']
df_native.shape

(238861, 41)

In [149]:
df_display_native = df[(df['channel'] == 'Display') | (df['channel'] == 'Native')]
df_display_native.shape

(494227, 41)

In [150]:
df_social = df[df['channel'] == 'Social']
df_social.shape

(104483, 41)

In [151]:
df_social['site'].value_counts()

Facebook       62223
Snap           36039
Twitter         6131
LinkedIn US       69
Pinterest         17
Reddit             4
Name: site, dtype: int64

In [152]:
df_fb = df_social[df_social['site'] == 'Facebook']
df_fb.shape

(62223, 41)

In [153]:
df_snap = df_social[df_social['site'] == 'Snap']
df_snap.shape

(36039, 41)

In [154]:
df_twitter= df_social[df_social['site'] == 'Twitter']
df_twitter.shape

(6131, 41)

# 1. Functions

## 1.1 Removal Functions

In [155]:
#returns df with outliers, low-vol channels, sites, and products removed

#impact of removals: 

#if ALL (outliers, paid search, all 3 low vol) removed, then the df is cut in half (832638 -> 481586)
#Outliers (702,551)
#Paid Search (657,082)
#Low-volume channels (772,840)
#low-volume sites (831,519)
#low-volume products (754,941)
#all 3 low-volume (733,921)

def removal(df):
    #remove_outliers
    df_spend = df
    q1, q3 = np.percentile(df_spend['spend'], [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (iqr * 1.5)
    upper_bound = q3 + (iqr * 1.5)
    df_outliers_remov= df[df['spend'] > lower_bound][df['spend'] < upper_bound] 

    #remove Paid Search
    df_paid_search_remov = df_outliers_remov.loc[df['channel'] != 'Paid Search']
    
    #remove low-volume channels
    df_high_vol_channel = df_paid_search_remov.loc[df['new_channel'] != 'Low-Volume Channels']
    
    #remove low-volume sites
    df_high_vol_sites = df_high_vol_channel.loc[df['new_site'] != 'Low-Volume Sites']
    
    #remove low-volume products
    df_high_vol_products = df_high_vol_sites.loc[df['new_product'] != 'Low-Volume Products']
    
    #remove any June 2020 for Native -> infinity (spend = 0)
    df_remov_native_zero = df_high_vol_products.loc[~((df['year_month_cal'] == '2020-06-01') & 
                                             (df['channel'] == 'Native'))]
    
    #remove any June 2020 for Twitter -> infinity (spend = 0)
    df_remov_twitter_zero = df_remov_native_zero.loc[~((df['year_month_cal'] == '2020-06-01') & 
                                             (df['channel'] == 'Social') &
                                             (df['site'] == 'Twitter'))]
    
    return df_remov_twitter_zero

## 1.2 Multiplier Functions

In [156]:
#returns a df with the multiplier 
def df_multiplier(df):
    df_copy = df
    
    #drop cols you don't want in solution attempt
    df_copy = df_copy.drop(['account','campaign','_match','campaign_objective','Month','Quarter','week','week_start','date','marketing_initiative',
              'marketing_segment','platform',
             'digi_ada_conversions_ct','games_conversions_ct','edu_conversions_ct','hd_conversions_ct','ck_conversions_ct','core_conversions_ct',
             'digi_ada_conversions_vt','games_conversions_vt','edu_conversions_vt','hd_conversions_vt','ck_conversions_vt','core_conversions_vt',
             'year_week_monday','channel_mkt_init','new_channel','new_site','new_product'], axis=1)
    df_copy.reset_index(inplace = True)
    
    #group by channel, month, year
    df_copy = df_copy.groupby(['channel', 'year_month_cal','Year']).sum()
    
    #create vt percentage column
    df_copy['avg_monthly_vt_pctg'] = df_copy['all_conversions_vt'] / df_copy['total_conversions']
    
    #create ct percentage column
    df_copy['avg_monthly_ct_pctg'] = df_copy['all_conversions_ct'] / df_copy['total_conversions']
    
    #fix the nulls in vt pctg col
    df_copy.loc[(df_copy['total_conversions'] == 0), 'avg_monthly_vt_pctg'] = 0
    
    #fix the nulls in ct pctg col
    df_copy.loc[(df_copy['total_conversions'] == 0), 'avg_monthly_ct_pctg'] = 0
    
    #create overall ct column
    df_copy['overall_avg_ct_pctg'] = df_copy['avg_monthly_ct_pctg'].mean()
    
    #create individual multiplier for each row
    df_copy['indiv_multiplier'] = (1 - df_copy['avg_monthly_ct_pctg'])/df_copy['avg_monthly_ct_pctg']
    
    #create average multiplier over all rows 
    df_copy['avg_multiplier'] = df_copy['indiv_multiplier'].mean()
    
    return df_copy

In [157]:
#returns actual multiplier 
def actual_multiplier(df):
    df_copy = df
    return df_multiplier(df_copy)['avg_multiplier'][0]

## Eval Error Function

In [159]:
def eval_error(df):
    #create sample column 
    df_copy = df
    df_copy['sample'] = 'in samp'
    df_copy['sample'].loc[df['Year'] == 2021] = 'out samp'
    
    #run removal function
    df_removed = removal(df_copy)
    df_in_samp = df_removed.loc[df_removed['sample'] == 'in samp']
    df_out_samp = df_removed.loc[df_removed['sample'] == 'out samp']
    
    #get multiplier
    mult = actual_multiplier(df_in_samp)
    
    df_out_samp['projected'] = df_out_samp['all_conversions_ct'] * mult
    df_out_samp['abs_error'] = abs(df_out_samp['projected'] - df_out_samp['all_conversions_vt'])
    df_out_samp['abs_error_pctg'] = abs((df_out_samp['projected'] - df_out_samp['all_conversions_vt'])
                                    / df_out_samp['all_conversions_vt'])
    return df_out_samp.sort_values(by= ['abs_error_pctg'])

In [162]:
df_social_test = df_social
eval_error(df_social_test)

Unnamed: 0,account,campaign,_match,campaign_objective,Year,Quarter,Month,week,week_start,date,marketing_initiative,marketing_segment,marketing_subinitiative,channel,platform,site,spend,clicks,impressions,digi_ada_conversions_ct,games_conversions_ct,edu_conversions_ct,hd_conversions_ct,ck_conversions_ct,core_conversions_ct,all_conversions_ct,digi_ada_conversions_vt,games_conversions_vt,edu_conversions_vt,hd_conversions_vt,ck_conversions_vt,core_conversions_vt,all_conversions_vt,year_month_cal,year_week_monday,year_quarter,channel_mkt_init,total_conversions,new_channel,new_site,new_product,sample,projected,abs_error,abs_error_pctg
844103,Growth - DR/Engagement,NYTI_XXX_COREXXX_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_SAL_COREXXX_COUSA_FB_SO_WTCP_REGIS_XXXXXX...,Website Transaction - CPA,2021,2,6,24,2021-06-13,2021-06-15 00:00:00+00:00,Sale,"1P (regis, formers)",Core,Social,Facebook,Facebook,0.000000,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,4.0,18.0,1.0,0.0,2.0,0.0,20.0,21.0,2021-06-01,2021-06-14,2021-04-01,Social - Sale,25.0,Social,Facebook,Core,out samp,20.639830,0.360170,0.017151
819908,Growth - DR/Engagement,NYTI_XXX_COREXXX_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_SAL_COREXXX_COUSA_FB_SO_WTCP_3BEHA_000000...,Website Transaction - CPA,2021,2,5,21,2021-05-23,2021-05-29 00:00:00+00:00,Sale,"Behavioral (2P, 3P)",Core,Social,Facebook,Facebook,382.630005,44.0,47787.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,4.0,0.0,0.0,0.0,1.0,4.0,5.0,2021-05-01,2021-05-24,2021-04-01,Social - Sale,6.0,Social,Facebook,Core,out samp,5.159957,0.159957,0.031991
808041,In-House Subscriptions,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_AUC_COOKING_COUSA_FB_SO_WTCP_REGIS_410000...,Website Transaction - CPA,2021,2,5,20,2021-05-16,2021-05-21 00:00:00+00:00,Content/Audience Development,"1P (regis, formers)",Cooking,Social,Facebook,Facebook,104.870003,124.0,6940.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,2.0,3.0,5.0,2021-05-01,2021-05-17,2021-04-01,Social - Content/Audience Development,6.0,Social,Facebook,Cooking,out samp,5.159957,0.159957,0.031991
689567,Growth - DR/Engagement,NYTI_XXX_HOMEDEV_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_SAL_HOMEDEV_COUSA_FB_SO_WTCP_LPRET_30030D...,Website Transaction - CPA,2021,1,2,8,2021-02-21,2021-02-26 00:00:00+00:00,Sale,Retargeting (LP visitors),Home Delivery,Social,Facebook,Facebook,681.950012,36.0,14314.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,3.0,0.0,0.0,2.0,0.0,5.0,5.0,2021-02-01,2021-02-22,2021-01-01,Social - Sale,6.0,Social,Facebook,Home Delivery,out samp,5.159957,0.159957,0.031991
646083,New York Times DR,NYTI_XXX_COREXXX_COUSA_SC_SO_WTCP_XXXXXXXXXX,NYTI_SAL_COREXXX_COUSA_SC_SO_WTCP_REGIS_80XXXX...,Website Transaction - CPA,2021,1,1,4,2021-01-24,2021-01-25 00:00:00+00:00,Sale,"1P (regis, formers)",Core,Social,Snapchat,Snap,293.128479,147.0,37874.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,2021-01-01,2021-01-25,2021-01-01,Social - Sale,6.0,Social,Snap,Core,out samp,5.159957,0.159957,0.031991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874568,New York Times Subscriptions,NYTI_XXX_COREXXX_COUSA_TW_SO_WTCP_BOOKSXPROD,NYTI_SAL_COREXXX_COUSA_TW_SO_WTCP_3BEHA_00XXXX...,Website Transaction - CPA,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Sale,"Behavioral (2P, 3P)",Core,Social,Twitter,Twitter,351.203888,27.0,49925.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Sale,0.0,Social,Twitter,Core,out samp,0.000000,0.000000,
874570,New York Times Subscriptions,NYTI_XXX_COREXXX_COUSA_TW_SO_WTCP_PROSPECTXD,NYTI_SAL_COREXXX_COUSA_TW_SO_WTCP_3BEHA_00XXXX...,Website Transaction - CPA,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Sale,"Behavioral (2P, 3P)",Core,Social,Twitter,Twitter,291.990875,32.0,33963.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Sale,0.0,Social,Twitter,Core,out samp,0.000000,0.000000,
874577,Growth - DR/Engagement,NYTI_XXX_HOMEDEV_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_SAL_HOMEDEV_COUSA_FB_SO_WTCP_LPRET_30030D...,Website Transaction - CPA,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Sale,Retargeting (LP visitors),Home Delivery,Social,Facebook,Facebook,72.099998,6.0,537.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Sale,0.0,Social,Facebook,Home Delivery,out samp,0.000000,0.000000,
874578,Growth - DR/Engagement,NYTI_XXX_HOMEDEV_COUSA_FB_SO_WTCP_XXXXXXXXXX,NYTI_SAL_HOMEDEV_COUSA_FB_SO_WTCP_LPRET_30030D...,Website Transaction - CPA,2021,3,7,27,2021-07-04,2021-07-06 00:00:00+00:00,Sale,Retargeting (LP visitors),Home Delivery,Social,Facebook,Facebook,102.730003,4.0,1036.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021-07-01,2021-07-05,2021-07-01,Social - Sale,0.0,Social,Facebook,Home Delivery,out samp,0.000000,0.000000,


# Calculating Multipliers (Split by Channel, Aggregated by Month)

## Display Multiplier -> 153.30

In [163]:
#remove junk
df_display_removed = removal(df_display)

In [164]:
df_display.shape

(255366, 41)

In [165]:
df_display_removed.shape

(220194, 41)

In [166]:
actual_multiplier(df_display_removed)

157.17362903725888

In [167]:
df_multiplier(df_display_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Display,2020-01-01,2020,498626862,270731.655482,39761.0,120634178.0,86.0,5301.0,5387.0,0.984036,0.015964,0.009573,61.639535,157.173629
Display,2020-02-01,2020,1625339286,216512.703041,24255.0,81863970.0,57.0,6440.0,6497.0,0.991227,0.008773,0.009573,112.982456,157.173629
Display,2020-03-01,2020,2725721558,279458.244595,30471.0,103869593.0,74.0,11786.0,11860.0,0.993761,0.006239,0.009573,159.27027,157.173629
Display,2020-04-01,2020,2892154901,303385.192434,51975.0,177283781.0,161.0,11789.0,11950.0,0.986527,0.013473,0.009573,73.223602,157.173629
Display,2020-05-01,2020,1032888016,53316.873648,8031.0,26122329.0,67.0,5705.0,5772.0,0.988392,0.011608,0.009573,85.149254,157.173629
Display,2020-06-01,2020,551700661,20717.380069,2274.0,8511335.0,3.0,2739.0,2742.0,0.998906,0.001094,0.009573,913.0,157.173629
Display,2020-07-01,2020,3406685726,198445.733971,38352.0,77653920.0,70.0,7751.0,7821.0,0.99105,0.00895,0.009573,110.728571,157.173629
Display,2020-08-01,2020,4035579477,206438.003638,31949.0,71409842.0,53.0,6631.0,6684.0,0.992071,0.007929,0.009573,125.113208,157.173629
Display,2020-09-01,2020,4661834895,197496.180285,28522.0,70741083.0,65.0,6010.0,6075.0,0.9893,0.0107,0.009573,92.461538,157.173629
Display,2020-10-01,2020,8096200486,264911.08181,34344.0,79114480.0,84.0,8673.0,8757.0,0.990408,0.009592,0.009573,103.25,157.173629


## Native Multiplier 

In [168]:
#remove junk
df_native_removed = removal(df_native)

In [169]:
df_native.shape

(238861, 41)

In [170]:
df_native_removed.shape

(206188, 41)

In [171]:
actual_multiplier(df_native_removed)

80.22991760179416

In [172]:
df_multiplier(df_native_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Native,2020-01-01,2020,567534435,252398.520461,2884573.0,83427422.0,315.0,2556.0,2871.0,0.890282,0.109718,0.02185,8.114286,80.229918
Native,2020-02-01,2020,2624640505,318098.205794,2803260.0,122787967.0,401.0,3792.0,4193.0,0.904364,0.095636,0.02185,9.456359,80.229918
Native,2020-03-01,2020,2389768380,151331.016859,34724.0,53122128.0,39.0,4269.0,4308.0,0.990947,0.009053,0.02185,109.461538,80.229918
Native,2020-04-01,2020,304794677,31845.090294,10949.0,21471595.0,23.0,1348.0,1371.0,0.983224,0.016776,0.02185,58.608696,80.229918
Native,2020-05-01,2020,83402320,2906.709711,1655.0,1925512.0,9.0,1060.0,1069.0,0.991581,0.008419,0.02185,117.777778,80.229918
Native,2020-07-01,2020,1752681515,87151.035265,28214.0,42400607.0,21.0,2158.0,2179.0,0.990363,0.009637,0.02185,102.761905,80.229918
Native,2020-08-01,2020,5276494974,203818.569681,57595.0,84950474.0,45.0,3378.0,3423.0,0.986854,0.013146,0.02185,75.066667,80.229918
Native,2020-09-01,2020,5738827805,179541.858321,41199.0,66225239.0,31.0,3091.0,3122.0,0.99007,0.00993,0.02185,99.709677,80.229918
Native,2020-10-01,2020,7112134328,196356.743038,47587.0,73273982.0,37.0,3436.0,3473.0,0.989346,0.010654,0.02185,92.864865,80.229918
Native,2020-11-01,2020,6026530399,248467.407449,42490.0,63972096.0,31.0,2759.0,2790.0,0.988889,0.011111,0.02185,89.0,80.229918


## Display and Native Multiplier

In [173]:
df_display_native_removed = removal(df_display_native)

In [174]:
df_display_native.shape

(494227, 41)

In [175]:
df_display_native_removed.shape

(426385, 41)

In [176]:
actual_multiplier(df_display_native_removed)

118.49653924758934

In [177]:
df_multiplier(df_display_native_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Display,2020-01-01,2020,490259505,251834.809971,37717.0,114704034.0,81.0,5089.0,5170.0,0.984333,0.015667,0.015611,62.82716,118.496539
Display,2020-02-01,2020,1606377867,201872.887279,22826.0,77243263.0,54.0,6235.0,6289.0,0.991414,0.008586,0.015611,115.462963,118.496539
Display,2020-03-01,2020,2687265572,259915.52747,28687.0,97862257.0,71.0,11422.0,11493.0,0.993822,0.006178,0.015611,160.873239,118.496539
Display,2020-04-01,2020,2846933745,283486.88469,49043.0,166347790.0,155.0,11295.0,11450.0,0.986463,0.013537,0.015611,72.870968,118.496539
Display,2020-05-01,2020,1026487652,50713.624918,7892.0,25827393.0,66.0,5584.0,5650.0,0.988319,0.011681,0.015611,84.606061,118.496539
Display,2020-06-01,2020,548682017,19668.500725,2196.0,8180077.0,3.0,2636.0,2639.0,0.998863,0.001137,0.015611,878.666667,118.496539
Display,2020-07-01,2020,3370999929,186903.073261,35776.0,72891867.0,70.0,7484.0,7554.0,0.990733,0.009267,0.015611,106.914286,118.496539
Display,2020-08-01,2020,3995752605,194587.323701,30166.0,67946783.0,53.0,6400.0,6453.0,0.991787,0.008213,0.015611,120.754717,118.496539
Display,2020-09-01,2020,4610998881,183610.001404,27023.0,67059981.0,62.0,5819.0,5881.0,0.989458,0.010542,0.015611,93.854839,118.496539
Display,2020-10-01,2020,8029518531,249027.730737,32479.0,74994689.0,80.0,8421.0,8501.0,0.990589,0.009411,0.015611,105.2625,118.496539


## Social Multiplier

In [178]:
df_social_removed = removal(df_social)

In [179]:
df_social.shape

(104483, 42)

In [180]:
df_social_removed.shape

(91027, 42)

In [181]:
actual_multiplier(df_social_removed)

5.834204905464188

In [182]:
df_multiplier(df_social_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Social,2020-01-01,2020,378412789,747177.3,707949.0,193200880.0,4289.0,9065.0,13354.0,0.678823,0.321177,0.183316,2.113546,5.834205
Social,2020-02-01,2020,1267887819,787313.9,635335.0,175445666.0,2964.0,9997.0,12961.0,0.771314,0.228686,0.183316,3.372807,5.834205
Social,2020-03-01,2020,1429386730,733588.6,907575.0,232648999.0,4797.0,17279.0,22076.0,0.782705,0.217295,0.183316,3.602043,5.834205
Social,2020-04-01,2020,1751329851,883148.6,1484832.0,365496912.0,5978.0,21918.0,27896.0,0.785704,0.214296,0.183316,3.666444,5.834205
Social,2020-05-01,2020,970148715,178720.8,315931.0,45071027.0,3147.0,9132.0,12279.0,0.743709,0.256291,0.183316,2.901811,5.834205
Social,2020-06-01,2020,46196046,3640.15,11443.0,999736.0,70.0,347.0,417.0,0.832134,0.167866,0.183316,4.957143,5.834205
Social,2020-07-01,2020,680724997,492355.9,250449.0,131184803.0,919.0,8709.0,9628.0,0.904549,0.095451,0.183316,9.476605,5.834205
Social,2020-08-01,2020,1101877244,581110.9,239758.0,127953911.0,795.0,8048.0,8843.0,0.910098,0.089902,0.183316,10.12327,5.834205
Social,2020-09-01,2020,1340988944,599987.1,161222.0,87125979.0,990.0,7996.0,8986.0,0.889829,0.110171,0.183316,8.076768,5.834205
Social,2020-10-01,2020,2235047948,646450.8,301427.0,111621013.0,1545.0,7785.0,9330.0,0.834405,0.165595,0.183316,5.038835,5.834205


## Facebook Multiplier

In [183]:
df_fb_removed = removal(df_fb)

In [184]:
df_fb.shape

(62223, 41)

In [186]:
df_fb_removed.shape

(53569, 41)

In [187]:
actual_multiplier(df_fb_removed)

4.782731697646356

In [188]:
df_multiplier(df_fb_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Social,2020-01-01,2020,309929741,385191.060076,230935.0,74984331.0,2850.0,6000.0,8850.0,0.677966,0.322034,0.217025,2.105263,4.782732
Social,2020-02-01,2020,958602969,364887.020191,119995.0,41206848.0,1868.0,5934.0,7802.0,0.760574,0.239426,0.217025,3.17666,4.782732
Social,2020-03-01,2020,992554718,303859.120376,199093.0,53292360.0,3149.0,10318.0,13467.0,0.766169,0.233831,0.217025,3.276596,4.782732
Social,2020-04-01,2020,1217255965,345499.090181,280121.0,63545255.0,4152.0,11035.0,15187.0,0.726608,0.273392,0.217025,2.657755,4.782732
Social,2020-05-01,2020,848483309,132800.840175,236327.0,22300993.0,2794.0,8016.0,10810.0,0.741536,0.258464,0.217025,2.869005,4.782732
Social,2020-06-01,2020,46196046,3640.149984,11443.0,999736.0,70.0,347.0,417.0,0.832134,0.167866,0.217025,4.957143,4.782732
Social,2020-07-01,2020,255855529,147023.809778,20847.0,17524478.0,448.0,3245.0,3693.0,0.878689,0.121311,0.217025,7.243304,4.782732
Social,2020-08-01,2020,548427949,214130.919942,29421.0,25984230.0,466.0,3781.0,4247.0,0.890275,0.109725,0.217025,8.113734,4.782732
Social,2020-09-01,2020,689509924,206288.709918,23248.0,19616647.0,612.0,3535.0,4147.0,0.852423,0.147577,0.217025,5.776144,4.782732
Social,2020-10-01,2020,1218906902,214108.919788,78985.0,20935338.0,1013.0,3435.0,4448.0,0.772257,0.227743,0.217025,3.390918,4.782732


## Snap Multiplier

In [189]:
df_snap_removed = removal(df_snap)

In [190]:
df_snap.shape

(36039, 41)

In [191]:
df_snap_removed.shape

(31689, 41)

In [192]:
actual_multiplier(df_snap_removed)

15.016742507423702

In [193]:
df_multiplier(df_snap_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Social,2020-01-01,2020,43324854,216948.534572,524156.0,116926889.0,698.0,1757.0,2455.0,0.715682,0.284318,0.086554,2.517192,15.016743
Social,2020-02-01,2020,244846442,343842.579508,560644.0,136343057.0,752.0,3430.0,4182.0,0.820182,0.179818,0.086554,4.56117,15.016743
Social,2020-03-01,2020,306880055,352433.363711,756042.0,186241255.0,926.0,5799.0,6725.0,0.862305,0.137695,0.086554,6.262419,15.016743
Social,2020-04-01,2020,441297732,438715.798477,1218639.0,301960499.0,1094.0,9318.0,10412.0,0.894929,0.105071,0.086554,8.517367,15.016743
Social,2020-05-01,2020,91147097,22457.262899,67883.0,19310351.0,139.0,665.0,804.0,0.827114,0.172886,0.086554,4.784173,15.016743
Social,2020-07-01,2020,278263343,252154.391388,239120.0,111313560.0,249.0,4346.0,4595.0,0.945811,0.054189,0.086554,17.453815,15.016743
Social,2020-08-01,2020,427841732,270049.487275,212996.0,95967287.0,209.0,3477.0,3686.0,0.943299,0.056701,0.086554,16.636364,15.016743
Social,2020-09-01,2020,470672803,254002.463822,129526.0,55098092.0,145.0,3355.0,3500.0,0.958571,0.041429,0.086554,23.137931,15.016743
Social,2020-10-01,2020,782785736,314403.138724,194173.0,77205992.0,199.0,3264.0,3463.0,0.942535,0.057465,0.086554,16.40201,15.016743
Social,2020-11-01,2020,1724359322,493530.862434,308934.0,110754710.0,422.0,7039.0,7461.0,0.943439,0.056561,0.086554,16.680095,15.016743


## Twitter Multiplier

In [194]:
df_twitter_removed = removal(df_twitter)

In [195]:
df_twitter.shape

(6131, 41)

In [196]:
df_twitter_removed.shape

(5448, 41)

In [197]:
actual_multiplier(df_twitter_removed)

6.536600536424892

In [198]:
df_multiplier(df_twitter_removed)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,spend,clicks,impressions,all_conversions_ct,all_conversions_vt,total_conversions,avg_monthly_vt_pctg,avg_monthly_ct_pctg,overall_avg_ct_pctg,indiv_multiplier,avg_multiplier
channel,year_month_cal,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Social,2020-01-01,2020,22704859,225775.935412,160904.0,30493783.0,201.0,742.0,943.0,0.78685,0.21315,0.171772,3.691542,6.536601
Social,2020-02-01,2020,59717291,97890.922704,38383.0,11464039.0,228.0,355.0,583.0,0.608919,0.391081,0.171772,1.557018,6.536601
Social,2020-03-01,2020,125023918,100956.430481,91406.0,18689831.0,168.0,550.0,718.0,0.766017,0.233983,0.171772,3.27381,6.536601
Social,2020-04-01,2020,78564467,93846.280692,36009.0,18428940.0,65.0,615.0,680.0,0.904412,0.095588,0.171772,9.461538,6.536601
Social,2020-05-01,2020,23588247,7442.951611,2459.0,1434072.0,5.0,63.0,68.0,0.926471,0.073529,0.171772,12.6,6.536601
Social,2020-07-01,2020,148536913,120754.279264,32017.0,23552379.0,106.0,817.0,923.0,0.885157,0.114843,0.171772,7.707547,6.536601
Social,2020-08-01,2020,118933415,97731.771743,25605.0,17496850.0,17.0,256.0,273.0,0.937729,0.062271,0.171772,15.058824,6.536601
Social,2020-09-01,2020,171132354,140745.107224,32639.0,21572506.0,98.0,613.0,711.0,0.862166,0.137834,0.171772,6.255102,6.536601
Social,2020-10-01,2020,244360509,162263.521881,50682.0,26002070.0,168.0,742.0,910.0,0.815385,0.184615,0.171772,4.416667,6.536601
Social,2020-11-01,2020,133325676,142784.879704,41893.0,21740786.0,129.0,838.0,967.0,0.866598,0.133402,0.171772,6.496124,6.536601


# Appendix

In [199]:
# Attempts to group high vol and low vol (that didn't rly work)


#group low-volume channels, products, and sites, and split out channel-Social
# def new_channel(channel):
#     if (channel == 'Social') | (channel == 'Display') | (channel == 'Paid Search') | (channel == 'Native'):
#         return channel
#     else:
#         return 'Other'
    
# # for x in df_copy['channel']:
# #   print(new_channel(x))
    
# df_copy['new_channel'] = df_copy.apply(lambda x: new_channel(x['channel'],axis=1))
#~ -> not