In [60]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

# NOTES ON THIS VERSION

This version takes a Facebook, Pinterest, and Google ads CSV for the advertisers
It then takes an iOS and Android AppsFlyer CSV for the Installs/down the funnel events

The Facebook file has a column for "campaign name" which always has "IOS" or "ANDROID" in the title and "Evergreen" if it is one of the evergreen campaigns. This allows us to split up the performance by platform as well as by evergreen/non evergreen

# ADVERTISER DATA PREPARATION

### Global Variables

In [2]:
folder_with_csvs = "2_Tuesday"
folder_output_csvs = "2_Tuesday/Output"

### Extract Advertiser CSVs into DataFrames

In [3]:
facebook_file = folder_with_csvs + "/FACEBOOK.csv"
facebook_df = pd.read_csv(facebook_file)
display(facebook_df.head())


facebook_advertiser_string = "Facebook Ads"

type_evergreen_string = "Evergreen"
type_other_string = "Other"

Unnamed: 0,Day,Campaign Name,Amount Spent (USD),Impressions,Reach,Link Clicks,Link Clicks [On Ad],Unique Link Clicks,Unique Link Clicks [On Ad],Mobile App Installs,Unique Mobile App Tutorials Completed,Unique Mobile App Purchases,Reporting Starts,Reporting Ends
0,2019-05-07,bidalgo_2019_04_27 | Evergreen CPT with CBO an...,152.03,20013,16880,248.0,248.0,246.0,246.0,60.0,23.0,5.0,2019-05-07,2019-05-07
1,2019-05-07,bidalgo_2019_04 | Evergreen CPT with CBO and P...,422.8,49738,45193,387.0,387.0,386.0,386.0,130.0,70.0,16.0,2019-05-07,2019-05-07
2,2019-05-07,bidalgo_2018_06 | Launch Ads V1 | iOS | Moms 2...,0.13,23,23,1.0,1.0,1.0,1.0,,,,2019-05-07,2019-05-07
3,2019-05-07,bidalgo_2019_05_04 | CPI TEST Intimacy Ads | I...,48.63,10291,9858,84.0,84.0,84.0,84.0,22.0,12.0,1.0,2019-05-07,2019-05-07
4,2019-05-07,bidalgo_2018_07 | Launch Ads - VIDEO | iOS | M...,2.44,360,345,1.0,1.0,1.0,1.0,,,,2019-05-07,2019-05-07


In [4]:
pinterest_file = folder_with_csvs + "/PINTEREST.csv"
pinterest_df = pd.read_csv(pinterest_file)
display(pinterest_df.head())

Unnamed: 0,Campaign ID,Campaign name,Date,Campaign goal,Ad Format Type,Spend in account currency,Impressions,Clicks,Saves,Closeups,...,Earned Send Unique Users,Downstream Flag Unique Users,Sum of Installs from all Paid activity,Installs resulting from Paid clicks,Installs resulting from views on Paid promotion,Installs resulting from Paid Close Ups & Saves,Sum of Installs from all Earned activity,Installs resulting from Earned clicks,Installs resulting from views on Earned promotions,Installs resulting from Earned Close Ups & Saves
0,C626739958135,iOS - Prospecting - KW,2018-06-28,App_install_traffic,One Tap,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,C626739958135,iOS - Prospecting - KW,2018-06-29,App_install_traffic,One Tap,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,C626739958135,iOS - Prospecting - KW,2018-06-30,App_install_traffic,One Tap,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C626739958135,iOS - Prospecting - KW,2018-07-01,App_install_traffic,One Tap,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C626739958135,iOS - Prospecting - KW,2018-07-02,App_install_traffic,One Tap,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
google_file = folder_with_csvs + "/GOOGLE.csv"
google_df = pd.read_csv(google_file, skiprows=2)
display(google_df.head())

Unnamed: 0,Day,Campaign type,Campaign,Currency,Cost,Impressions,Views,Clicks
0,2019-04-24,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,514.63,95418,10756,279
1,2019-04-25,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,506.81,162903,13529,471
2,2019-04-26,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,81.79,46785,3566,137
3,2019-04-27,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,909.81,596122,23035,2005
4,2019-04-28,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,112.49,46903,2373,231


In [18]:
# asa_file = folder_with_csvs + "/ASA.csv"
# asa_df = pd.read_csv(asa_file)
# asa_df.head()
asa_advertiser_string = "Apple Search Ads"

type_basic_string = "Basic"
type_advanced_string = "Advanced"

### Transform FACEBOOK DataFrame

In [8]:
# Create a filtered dataframe from specific columns
facebook_cols = ["Day", "Campaign Name", "Amount Spent (USD)", 
                 "Impressions", "Reach", "Link Clicks", "Unique Link Clicks"]

facebook_transformed = facebook_df[facebook_cols].copy()

# Rename the column headers
facebook_transformed = facebook_transformed.rename(columns={"Day": "date",
                                                            "Campaign Name": "campaign_name",
                                                            "Amount Spent (USD)": "spend",
                                                            "Impressions": "impressions",                                                     
                                                            "Reach": "impressions_unique",
                                                            "Link Clicks": "clicks",
                                                            "Unique Link Clicks": "clicks_unique"})


# # names for devices in each platform
# device_names_ios = ["ipod","iphone","ipad"]
# device_names_android = ["android_smartphone","android_tablet"]


# # add column to identify which platform (IOS or ANDROID) was being delivered impressions
# facebook_transformed["device_type"] = "other"
# facebook_transformed.loc[facebook_transformed["impression_device"].isin(device_names_ios) ,["device_type"]] = "IOS"
# facebook_transformed.loc[facebook_transformed["impression_device"].isin(device_names_android) ,["device_type"]] = "ANDROID"


facebook_transformed["device_type"] = "other"
facebook_transformed.loc[facebook_transformed['campaign_name'].str.contains('(?i)IOS'),"device_type"] = "IOS"
facebook_transformed.loc[facebook_transformed['campaign_name'].str.contains('(?i)ANDROID'),"device_type"] = "ANDROID"

facebook_transformed["advertiser"] = facebook_advertiser_string

facebook_transformed["campaign_type"] = type_other_string
facebook_transformed.loc[facebook_transformed['campaign_name'].str.contains('(?i)Evergreen'),"campaign_type"] = type_evergreen_string



# facebook_spend_reach_by_device_type = facebook_transformed.groupby(["date","device_type"]).sum()
facebook_spend_reach_by_device_type = facebook_transformed.groupby(["date","device_type", "advertiser", "campaign_type"],as_index=False).sum()

facebook_spend_reach_by_device_type_final = facebook_spend_reach_by_device_type.loc[facebook_spend_reach_by_device_type["device_type"] != "other", :]

# display(facebook_spend_reach_by_device_type_final.head())



# add the AppsFlyer advertiser name and Split out IOS and ANDROID into separate dataframes
# facebook_spend_reach_by_device_type_final["advertiser"] = facebook_non_eg_string

facebook_spend_reach_clicks_ios = facebook_spend_reach_by_device_type_final.loc[facebook_spend_reach_by_device_type_final["device_type"] == "IOS",:]
facebook_spend_reach_clicks_android = facebook_spend_reach_by_device_type_final.loc[facebook_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]

# pinterest_spend_reach_clicks_android = 


display(facebook_spend_reach_clicks_ios.head())
display(facebook_spend_reach_clicks_android.tail())




Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique
0,2018-06-28,IOS,Facebook Ads,Other,464.73,54871,51019,245.0,241.0
1,2018-06-29,IOS,Facebook Ads,Other,1010.64,86403,82885,473.0,467.0
2,2018-06-30,IOS,Facebook Ads,Other,1006.94,76443,74392,478.0,472.0
3,2018-07-01,IOS,Facebook Ads,Other,1072.97,92433,87127,668.0,662.0
4,2018-07-02,IOS,Facebook Ads,Other,940.32,90790,86254,585.0,581.0


Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique
470,2019-05-04,ANDROID,Facebook Ads,Evergreen,497.68,63057,48960,588.0,585.0
471,2019-05-04,ANDROID,Facebook Ads,Other,109.88,16523,14355,114.0,114.0
474,2019-05-05,ANDROID,Facebook Ads,Evergreen,500.84,66278,54112,781.0,772.0
477,2019-05-06,ANDROID,Facebook Ads,Evergreen,502.1,71432,56880,769.0,761.0
480,2019-05-07,ANDROID,Facebook Ads,Evergreen,152.03,20013,16880,248.0,246.0


### Transform PINTEREST DataFrame

In [12]:
pinterest_cols = ["Campaign ID", "Campaign name", "Date", "Spend in account currency", 
                  "Impressions", "Impression Unique Users", "Clicks", "Click Unique Users"]

pinterest_transformed = pinterest_df[pinterest_cols].copy()

# Rename the column headers
pinterest_transformed = pinterest_transformed.rename(columns={"Campaign ID": "campaign_id",
                                                         "Campaign name": "campaign_name",
                                                         "Date": "date",
                                                         "Spend in account currency": "spend",
                                                         "Impressions": "impressions",
                                                         "Impression Unique Users": "impressions_unique",
                                                         "Clicks": "clicks",
                                                         "Click Unique Users": "clicks_unique"})

pinterest_transformed["device_type"] = "other"
pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)IOS'),"device_type"] = "IOS"
pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)ANDROID'),"device_type"] = "ANDROID"


pinterest_transformed["advertiser"] = "pinterest_int"
pinterest_transformed["campaign_type"] = type_other_string


pinterest_spend_reach_by_device_type = pinterest_transformed.groupby(["date","device_type", "advertiser", "campaign_type"],as_index=False).sum()

pinterest_spend_reach_by_device_type_final = pinterest_spend_reach_by_device_type.loc[pinterest_spend_reach_by_device_type["device_type"] != "other", :]

pinterest_spend_reach_by_device_type_final.head(10)



# add the AppsFlyer advertiser name and 
# pinterest_spend_reach_by_device_type_final["advertiser"] = "pinterest_int"
# pinterest_spend_reach_by_device_type_final["campaign_type"] = type_other_string


# Split out IOS and ANDROID into separate dataframes
pinterest_spend_reach_clicks_ios = pinterest_spend_reach_by_device_type_final.loc[pinterest_spend_reach_by_device_type_final["device_type"] == "IOS",:]
pinterest_spend_reach_clicks_android = pinterest_spend_reach_by_device_type_final.loc[pinterest_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]



display(pinterest_spend_reach_clicks_ios.head())
display(pinterest_spend_reach_clicks_android.tail())

Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique
1,2018-06-28,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0
4,2018-06-29,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0
7,2018-06-30,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0
10,2018-07-01,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0
13,2018-07-02,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique
924,2019-05-02,ANDROID,pinterest_int,Other,276.9,273332.0,165380.0,1589.0,1566.0
927,2019-05-03,ANDROID,pinterest_int,Other,247.51,250469.0,151636.0,1409.0,1390.0
930,2019-05-04,ANDROID,pinterest_int,Other,232.0,224720.0,135019.0,1311.0,1294.0
933,2019-05-05,ANDROID,pinterest_int,Other,278.3,252911.0,148510.0,1599.0,1577.0
936,2019-05-06,ANDROID,pinterest_int,Other,271.45,278239.0,163258.0,1576.0,1548.0


### Transform GOOGLE ADS DataFrame

In [13]:
# Create a filtered dataframe from specific columns
google_cols = ["Day", "Campaign", "Cost", 
                 "Impressions", "Views", "Clicks"]

google_transformed = google_df[google_cols].copy()

# Rename the column headers
google_transformed = google_transformed.rename(columns={"Day": "date",
                                                            "Campaign": "campaign_name",
                                                            "Cost": "spend",
                                                            "Impressions": "impressions",                                                     
                                                            "Views": "views",
                                                            "Clicks": "clicks"})

# display(google_transformed["spend"].dtypes)

google_transformed["impressions"] = google_transformed["impressions"].str.replace(",","").astype(int)
google_transformed["views"] = google_transformed["views"].str.replace(",","").astype(int)
google_transformed["clicks"] = google_transformed["clicks"].str.replace(",","").astype(int)

# pd.to_numeric(s, errors='ignore')

# pinterest_transformed["device_type"] = "other"
# pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)IOS'),"device_type"] = "IOS"
# pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)ANDROID'),"device_type"] = "ANDROID"


# add the Device Type, AppsFlyer advertiser name 
google_transformed["device_type"] = "IOS"
google_transformed["advertiser"] = "googleadwords_int"
google_transformed["campaign_type"] = type_other_string


google_spend_reach_by_device_type = google_transformed.groupby(["date","device_type", "advertiser", "campaign_type"],as_index=False).sum()

google_spend_reach_by_device_type_final = google_spend_reach_by_device_type.loc[google_spend_reach_by_device_type["device_type"] != "other", :]

# display(google_spend_reach_by_device_type_final.head())



# Split out IOS and ANDROID into separate dataframes
google_spend_reach_clicks_ios = google_spend_reach_by_device_type_final.loc[google_spend_reach_by_device_type_final["device_type"] == "IOS",:]
google_spend_reach_clicks_android = google_spend_reach_by_device_type_final.loc[google_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]

display(google_spend_reach_clicks_ios.head())
display(google_spend_reach_clicks_android.tail())




Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,views,clicks
0,2019-04-24,IOS,googleadwords_int,Other,514.63,95418,10756,279
1,2019-04-25,IOS,googleadwords_int,Other,506.81,162903,13529,471
2,2019-04-26,IOS,googleadwords_int,Other,81.79,46785,3566,137
3,2019-04-27,IOS,googleadwords_int,Other,909.81,596122,23035,2005
4,2019-04-28,IOS,googleadwords_int,Other,112.49,46903,2373,231


Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,views,clicks


In [None]:
# google_transformed["impressions"] = google_transformed["impressions"].str.replace(",","").astype(int)
# google_transformed["views"] = google_transformed["views"].str.replace(",","").astype(int)
# google_transformed["clicks"] = google_transformed["clicks"].str.replace(",","").astype(int)
# # google_transformed[["impressions", "views"]] = google_transformed[["impressions", "views"]].apply(pd.to_numeric)


# google_transformed.head()

# APPSFLYER DATA PREPARATION

### Extract APPSFLYER CSVs into DataFrames

In [14]:
appsflyer_ios_file = folder_with_csvs + "/APPSFLYER-IOS.csv"
appsflyer_ios_df = pd.read_csv(appsflyer_ios_file)
display(appsflyer_ios_df.head(100))

Unnamed: 0,Date,Agency/PMD (af_prt),Media Source (pid),Campaign (c),Impressions,Clicks,CTR,Installs,Conversion Rate,Sessions,...,ltv_20190501 (Sales in USD),ltv_20190504 (Unique users),ltv_20190504 (Event counter),ltv_20190504 (Sales in USD),ltv_20190505 (Unique users),ltv_20190505 (Event counter),ltv_20190505 (Sales in USD),new_workout_saved (Unique users),new_workout_saved (Event counter),new_workout_saved (Sales in USD)
0,2018-06-28,,Deep Linking,Deep Link Redirect iOS,,34.0,,2,0.0588,6,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
1,2018-06-28,,Email,BetaLaunchEmail,,17.0,,0,0.0000,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
2,2018-06-28,,Facebook Ads,"2018_06 | Launch Ads V1 | iOS | Moms 25-39 | ""...",12451.0,45.0,0.0036,2,0.0444,5,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
3,2018-06-28,,Facebook Ads,"2018_06 | Launch Ads V1 | iOS | Moms 25-39 | ""...",11219.0,45.0,0.0040,5,0.1111,13,...,0.0,0,0,0.0,0,0,0.0,3,5,0.0
4,2018-06-28,,Facebook Ads,"2018_06 | Launch Ads V1 | iOS | Moms 25-39 | ""...",8276.0,39.0,0.0047,7,0.1795,22,...,0.0,0,0,0.0,0,0,0.0,1,1,0.0
5,2018-06-28,,Facebook Ads,"2018_06 | Launch Ads V1 | iOS | Moms 25-39 | ""...",12918.0,50.0,0.0039,6,0.1200,15,...,0.0,0,0,0.0,0,0,0.0,2,2,0.0
6,2018-06-28,,Facebook Ads,2018_06 | Launch Ads V1 | iOS | Moms 25-39 | 4...,7668.0,58.0,0.0076,6,0.1034,477,...,0.0,0,0,0.0,0,0,0.0,4,28,0.0
7,2018-06-28,,InfluencerYouTube,carissa_nunez,,3.0,,0,0.0000,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
8,2018-06-28,,Organic,,,,,100,,0,...,0.0,0,0,0.0,0,0,0.0,29,143,0.0
9,2018-06-28,,SocialFacebook,LaunchPost,,10.0,,0,0.0000,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0


In [15]:
appsflyer_android_file = folder_with_csvs + "/APPSFLYER-ANDROID.csv"
appsflyer_android_df = pd.read_csv(appsflyer_android_file)
display(appsflyer_android_df.head())

Unnamed: 0,Date,Agency/PMD (af_prt),Media Source (pid),Campaign (c),Impressions,Clicks,CTR,Installs,Conversion Rate,Sessions,...,ltv_20190501 (Sales in USD),ltv_20190504 (Unique users),ltv_20190504 (Event counter),ltv_20190504 (Sales in USD),ltv_20190505 (Unique users),ltv_20190505 (Event counter),ltv_20190505 (Sales in USD),new_workout_saved (Unique users),new_workout_saved (Event counter),new_workout_saved (Sales in USD)
0,2018-06-28,,Organic,,,,,10,,0,...,0.0,0,0,0.0,0,0,0.0,4,16,0.0
1,2018-06-28,,Website,MoovStudioLandingPageAndroid,,1.0,,0,0.0,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
2,2018-06-28,,Website,MoovStudioSite,,3.0,,0,0.0,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
3,2018-06-28,,Website,Navigation Bar App Download Button,,1.0,,0,0.0,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0
4,2018-06-29,,Organic,,,,,9,,0,...,0.0,0,0,0.0,0,0,0.0,4,10,0.0


### SETUP VARIABLES TO USE FOR APPSFLYER IOS/ANDROID DataFrame Transformations

In [19]:
most_recent_ltv_column_prefix = "ltv_20190504"
paid_acquisition_advertisers = [facebook_advertiser_string, "pinterest_int", "googleadwords_int", asa_advertiser_string, "Organic"]


appsflyer_cols = ["Date", "Media Source (pid)", "Campaign (c)", "Installs", "Sessions", 
                      "new_workout_saved (Unique users)", 
                      "af_purchase (Unique users)",
                      "af_purchase (Event counter)", 
                      "af_start_trial (Unique users)", 
                      "af_start_trial (Event counter)",
                      most_recent_ltv_column_prefix + " (Unique users)",
                      most_recent_ltv_column_prefix + " (Event counter)",
                      most_recent_ltv_column_prefix + " (Sales in USD)"]

### Transform APPSFLYER IOS DataFrame

In [38]:
appsflyer_ios_transformed = appsflyer_ios_df[appsflyer_cols].copy()

# Rename the column headers
appsflyer_ios_transformed = appsflyer_ios_transformed.rename(columns={"Date": "date",
                                                         "Media Source (pid)": "advertiser",
                                                         "Campaign (c)": "campaign_name",
                                                         "Installs": "installs",
                                                         "Sessions": "sessions",
                                                         "new_workout_saved (Unique users)": "new_workout_saved_unique",
                                                         "af_purchase (Unique users)": "af_purchase_unique",
                                                         "af_purchase (Event counter)": "af_purchase_all",
                                                         "af_start_trial (Unique users)": "af_start_trial_unique",
                                                         "af_start_trial (Event counter)": "af_start_trial_all",
                                                         most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
                                                         most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
                                                         most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue"})


# facebook_advertiser_string = "Facebook Ads"
# asa_advertiser_string = "Apple Search Ads"

# type_evergreen_string = "Evergreen"
# type_other_string = "Other
# type_basic_string = "Basic"
# type_advanced_string = "Advanced"


appsflyer_ios_transformed.insert(2, "campaign_type", type_other_string) 


# appsflyer_ios_transformed.loc[appsflyer_ios_transformed['advertiser'] == facebook_advertiser_string, "campaign_type"] = type_other_string

appsflyer_ios_transformed.loc[(appsflyer_ios_transformed['advertiser'] == facebook_advertiser_string)
                              &
                              (appsflyer_ios_transformed['campaign_name'].str.contains('(?i)Evergreen')), "campaign_type"] = type_evergreen_string



appsflyer_ios_transformed.loc[appsflyer_ios_transformed['advertiser'] == asa_advertiser_string, "campaign_type"] = type_advanced_string

appsflyer_ios_transformed.loc[(appsflyer_ios_transformed['advertiser'] == asa_advertiser_string)
                              &
                              (appsflyer_ios_transformed['campaign_name'].str.contains('Moov Studio: Fitness Workouts')), "campaign_type"] = type_basic_string



appsflyer_ios_transformed.insert(11, "trial_starts_unique", appsflyer_ios_transformed["af_purchase_unique"] + appsflyer_ios_transformed["af_start_trial_unique"]) 
appsflyer_ios_transformed.insert(12, "trial_starts_all", appsflyer_ios_transformed["af_purchase_all"] + appsflyer_ios_transformed["af_start_trial_all"]) 

# appsflyer_ios_transformed["trial_starts_total_unique"] = appsflyer_ios_transformed["af_purchase_unique"] + appsflyer_ios_transformed["af_start_trial_unique"]
# appsflyer_ios_transformed["trial_starts_total_all"] = appsflyer_ios_transformed["af_purchase_all"] + appsflyer_ios_transformed["af_start_trial_all"]

appsflyer_ios_grouped = appsflyer_ios_transformed.groupby(["date","advertiser", "campaign_type"],as_index=False).sum()

appsflyer_ios_grouped_final = appsflyer_ios_grouped.loc[appsflyer_ios_grouped["advertiser"].isin(paid_acquisition_advertisers), :]

appsflyer_ios_grouped_final.head(100)



Unnamed: 0,date,advertiser,campaign_type,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
2,2018-06-28,Facebook Ads,Other,26,532,10,1,1,0,0,1,1,0,0,0.0
4,2018-06-28,Organic,Other,100,0,29,2,3,0,0,2,3,0,0,0.0
13,2018-06-29,Facebook Ads,Other,87,330,36,2,2,0,0,2,2,0,0,0.0
15,2018-06-29,Organic,Other,79,0,26,1,1,0,0,1,1,0,0,0.0
21,2018-06-30,Facebook Ads,Other,120,427,48,3,3,0,0,3,3,0,0,0.0
22,2018-06-30,Organic,Other,60,0,12,0,0,0,0,0,0,0,0,0.0
27,2018-07-01,Apple Search Ads,Advanced,0,0,0,0,0,0,0,0,0,0,0,0.0
29,2018-07-01,Facebook Ads,Other,151,603,60,4,4,0,0,4,4,0,0,0.0
30,2018-07-01,Organic,Other,180,0,62,6,7,0,0,6,7,0,0,0.0
35,2018-07-02,Apple Search Ads,Advanced,0,0,0,0,0,0,0,0,0,0,0,0.0


In [None]:
# organic_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[appsflyer_ios_grouped_final['advertiser'] == "Organic", ["date", "advertiser"]]
# organic_spend_reach_clicks_ios["device_type"] = "IOS"
# organic_spend_reach_clicks_ios.head()

### Transform APPSFLYER ANDROID DataFrame

In [44]:
appsflyer_android_transformed = appsflyer_android_df[appsflyer_cols].copy()

# Rename the column headers
appsflyer_android_transformed = appsflyer_android_transformed.rename(columns={"Date": "date",
                                                         "Media Source (pid)": "advertiser",
                                                         "Campaign (c)": "campaign_name",
                                                         "Installs": "installs",
                                                         "Sessions": "sessions",
                                                         "new_workout_saved (Unique users)": "new_workout_saved_unique",
                                                         "af_purchase (Unique users)": "af_purchase_unique",
                                                         "af_purchase (Event counter)": "af_purchase_all",
                                                         "af_start_trial (Unique users)": "af_start_trial_unique",
                                                         "af_start_trial (Event counter)": "af_start_trial_all",
                                                         most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
                                                         most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
                                                         most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue"})

# facebook_advertiser_string = "Facebook Ads"
# asa_advertiser_string = "Apple Search Ads"

# type_evergreen_string = "Evergreen"
# type_other_string = "Other
# type_basic_string = "Basic"
# type_advanced_string = "Advanced"

appsflyer_android_transformed.insert(2, "campaign_type", type_other_string) 


appsflyer_android_transformed.loc[(appsflyer_android_transformed['advertiser'] == facebook_advertiser_string)
                              &
                              (appsflyer_android_transformed['campaign_name'].str.contains('(?i)Evergreen')), "campaign_type"] = type_evergreen_string


# appsflyer_android_transformed.loc[appsflyer_android_transformed['campaign_name'].str.contains('(?i)Evergreen'),"campaign_type"] = facebook_eg_string

appsflyer_android_transformed.insert(11, "trial_starts_unique", appsflyer_android_transformed["af_purchase_unique"] + appsflyer_android_transformed["af_start_trial_unique"]) 
appsflyer_android_transformed.insert(12, "trial_starts_all", appsflyer_android_transformed["af_purchase_all"] + appsflyer_android_transformed["af_start_trial_all"]) 

# appsflyer_android_transformed["trial_starts_total_unique"] = appsflyer_android_transformed["af_purchase_unique"] + appsflyer_android_transformed["af_start_trial_unique"]
# appsflyer_android_transformed["trial_starts_total_all"] = appsflyer_android_transformed["af_purchase_all"] + appsflyer_android_transformed["af_start_trial_all"]

appsflyer_android_grouped = appsflyer_android_transformed.groupby(["date","advertiser", "campaign_type"],as_index=False).sum()

appsflyer_android_grouped_final = appsflyer_android_grouped.loc[appsflyer_android_grouped["advertiser"].isin(paid_acquisition_advertisers), :]

appsflyer_android_grouped_final.head(10)



Unnamed: 0,date,advertiser,campaign_type,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2018-06-28,Organic,Other,10,0,4,0,0,0,0,0,0,0,0,0.0
2,2018-06-29,Organic,Other,9,0,4,0,0,0,0,0,0,0,0,0.0
4,2018-06-30,Organic,Other,9,0,3,0,0,0,0,0,0,0,0,0.0
6,2018-07-01,Organic,Other,12,0,2,0,0,0,0,0,0,0,0,0.0
8,2018-07-02,Organic,Other,7,0,1,0,0,0,0,0,0,0,0,0.0
11,2018-07-03,Organic,Other,12,0,3,0,1,0,0,0,1,0,0,0.0
14,2018-07-04,Organic,Other,8,0,3,0,0,0,0,0,0,0,0,0.0
15,2018-07-05,Organic,Other,9,0,3,0,0,0,0,0,0,0,0,0.0
17,2018-07-06,Organic,Other,4,0,0,0,0,0,0,0,0,0,0,0.0
18,2018-07-07,Organic,Other,3,0,1,0,0,0,0,0,0,0,0,0.0


### Create Placeholders for Organic, ASA (Basic and Advanced) DataFrame with Spend, Impressions, Clicks, etc.

In [39]:
# facebook_advertiser_string = "Facebook Ads"
# asa_advertiser_string = "Apple Search Ads"

# type_evergreen_string = "Evergreen"
# type_other_string = "Other
# type_basic_string = "Basic"
# type_advanced_string = "Advanced"



# ORGANIC IOS
organic_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[appsflyer_ios_grouped_final['advertiser'] == "Organic", ["date", "advertiser", "campaign_type"]]
organic_spend_reach_clicks_ios["device_type"] = "IOS"
display(organic_spend_reach_clicks_ios.head())


# APPLE SEARCH IOS
asa_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[appsflyer_ios_grouped_final['advertiser'] == asa_advertiser_string, ["date", "advertiser", "campaign_type"]]
asa_spend_reach_clicks_ios["device_type"] = "IOS"
display(asa_spend_reach_clicks_ios.head())


# # APPLE SEARCH BASIC IOS
# asa_basic_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[(appsflyer_ios_grouped_final['advertiser'] == asa_advertiser_string)
#                                                                    &
#                                                                    (appsflyer_ios_grouped_final['campaign_type'] == type_basic_string), ["date", "advertiser", "campaign_type"]]
# asa_basic_spend_reach_clicks_ios["device_type"] = "IOS"
# display(asa_basic_spend_reach_clicks_ios.head())

# # APPLE SEARCH ADVANCED IOS
# asa_advanced_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[(appsflyer_ios_grouped_final['advertiser'] == asa_advertiser_string)
#                                                                       &
#                                                                       (appsflyer_ios_grouped_final['campaign_type'] == type_advanced_string), ["date", "advertiser", "campaign_type"]]

# asa_advanced_spend_reach_clicks_ios["device_type"] = "IOS"
# display(asa_advanced_spend_reach_clicks_ios.head())


# ORGANIC ANDROID
organic_spend_reach_clicks_android = appsflyer_android_grouped_final.loc[appsflyer_android_grouped_final['advertiser'] == "Organic", ["date", "advertiser", "campaign_type"]]
organic_spend_reach_clicks_android["device_type"] = "ANDROID"
display(organic_spend_reach_clicks_android.head())



# facebook_advertiser_string = "Facebook Ads"
# asa_advertiser_string = "Apple Search Ads"

# type_evergreen_string = "Evergreen"
# type_other_string = "Other
# type_basic_string = "Basic"
# type_advanced_string = "Advanced"

Unnamed: 0,date,advertiser,campaign_type,device_type
4,2018-06-28,Organic,Other,IOS
15,2018-06-29,Organic,Other,IOS
22,2018-06-30,Organic,Other,IOS
30,2018-07-01,Organic,Other,IOS
38,2018-07-02,Organic,Other,IOS


Unnamed: 0,date,advertiser,campaign_type,device_type
27,2018-07-01,Apple Search Ads,Advanced,IOS
35,2018-07-02,Apple Search Ads,Advanced,IOS
44,2018-07-03,Apple Search Ads,Advanced,IOS
45,2018-07-03,Apple Search Ads,Basic,IOS
54,2018-07-04,Apple Search Ads,Advanced,IOS


Unnamed: 0,date,advertiser,campaign_type,device_type
0,2018-06-28,Organic,Other,ANDROID
2,2018-06-29,Organic,Other,ANDROID
4,2018-06-30,Organic,Other,ANDROID
6,2018-07-01,Organic,Other,ANDROID
8,2018-07-02,Organic,Other,ANDROID


# COMBINE APPSFLYER AND ADVERTISER DATA TOGETHER

### IOS Combination

In [54]:
# facebook_spend_reach_clicks_ios
# pinterest_spend_reach_clicks_ios
# google_spend_reach_clicks_ios
# asa_spend_reach_clicks_ios
# OR
# asa_basic_spend_reach_clicks_ios
# asa_advanced_spend_reach_clicks_ios
# organic_spend_reach_clicks_ios
# appsflyer_ios_grouped_final

advertisers_spend_reach_clicks_ios = pd.concat([facebook_spend_reach_clicks_ios, 
                                                pinterest_spend_reach_clicks_ios, 
                                                google_spend_reach_clicks_ios,
                                                asa_spend_reach_clicks_ios,
#                                                 asa_basic_spend_reach_clicks_ios, 
#                                                 asa_advanced_spend_reach_clicks_ios, 
                                                organic_spend_reach_clicks_ios      ], 
                                               ignore_index=True, sort=False).sort_values(by=['date'])

merge_table_ios = pd.merge(advertisers_spend_reach_clicks_ios, appsflyer_ios_grouped_final, on=["date","advertiser", "campaign_type"], how="left")

display(merge_table_ios.head())


merge_table_ios.to_csv(folder_output_csvs + "/merged_IOS_data.csv", index=False, header=True)




Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique,views,...,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2018-06-28,IOS,Facebook Ads,Other,464.73,54871.0,51019.0,245.0,241.0,,...,10.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,2018-06-28,IOS,Organic,Other,,,,,,,...,29.0,2.0,3.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0
2,2018-06-28,IOS,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
3,2018-06-29,IOS,Organic,Other,,,,,,,...,26.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
4,2018-06-29,IOS,Facebook Ads,Other,1010.64,86403.0,82885.0,473.0,467.0,,...,36.0,2.0,2.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0


### ANDROID Combination

In [63]:
facebook_spend_reach_clicks_android.head()
pinterest_spend_reach_clicks_android.head()
appsflyer_android_grouped_final.head()


advertisers_spend_reach_clicks_android = pd.concat([facebook_spend_reach_clicks_android, pinterest_spend_reach_clicks_android, organic_spend_reach_clicks_android], ignore_index=True, sort=False).sort_values(by=['date'])

merge_table_android = pd.merge(advertisers_spend_reach_clicks_android, appsflyer_android_grouped_final, on=["date","advertiser", "campaign_type"], how="left")

merge_table_android.insert(9, "views", np.nan) 

display(merge_table_android.head())

merge_table_android.to_csv(folder_output_csvs + "/merged_ANDROID_data.csv", index=False, header=True)



Unnamed: 0,date,device_type,advertiser,campaign_type,spend,impressions,impressions_unique,clicks,clicks_unique,views,...,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2018-06-28,ANDROID,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
1,2018-06-28,ANDROID,Organic,Other,,,,,,,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2018-06-29,ANDROID,pinterest_int,Other,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
3,2018-06-29,ANDROID,Organic,Other,,,,,,,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2018-06-30,ANDROID,Organic,Other,,,,,,,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# CREATE AARRR DASHBOARD showing AGGREGATE DATA OVER DATE RANGE

In [46]:
### Date Range
start_date = "2019-05-03"
end_date = "2019-05-06"

columns_to_keep = ["date", "device_type", "advertiser", "campaign_type", "spend", "installs", "trial_starts_unique"]


### IOS Aggregate Data

In [48]:
ios_aarrr = merge_table_ios.loc[(merge_table_ios["date"] >= start_date) 
                                        & 
                                        (merge_table_ios["date"] <= end_date), columns_to_keep]\
                                        .groupby(["advertiser", "campaign_type"],as_index=False).sum()

display(ios_aarrr.head())

ios_aarrr.to_csv(folder_output_csvs + "/ios_aarrr_" + start_date + "_to_" + end_date + ".csv", index=False, header=True)



Unnamed: 0,advertiser,campaign_type,spend,installs,trial_starts_unique
0,Apple Search Ads,Advanced,0.0,52.0,5.0
1,Apple Search Ads,Basic,0.0,230.0,16.0
2,Facebook Ads,Evergreen,4339.71,1823.0,208.0
3,Facebook Ads,Other,997.23,415.0,20.0
4,Organic,Other,0.0,3897.0,190.0
5,googleadwords_int,Other,1135.29,477.0,10.0
6,pinterest_int,Other,4041.35,4283.0,224.0


### Android Aggregate Data

In [53]:
android_aarrr = merge_table_android.loc[(merge_table_android["date"] >= start_date) 
                                        & 
                                        (merge_table_android["date"] <= end_date), columns_to_keep]\
                                        .groupby(["advertiser", "campaign_type"],as_index=False).sum()

display(android_aarrr.head())

android_aarrr.to_csv(folder_output_csvs + "/android_aarrr_" + start_date + "_to_" + end_date + ".csv", index=False, header=True)




Unnamed: 0,advertiser,campaign_type,spend,installs,trial_starts_unique
0,Facebook Ads,Evergreen,2000.84,953.0,69.0
1,Facebook Ads,Other,476.94,113.0,2.0
2,Organic,Other,0.0,631.0,15.0
3,pinterest_int,Other,1029.26,983.0,41.0
