In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine

# NOTES ON THIS VERSION

This version reads one ads CSV each from Facebook, Pinterest, and Google for the advertiser-side data (spend, impressions, clicks).
It then reads the iOS and Android AppsFlyer CSVs for installs and the down-funnel events.

The Facebook file has a "Campaign Name" column whose values always contain "IOS" or "ANDROID", plus "Evergreen" for the evergreen campaigns. This lets us split performance by platform and by evergreen vs. non-evergreen.

# ADVERTISER DATA PREPARATION

### Global Variables

In [2]:
# Folder that holds the raw advertiser and AppsFlyer CSV exports read below.
folder_with_csvs = "Resources"

# Folder that receives every CSV this notebook writes.
folder_output_csvs = "Output_2019-05-05"

# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists before any later cell tries to write into it.
os.makedirs(folder_output_csvs, exist_ok=True)

### Extract Advertiser CSVs into DataFrames

In [4]:
# Advertiser labels that separate evergreen (EG) Facebook campaigns from the
# rest; later cells compare them against the AppsFlyer media-source column.
facebook_non_eg_string = "Facebook Ads"
facebook_eg_string = "Facebook Ads EG"

# Read the raw Facebook export and preview its first rows.
facebook_file = "{}/FACEBOOK.csv".format(folder_with_csvs)
facebook_df = pd.read_csv(facebook_file)
display(facebook_df.head(n=5))

Unnamed: 0,Day,Campaign Name,Amount Spent (USD),Impressions,Reach,Link Clicks,Link Clicks [On Ad],Unique Link Clicks,Unique Link Clicks [On Ad],Mobile App Installs,Unique Mobile App Tutorials Completed,Unique Mobile App Purchases,Reporting Starts,Reporting Ends
0,2019-05-05,bidalgo_2019_04 | Evergreen CPT with CBO and P...,1171.71,136002,124896,1406.0,1406.0,1397.0,1397.0,601.0,342.0,75.0,2019-05-05,2019-05-05
1,2019-05-05,bidalgo_2019_04_27 | Evergreen CPT with CBO an...,500.84,66278,54112,781.0,781.0,772.0,772.0,289.0,160.0,19.0,2019-05-05,2019-05-05
2,2019-05-05,bidalgo_2018_06 | Launch Ads V1 | iOS | Moms 2...,0.14,24,23,,,,,,,,2019-05-05,2019-05-05
3,2019-05-05,bidalgo_2019_05_04 | CPI TEST Intimacy Ads | I...,204.33,44857,43184,470.0,470.0,469.0,469.0,138.0,84.0,6.0,2019-05-05,2019-05-05
4,2019-05-05,bidalgo_2018_07 | Launch Ads - VIDEO | iOS | M...,9.9,1552,1462,1.0,1.0,1.0,1.0,2.0,2.0,,2019-05-05,2019-05-05


In [5]:
# Read the raw Pinterest export and preview its first rows.
pinterest_file = "{}/PINTEREST.csv".format(folder_with_csvs)
pinterest_df = pd.read_csv(pinterest_file)
display(pinterest_df.head(n=5))

Unnamed: 0,Campaign ID,Campaign name,Date,Campaign goal,Ad Format Type,Spend in account currency,Impressions,Clicks,Saves,Closeups,...,Earned Send Unique Users,Downstream Flag Unique Users,Sum of Installs from all Paid activity,Installs resulting from Paid clicks,Installs resulting from views on Paid promotion,Installs resulting from Paid Close Ups & Saves,Sum of Installs from all Earned activity,Installs resulting from Earned clicks,Installs resulting from views on Earned promotions,Installs resulting from Earned Close Ups & Saves
0,C626739958135,iOS - Prospecting - KW,2019-04-06,App_install_traffic,One Tap,198.58,283036.0,1663.0,49.0,16084.0,...,0.0,0,142.0,130.0,8.0,4.0,0.0,0.0,0.0,0.0
1,C626739958135,iOS - Prospecting - KW,2019-04-07,App_install_traffic,One Tap,200.0,271653.0,1660.0,36.0,15923.0,...,0.0,0,164.0,134.0,26.0,4.0,0.0,0.0,0.0,0.0
2,C626739958135,iOS - Prospecting - KW,2019-04-08,App_install_traffic,One Tap,199.59,276760.0,1677.0,37.0,13513.0,...,0.0,0,198.0,175.0,20.0,3.0,2.0,2.0,0.0,0.0
3,C626739958135,iOS - Prospecting - KW,2019-04-09,App_install_traffic,One Tap,100.0,153199.0,839.0,13.0,7811.0,...,0.0,0,96.0,89.0,4.0,3.0,0.0,0.0,0.0,0.0
4,C626739958135,iOS - Prospecting - KW,2019-04-10,App_install_traffic,One Tap,100.0,157408.0,832.0,28.0,7886.0,...,0.0,0,63.0,58.0,3.0,2.0,0.0,0.0,0.0,0.0


In [6]:
# Read the raw Google Ads export and preview its first rows.
# skiprows=2 skips the first two lines of the file before the header row
# (presumably report title lines -- confirm against the export).
google_file = "{}/GOOGLE.csv".format(folder_with_csvs)
google_df = pd.read_csv(google_file, skiprows=2)
display(google_df.head(n=5))

Unnamed: 0,Day,Campaign type,Campaign,Currency,Cost,Impressions,Views,Clicks
0,2019-04-24,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,514.63,95418,10756,279
1,2019-04-25,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,506.81,162903,13529,471
2,2019-04-26,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,81.79,46785,3566,137
3,2019-04-27,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,909.81,596122,23035,2005
4,2019-04-28,Universal app,2019_04 | UAC INSTALLS - V1 | IOS | image - ex...,USD,112.49,46903,2373,231


In [7]:
# No Apple Search Ads (ASA) spend CSV is loaded in this version; the loader is
# kept below, disabled.
# asa_file = folder_with_csvs + "/ASA.csv"
# asa_df = pd.read_csv(asa_file)
# asa_df.head()

# Advertiser labels for the two ASA buckets used by later cells.
asa_advanced_string = "ASA Advanced"
asa_basic_string = "ASA Basic"

### Transform FACEBOOK DataFrame

In [9]:
# Build the per-day Facebook spend/reach/click totals, split by platform
# (IOS vs ANDROID) and by evergreen vs non-evergreen campaigns.

# Keep only the columns needed downstream and give them the shared snake_case names.
facebook_cols = ["Day", "Campaign Name", "Amount Spent (USD)",
                 "Impressions", "Reach", "Link Clicks", "Unique Link Clicks"]

facebook_transformed = facebook_df[facebook_cols].rename(columns={
    "Day": "date",
    "Campaign Name": "campaign_name",
    "Amount Spent (USD)": "spend",
    "Impressions": "impressions",
    "Reach": "impressions_unique",
    "Link Clicks": "clicks",
    "Unique Link Clicks": "clicks_unique",
})

# The platform is read from the campaign name. case=False keeps the match
# case-insensitive; na=False treats a missing campaign name as "no match"
# instead of producing a NaN mask that .loc cannot index with.
# (An earlier approach, based on an "impression_device" column, is no longer used.)
facebook_campaign_names = facebook_transformed["campaign_name"]

facebook_transformed["device_type"] = "other"
facebook_transformed.loc[
    facebook_campaign_names.str.contains("IOS", case=False, na=False), "device_type"
] = "IOS"
facebook_transformed.loc[
    facebook_campaign_names.str.contains("ANDROID", case=False, na=False), "device_type"
] = "ANDROID"

# Evergreen campaigns are reported under their own advertiser label.
facebook_transformed["advertiser"] = facebook_non_eg_string
facebook_transformed.loc[
    facebook_campaign_names.str.contains("Evergreen", case=False, na=False), "advertiser"
] = facebook_eg_string

# Daily totals per platform and advertiser. numeric_only=True keeps the text
# column campaign_name out of the sum regardless of the pandas version.
facebook_spend_reach_by_device_type = (
    facebook_transformed
    .groupby(["date", "device_type", "advertiser"], as_index=False)
    .sum(numeric_only=True)
)

# Drop rows whose campaign name mentions neither platform. The .copy() calls
# detach each result from its parent so later column assignments are safe.
facebook_spend_reach_by_device_type_final = facebook_spend_reach_by_device_type.loc[
    facebook_spend_reach_by_device_type["device_type"] != "other", :
].copy()

# Split IOS and ANDROID into separate dataframes.
facebook_spend_reach_clicks_ios = facebook_spend_reach_by_device_type_final.loc[
    facebook_spend_reach_by_device_type_final["device_type"] == "IOS", :
].copy()
facebook_spend_reach_clicks_android = facebook_spend_reach_by_device_type_final.loc[
    facebook_spend_reach_by_device_type_final["device_type"] == "ANDROID", :
].copy()

display(facebook_spend_reach_clicks_ios.head())
display(facebook_spend_reach_clicks_android.tail())




Unnamed: 0,date,device_type,advertiser,spend,impressions,impressions_unique,clicks,clicks_unique
1,2019-04-06,IOS,Facebook Ads,489.48,115877,112233,826.0,822.0
2,2019-04-06,IOS,Facebook Ads EG,497.64,85205,82800,772.0,766.0
4,2019-04-07,IOS,Facebook Ads,439.67,104646,101376,640.0,637.0
5,2019-04-07,IOS,Facebook Ads EG,500.4,91179,89453,794.0,788.0
7,2019-04-08,IOS,Facebook Ads,379.69,89908,88666,473.0,470.0


Unnamed: 0,date,device_type,advertiser,spend,impressions,impressions_unique,clicks,clicks_unique
87,2019-05-03,ANDROID,Facebook Ads,367.06,55099,46112,367.0,363.0
88,2019-05-03,ANDROID,Facebook Ads EG,500.22,50496,40454,516.0,514.0
91,2019-05-04,ANDROID,Facebook Ads,109.88,16523,14355,114.0,114.0
92,2019-05-04,ANDROID,Facebook Ads EG,497.68,63057,48960,588.0,585.0
95,2019-05-05,ANDROID,Facebook Ads EG,500.84,66278,54112,781.0,772.0


### Transform PINTEREST DataFrame

In [10]:
# Build the per-day Pinterest spend/reach/click totals, split by platform.

# Keep only the columns needed downstream and give them the shared snake_case names.
pinterest_cols = ["Campaign ID", "Campaign name", "Date", "Spend in account currency",
                  "Impressions", "Impression Unique Users", "Clicks", "Click Unique Users"]

pinterest_transformed = pinterest_df[pinterest_cols].rename(columns={
    "Campaign ID": "campaign_id",
    "Campaign name": "campaign_name",
    "Date": "date",
    "Spend in account currency": "spend",
    "Impressions": "impressions",
    "Impression Unique Users": "impressions_unique",
    "Clicks": "clicks",
    "Click Unique Users": "clicks_unique",
})

# The platform is read from the campaign name. case=False keeps the match
# case-insensitive; na=False treats a missing campaign name as "no match"
# instead of producing a NaN mask that .loc cannot index with.
pinterest_campaign_names = pinterest_transformed["campaign_name"]

pinterest_transformed["device_type"] = "other"
pinterest_transformed.loc[
    pinterest_campaign_names.str.contains("IOS", case=False, na=False), "device_type"
] = "IOS"
pinterest_transformed.loc[
    pinterest_campaign_names.str.contains("ANDROID", case=False, na=False), "device_type"
] = "ANDROID"

# Daily totals per platform. numeric_only=True keeps the text columns
# (campaign_id, campaign_name) out of the sum regardless of the pandas version.
pinterest_spend_reach_by_device_type = (
    pinterest_transformed
    .groupby(["date", "device_type"], as_index=False)
    .sum(numeric_only=True)
)

# Drop rows whose campaign name mentions neither platform. .copy() detaches the
# result from its parent so that adding the advertiser column below writes to an
# independent frame and no longer raises SettingWithCopyWarning.
pinterest_spend_reach_by_device_type_final = pinterest_spend_reach_by_device_type.loc[
    pinterest_spend_reach_by_device_type["device_type"] != "other", :
].copy()

# Advertiser label used as the merge key against the AppsFlyer data.
pinterest_spend_reach_by_device_type_final["advertiser"] = "pinterest_int"

# Split IOS and ANDROID into separate dataframes.
pinterest_spend_reach_clicks_ios = pinterest_spend_reach_by_device_type_final.loc[
    pinterest_spend_reach_by_device_type_final["device_type"] == "IOS", :
].copy()
pinterest_spend_reach_clicks_android = pinterest_spend_reach_by_device_type_final.loc[
    pinterest_spend_reach_by_device_type_final["device_type"] == "ANDROID", :
].copy()

display(pinterest_spend_reach_clicks_ios.head())
display(pinterest_spend_reach_clicks_android.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
1,2019-04-06,IOS,652.34,744777.0,477859.0,4290.0,4196.0,pinterest_int
4,2019-04-07,IOS,724.14,778950.0,507270.0,4645.0,4529.0,pinterest_int
7,2019-04-08,IOS,829.15,850544.0,571909.0,5363.0,5238.0,pinterest_int
10,2019-04-09,IOS,672.04,758350.0,538871.0,4582.0,4513.0,pinterest_int
13,2019-04-10,IOS,644.11,671085.0,450678.0,4125.0,4058.0,pinterest_int


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
0,2019-04-06,ANDROID,150.0,162507.0,101204.0,1107.0,1092.0,pinterest_int
3,2019-04-07,ANDROID,149.65,171508.0,105752.0,1128.0,1117.0,pinterest_int
6,2019-04-08,ANDROID,149.4,159137.0,100565.0,1129.0,1110.0,pinterest_int
9,2019-04-09,ANDROID,150.0,171746.0,106958.0,1125.0,1111.0,pinterest_int
12,2019-04-10,ANDROID,149.88,160629.0,101394.0,1114.0,1105.0,pinterest_int


### Transform GOOGLE ADS DataFrame

In [11]:
# Build the per-day Google Ads spend/impression/click totals.

# Keep only the columns needed downstream and give them the shared snake_case names.
google_cols = ["Day", "Campaign", "Cost",
               "Impressions", "Views", "Clicks"]

google_transformed = google_df[google_cols].rename(columns={
    "Day": "date",
    "Campaign": "campaign_name",
    "Cost": "spend",
    "Impressions": "impressions",
    "Views": "views",
    "Clicks": "clicks",
})


def _strip_thousands_separators(column):
    """Return ``column`` as numbers, removing "," separators from text values.

    A column that pandas already parsed as numeric is passed straight to
    ``pd.to_numeric``; only non-numeric columns go through the string cleanup.
    This keeps the cell working whether or not the export used separators.
    """
    if not pd.api.types.is_numeric_dtype(column):
        column = column.astype(str).str.replace(",", "", regex=False)
    return pd.to_numeric(column)


# The count columns can arrive as text such as "95,418"; convert them to integers.
for google_count_col in ["impressions", "views", "clicks"]:
    google_transformed[google_count_col] = (
        _strip_thousands_separators(google_transformed[google_count_col]).astype(int)
    )

# NOTE(review): "spend" is left as read from the CSV. If the export ever formats
# Cost with thousands separators it would be text and be excluded from the sum
# below -- confirm against the export.

# NOTE(review): every Google campaign is attributed to IOS here; no platform
# detection from the campaign name is applied -- confirm this is still intended.
google_transformed["device_type"] = "IOS"

# Daily totals per platform. numeric_only=True keeps the text column
# campaign_name out of the sum regardless of the pandas version.
google_spend_reach_by_device_type = (
    google_transformed
    .groupby(["date", "device_type"], as_index=False)
    .sum(numeric_only=True)
)

# .copy() detaches the filtered result so that adding the advertiser column
# below writes to an independent frame (no SettingWithCopyWarning).
google_spend_reach_by_device_type_final = google_spend_reach_by_device_type.loc[
    google_spend_reach_by_device_type["device_type"] != "other", :
].copy()

# Advertiser label used as the merge key against the AppsFlyer data.
google_spend_reach_by_device_type_final["advertiser"] = "googleadwords_int"

# Split IOS and ANDROID into separate dataframes (ANDROID is empty given the
# hard-coded device_type above).
google_spend_reach_clicks_ios = google_spend_reach_by_device_type_final.loc[
    google_spend_reach_by_device_type_final["device_type"] == "IOS", :
].copy()
google_spend_reach_clicks_android = google_spend_reach_by_device_type_final.loc[
    google_spend_reach_by_device_type_final["device_type"] == "ANDROID", :
].copy()

display(google_spend_reach_clicks_ios.head())
display(google_spend_reach_clicks_android.tail())




dtype('float64')

Unnamed: 0,date,device_type,spend,impressions,views,clicks
0,2019-04-24,IOS,514.63,95418,10756,279
1,2019-04-25,IOS,506.81,162903,13529,471
2,2019-04-26,IOS,81.79,46785,3566,137
3,2019-04-27,IOS,909.81,596122,23035,2005
4,2019-04-28,IOS,112.49,46903,2373,231


Unnamed: 0,date,device_type,spend,impressions,views,clicks,advertiser
0,2019-04-24,IOS,514.63,95418,10756,279,googleadwords_int
1,2019-04-25,IOS,506.81,162903,13529,471,googleadwords_int
2,2019-04-26,IOS,81.79,46785,3566,137,googleadwords_int
3,2019-04-27,IOS,909.81,596122,23035,2005,googleadwords_int
4,2019-04-28,IOS,112.49,46903,2373,231,googleadwords_int


Unnamed: 0,date,device_type,spend,impressions,views,clicks,advertiser


In [None]:
# Scratch cell (fully commented out): an earlier attempt at converting the Google
# count columns to integers. That conversion is handled in the Google transform
# cell, so nothing here needs to run.
# google_transformed["impressions"] = google_transformed["impressions"].str.replace(",","").astype(int)
# google_transformed["views"] = google_transformed["views"].str.replace(",","").astype(int)
# google_transformed["clicks"] = google_transformed["clicks"].str.replace(",","").astype(int)
# # google_transformed[["impressions", "views"]] = google_transformed[["impressions", "views"]].apply(pd.to_numeric)


# google_transformed.head()

# APPSFLYER DATA PREPARATION

### Extract APPSFLYER CSVs into DataFrames

In [12]:
# Read the raw AppsFlyer iOS export and preview up to its first 100 rows.
appsflyer_ios_file = "{}/APPSFLYER-IOS.csv".format(folder_with_csvs)
appsflyer_ios_df = pd.read_csv(appsflyer_ios_file)
display(appsflyer_ios_df.head(n=100))

Unnamed: 0,Date,Agency/PMD (af_prt),Media Source (pid),Campaign (c),Impressions,Clicks,CTR,Installs,Conversion Rate,Sessions,...,ltv_20190501 (Sales in USD),ltv_20190504 (Unique users),ltv_20190504 (Event counter),ltv_20190504 (Sales in USD),ltv_20190505 (Unique users),ltv_20190505 (Event counter),ltv_20190505 (Sales in USD),new_workout_saved (Unique users),new_workout_saved (Event counter),new_workout_saved (Sales in USD)
0,2019-04-06,,Apple Search Ads,Moov Studio: Fitness Workouts - 1274153663,,,,40,,126,...,0.00,0,0,0.00000,0,0,0.00000,32,44,0.0
1,2019-04-06,,Apple Search Ads,Performance - Brand,104.0,6.0,0.0577,2,0.3333,5,...,0.00,0,0,0.00000,0,0,0.00000,0,0,0.0
2,2019-04-06,,Apple Search Ads,Performance - Competitor,225.0,14.0,0.0622,6,0.4286,13,...,0.00,0,0,0.00000,0,0,0.00000,1,1,0.0
3,2019-04-06,,Apple Search Ads,Performance - Generic,139.0,16.0,0.1151,8,0.5000,26,...,59.99,1,1,59.99000,1,1,59.99000,6,9,0.0
4,2019-04-06,,Deep Linking,Deep Link Redirect iOS,,16.0,,0,0.0000,0,...,0.00,0,0,0.00000,0,0,0.00000,0,0,0.0
5,2019-04-06,,Facebook Ads,2018_06 | Launch Ads V1 | iOS | Moms 25-39 | 4...,673.0,8.0,0.0119,1,0.1250,1,...,0.00,0,0,0.00000,0,0,0.00000,1,1,0.0
6,2019-04-06,,Facebook Ads,2018_07 | Launch Ads - VIDEO | iOS | Moms 25-3...,1005.0,4.0,0.0040,3,0.7500,12,...,0.00,0,0,0.00000,0,0,0.00000,2,2,0.0
7,2019-04-06,,Facebook Ads,2019_02 | Trial Start Optimized Delivery with ...,1.0,,,0,,0,...,0.00,0,0,0.00000,0,0,0.00000,0,0,0.0
8,2019-04-06,,Facebook Ads,2019_03 | TEST Campaign | iOS | Multiple Audie...,70706.0,580.0,0.0082,224,0.3862,611,...,59.99,1,1,59.99000,1,1,59.99000,124,195,0.0
9,2019-04-06,,Facebook Ads,2019_03 | Trial Start Optimized Delivery with ...,145855.0,1342.0,0.0092,358,0.2668,1296,...,779.82,18,18,779.82000,18,18,779.82000,206,643,0.0


In [13]:
# Read the raw AppsFlyer Android export and preview its first rows.
appsflyer_android_file = "{}/APPSFLYER-ANDROID.csv".format(folder_with_csvs)
appsflyer_android_df = pd.read_csv(appsflyer_android_file)
display(appsflyer_android_df.head(n=5))

Unnamed: 0,Date,Agency/PMD (af_prt),Media Source (pid),Campaign (c),Impressions,Clicks,CTR,Installs,Conversion Rate,Sessions,...,ltv_20190501 (Sales in USD),ltv_20190504 (Unique users),ltv_20190504 (Event counter),ltv_20190504 (Sales in USD),ltv_20190505 (Unique users),ltv_20190505 (Event counter),ltv_20190505 (Sales in USD),new_workout_saved (Unique users),new_workout_saved (Event counter),new_workout_saved (Sales in USD)
0,2019-04-06,,Facebook Ads,2019_02 | Trial Start Optimized Delivery with ...,65574.0,1065.0,0.0162,398,0.3737,1992,...,139.96,3,3,139.96,4,4,199.95001,216,331,0.0
1,2019-04-06,,Organic,,,,,124,,0,...,0.0,0,0,0.0,0,0,0.0,63,192,0.0
2,2019-04-06,,SocialInstagram,InstagramBioUniv,,34.0,,17,0.5,65,...,0.0,0,0,0.0,0,0,0.0,13,17,0.0
3,2019-04-06,,Website,Bottom of Page Android Download Link,,5.0,,1,0.2,5,...,0.0,0,0,0.0,0,0,0.0,1,1,0.0
4,2019-04-06,,Website,FAQ Android Link,,1.0,,0,0.0,0,...,0.0,0,0,0.0,0,0,0.0,0,0,0.0


### SETUP VARIABLES TO USE FOR APPSFLYER IOS/ANDROID DataFrame Transformations

In [14]:
# Prefix of the LTV column triple to read from the AppsFlyer exports.
# NOTE(review): the exports shown above also carry ltv_20190505 columns --
# confirm that 20190504 is the snapshot intended here.
most_recent_ltv_column_prefix = "ltv_20190504"

# Media sources kept after grouping the AppsFlyer data. Despite the variable
# name, the list also contains "Organic".
paid_acquisition_advertisers = [
    facebook_non_eg_string,
    facebook_eg_string,
    "pinterest_int",
    "googleadwords_int",
    asa_basic_string,
    asa_advanced_string,
    "Organic",
]

# AppsFlyer columns used downstream: identifiers, funnel events, then the three
# LTV columns for the chosen prefix.
appsflyer_cols = [
    "Date",
    "Media Source (pid)",
    "Campaign (c)",
    "Installs",
    "Sessions",
    "new_workout_saved (Unique users)",
    "af_purchase (Unique users)",
    "af_purchase (Event counter)",
    "af_start_trial (Unique users)",
    "af_start_trial (Event counter)",
] + [
    most_recent_ltv_column_prefix + ltv_suffix
    for ltv_suffix in (" (Unique users)", " (Event counter)", " (Sales in USD)")
]

### Transform APPSFLYER IOS DataFrame

In [15]:
# Build the per-day, per-advertiser AppsFlyer totals for iOS.

# Keep only the columns needed downstream and give them snake_case names.
appsflyer_ios_transformed = appsflyer_ios_df[appsflyer_cols].rename(columns={
    "Date": "date",
    "Media Source (pid)": "advertiser",
    "Campaign (c)": "campaign_name",
    "Installs": "installs",
    "Sessions": "sessions",
    "new_workout_saved (Unique users)": "new_workout_saved_unique",
    "af_purchase (Unique users)": "af_purchase_unique",
    "af_purchase (Event counter)": "af_purchase_all",
    "af_start_trial (Unique users)": "af_start_trial_unique",
    "af_start_trial (Event counter)": "af_start_trial_all",
    most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
    most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
    most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue",
})

# na=False on every campaign-name match: rows without a campaign name count as
# "no match" instead of putting NaN into the boolean mask.
appsflyer_ios_campaign_names = appsflyer_ios_transformed["campaign_name"]

# Facebook rows whose campaign is an Evergreen one get the evergreen label.
appsflyer_ios_transformed.loc[
    (appsflyer_ios_transformed["advertiser"] == facebook_non_eg_string)
    & appsflyer_ios_campaign_names.str.contains("Evergreen", case=False, na=False),
    "advertiser",
] = facebook_eg_string

# Apple Search Ads rows whose campaign name contains 1274153663 become ASA Basic ...
appsflyer_ios_transformed.loc[
    (appsflyer_ios_transformed["advertiser"] == "Apple Search Ads")
    & appsflyer_ios_campaign_names.str.contains("1274153663", na=False),
    "advertiser",
] = asa_basic_string

# ... and every row still labelled Apple Search Ads after that becomes ASA Advanced.
# The order of these two assignments matters.
appsflyer_ios_transformed.loc[
    appsflyer_ios_transformed["advertiser"] == "Apple Search Ads", "advertiser"
] = asa_advanced_string

# Trial starts are the sum of the af_purchase and af_start_trial events. The
# columns are inserted at positions 10 and 11 so they sit before the LTV columns.
appsflyer_ios_transformed.insert(
    10, "trial_starts_unique",
    appsflyer_ios_transformed["af_purchase_unique"] + appsflyer_ios_transformed["af_start_trial_unique"],
)
appsflyer_ios_transformed.insert(
    11, "trial_starts_all",
    appsflyer_ios_transformed["af_purchase_all"] + appsflyer_ios_transformed["af_start_trial_all"],
)

# Daily totals per advertiser. numeric_only=True keeps the text column
# campaign_name out of the sum regardless of the pandas version.
appsflyer_ios_grouped = (
    appsflyer_ios_transformed
    .groupby(["date", "advertiser"], as_index=False)
    .sum(numeric_only=True)
)

# Keep only the tracked advertisers. .copy() detaches the result so later cells
# can slice it and add columns without SettingWithCopyWarning.
appsflyer_ios_grouped_final = appsflyer_ios_grouped.loc[
    appsflyer_ios_grouped["advertiser"].isin(paid_acquisition_advertisers), :
].copy()

appsflyer_ios_grouped_final.head(200)



Unnamed: 0,date,advertiser,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2019-04-06,ASA Advanced,16,44,7,1,1,0,0,1,1,1,1,59.99000
1,2019-04-06,ASA Basic,40,126,32,0,0,0,0,0,0,0,0,0.00000
3,2019-04-06,Facebook Ads,655,2178,377,51,51,0,0,51,51,21,21,859.79000
4,2019-04-06,Facebook Ads EG,0,0,0,0,0,0,0,0,0,0,0,0.00000
5,2019-04-06,Organic,709,0,345,27,31,0,0,27,31,0,0,0.00000
8,2019-04-06,pinterest_int,598,1795,365,26,26,1,1,27,27,11,11,509.89000
9,2019-04-07,ASA Advanced,25,62,16,1,1,0,0,1,1,0,0,0.00000
10,2019-04-07,ASA Basic,48,157,30,4,4,0,0,4,4,1,1,59.99000
12,2019-04-07,Facebook Ads,668,2305,373,57,58,0,0,57,58,26,26,1059.74004
13,2019-04-07,Facebook Ads EG,0,0,0,0,0,0,0,0,0,0,0,0.00000


In [None]:
# Scratch cell (fully commented out): an early draft of the Organic iOS
# placeholder. The placeholder cell further down builds the same frame, so
# nothing here needs to run.
# organic_spend_reach_clicks_ios = appsflyer_ios_grouped_final.loc[appsflyer_ios_grouped_final['advertiser'] == "Organic", ["date", "advertiser"]]
# organic_spend_reach_clicks_ios["device_type"] = "IOS"
# organic_spend_reach_clicks_ios.head()

### Transform APPSFLYER ANDROID DataFrame

In [16]:
# Build the per-day, per-advertiser AppsFlyer totals for Android.

# Keep only the columns needed downstream and give them snake_case names.
appsflyer_android_transformed = appsflyer_android_df[appsflyer_cols].rename(columns={
    "Date": "date",
    "Media Source (pid)": "advertiser",
    "Campaign (c)": "campaign_name",
    "Installs": "installs",
    "Sessions": "sessions",
    "new_workout_saved (Unique users)": "new_workout_saved_unique",
    "af_purchase (Unique users)": "af_purchase_unique",
    "af_purchase (Event counter)": "af_purchase_all",
    "af_start_trial (Unique users)": "af_start_trial_unique",
    "af_start_trial (Event counter)": "af_start_trial_all",
    most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
    most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
    most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue",
})

# Facebook rows whose campaign is an Evergreen one get the evergreen label.
# Only Facebook rows are relabelled, matching the iOS transform: a campaign from
# another media source that happens to contain "Evergreen" keeps its own
# advertiser. na=False makes rows without a campaign name (e.g. Organic) count
# as "no match" instead of putting NaN into the mask handed to .loc.
appsflyer_android_transformed.loc[
    (appsflyer_android_transformed["advertiser"] == facebook_non_eg_string)
    & appsflyer_android_transformed["campaign_name"].str.contains("Evergreen", case=False, na=False),
    "advertiser",
] = facebook_eg_string

# Trial starts are the sum of the af_purchase and af_start_trial events. The
# columns are inserted at positions 10 and 11 so they sit before the LTV columns.
appsflyer_android_transformed.insert(
    10, "trial_starts_unique",
    appsflyer_android_transformed["af_purchase_unique"] + appsflyer_android_transformed["af_start_trial_unique"],
)
appsflyer_android_transformed.insert(
    11, "trial_starts_all",
    appsflyer_android_transformed["af_purchase_all"] + appsflyer_android_transformed["af_start_trial_all"],
)

# Daily totals per advertiser. numeric_only=True keeps the text column
# campaign_name out of the sum regardless of the pandas version.
appsflyer_android_grouped = (
    appsflyer_android_transformed
    .groupby(["date", "advertiser"], as_index=False)
    .sum(numeric_only=True)
)

# Keep only the tracked advertisers. .copy() detaches the result so later cells
# can slice it and add columns without SettingWithCopyWarning.
appsflyer_android_grouped_final = appsflyer_android_grouped.loc[
    appsflyer_android_grouped["advertiser"].isin(paid_acquisition_advertisers), :
].copy()

appsflyer_android_grouped_final.tail()



Unnamed: 0,date,advertiser,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
168,2019-05-04,pinterest_int,229,947,134,0,0,11,11,11,11,0,0,0.0
169,2019-05-05,Facebook Ads,14,58,3,0,0,0,0,0,0,0,0,0.0
170,2019-05-05,Facebook Ads EG,276,1109,151,1,1,19,19,20,20,0,0,0.0
171,2019-05-05,Organic,171,0,70,0,0,7,8,7,8,0,0,0.0
174,2019-05-05,pinterest_int,274,1082,168,0,0,7,7,7,7,0,0,0.0


### Create placeholder DataFrames for Organic and ASA (Basic and Advanced), which have no spend, impressions, or clicks data

In [17]:
def _placeholder_rows(appsflyer_grouped, advertiser, device_type):
    """Return one (date, advertiser, device_type) row per day for ``advertiser``.

    The rows are taken from ``appsflyer_grouped`` (a grouped AppsFlyer frame with
    "date" and "advertiser" columns). They carry no spend/impression/click
    columns, so those come out empty once the frame is concatenated with the
    advertiser spend data. ``assign`` returns a new frame, which avoids writing
    a column onto a slice of ``appsflyer_grouped``.
    """
    rows = appsflyer_grouped.loc[appsflyer_grouped["advertiser"] == advertiser, ["date", "advertiser"]]
    return rows.assign(device_type=device_type)


# ORGANIC IOS
organic_spend_reach_clicks_ios = _placeholder_rows(appsflyer_ios_grouped_final, "Organic", "IOS")
display(organic_spend_reach_clicks_ios.head())

# APPLE SEARCH BASIC IOS
asa_basic_spend_reach_clicks_ios = _placeholder_rows(appsflyer_ios_grouped_final, asa_basic_string, "IOS")
display(asa_basic_spend_reach_clicks_ios.head())

# APPLE SEARCH ADVANCED IOS
asa_advanced_spend_reach_clicks_ios = _placeholder_rows(appsflyer_ios_grouped_final, asa_advanced_string, "IOS")
display(asa_advanced_spend_reach_clicks_ios.head())

# ORGANIC ANDROID
organic_spend_reach_clicks_android = _placeholder_rows(appsflyer_android_grouped_final, "Organic", "ANDROID")
display(organic_spend_reach_clicks_android.head())

Unnamed: 0,date,advertiser,device_type
5,2019-04-06,Organic,IOS
14,2019-04-07,Organic,IOS
24,2019-04-08,Organic,IOS
34,2019-04-09,Organic,IOS
43,2019-04-10,Organic,IOS


Unnamed: 0,date,advertiser,device_type
1,2019-04-06,ASA Basic,IOS
10,2019-04-07,ASA Basic,IOS
20,2019-04-08,ASA Basic,IOS
30,2019-04-09,ASA Basic,IOS
39,2019-04-10,ASA Basic,IOS


Unnamed: 0,date,advertiser,device_type
0,2019-04-06,ASA Advanced,IOS
9,2019-04-07,ASA Advanced,IOS
19,2019-04-08,ASA Advanced,IOS
29,2019-04-09,ASA Advanced,IOS
38,2019-04-10,ASA Advanced,IOS


Unnamed: 0,date,advertiser,device_type
1,2019-04-06,Organic,ANDROID
6,2019-04-07,Organic,ANDROID
12,2019-04-08,Organic,ANDROID
17,2019-04-09,Organic,ANDROID
24,2019-04-10,Organic,ANDROID


# COMBINE APPSFLYER AND ADVERTISER DATA TOGETHER

### IOS Combination

In [24]:
# Stack every iOS advertiser-side frame (the placeholders contribute rows with
# no spend columns), ordered by date.
ios_spend_frames = [
    facebook_spend_reach_clicks_ios,
    pinterest_spend_reach_clicks_ios,
    google_spend_reach_clicks_ios,
    asa_basic_spend_reach_clicks_ios,
    asa_advanced_spend_reach_clicks_ios,
    organic_spend_reach_clicks_ios,
]
advertisers_spend_reach_clicks_ios = pd.concat(
    ios_spend_frames, ignore_index=True, sort=False
).sort_values(by=['date'])

# Attach the AppsFlyer totals for the same day and advertiser. A left join keeps
# every advertiser-side row even when AppsFlyer has no matching row
# (how="outer" was tried as an alternative).
merge_table_ios = advertisers_spend_reach_clicks_ios.merge(
    appsflyer_ios_grouped_final,
    how="left",
    on=["date", "advertiser"],
)

display(merge_table_ios.head())

# Persist the merged iOS table.
merge_table_ios.to_csv("{}/merged_IOS_data.csv".format(folder_output_csvs), index=False, header=True)

Unnamed: 0,date,device_type,advertiser,spend,impressions,impressions_unique,clicks,clicks_unique,views,installs,...,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2019-04-06,IOS,Facebook Ads,489.48,115877.0,112233.0,826.0,822.0,,655,...,377,51,51,0,0,51,51,21,21,859.79
1,2019-04-06,IOS,Facebook Ads EG,497.64,85205.0,82800.0,772.0,766.0,,0,...,0,0,0,0,0,0,0,0,0,0.0
2,2019-04-06,IOS,pinterest_int,652.34,744777.0,477859.0,4290.0,4196.0,,598,...,365,26,26,1,1,27,27,11,11,509.89
3,2019-04-06,IOS,Organic,,,,,,,709,...,345,27,31,0,0,27,31,0,0,0.0
4,2019-04-06,IOS,ASA Basic,,,,,,,40,...,32,0,0,0,0,0,0,0,0,0.0


### ANDROID Combination

In [25]:
# Stack every Android advertiser-side frame (the Organic placeholder contributes
# rows with no spend columns), ordered by date.
advertisers_spend_reach_clicks_android = pd.concat(
    [
        facebook_spend_reach_clicks_android,
        pinterest_spend_reach_clicks_android,
        organic_spend_reach_clicks_android,
    ],
    ignore_index=True,
    sort=False,
).sort_values(by=['date'])

# Attach the AppsFlyer totals for the same day and advertiser. A left join keeps
# every advertiser-side row even when AppsFlyer has no matching row
# (how="outer" was tried as an alternative).
merge_table_android = pd.merge(
    advertisers_spend_reach_clicks_android,
    appsflyer_android_grouped_final,
    on=["date", "advertiser"],
    how="left",
)

display(merge_table_android.head())

# Persist the merged Android table.
merge_table_android.to_csv(folder_output_csvs + "/merged_ANDROID_data.csv", index=False, header=True)

Unnamed: 0,date,device_type,advertiser,spend,impressions,impressions_unique,clicks,clicks_unique,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,trial_starts_unique,trial_starts_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2019-04-06,ANDROID,Facebook Ads,500.83,65586.0,64112.0,1065.0,1058.0,398,1992,216,16,17,1,1,17,18,3,3,139.96
1,2019-04-06,ANDROID,Organic,,,,,,124,0,63,6,6,0,0,6,6,0,0,0.0
2,2019-04-06,ANDROID,pinterest_int,150.0,162507.0,101204.0,1107.0,1092.0,188,967,121,7,8,0,0,7,8,1,1,9.99
3,2019-04-07,ANDROID,Facebook Ads,503.05,77194.0,74368.0,1327.0,1318.0,499,3406,311,31,33,1,1,32,34,16,16,659.84
4,2019-04-07,ANDROID,Organic,,,,,,242,0,145,11,13,0,0,11,13,0,0,0.0


# CREATE AARRR DASHBOARD showing AGGREGATE DATA OVER DATE RANGE

In [26]:
# Inclusive reporting window for the AARRR tables, as "YYYY-MM-DD" strings;
# they are compared directly against the "date" column of the merged tables.
start_date = "2019-05-05"
end_date = "2019-05-05"

# Columns carried from the merged tables into the aggregate tables.
columns_to_keep = [
    "date",
    "device_type",
    "advertiser",
    "spend",
    "installs",
    "trial_starts_unique",
]


### IOS Aggregate Data

In [31]:
# Rows of the merged iOS table that fall inside the inclusive reporting window.
ios_in_date_range = (merge_table_ios["date"] >= start_date) & (merge_table_ios["date"] <= end_date)

# Totals per advertiser over the window. numeric_only=True keeps the text
# columns (date, device_type) out of the sum regardless of the pandas version,
# leaving spend, installs and trial_starts_unique.
ios_aarrr = (
    merge_table_ios.loc[ios_in_date_range, columns_to_keep]
    .groupby(["advertiser"], as_index=False)
    .sum(numeric_only=True)
)

display(ios_aarrr.head(20))

# Persist the iOS aggregate, with the window encoded in the file name.
ios_aarrr.to_csv(folder_output_csvs + "/ios_aarrr_" + start_date + "_to_" + end_date + ".csv", index=False, header=True)

Unnamed: 0,advertiser,spend,installs,trial_starts_unique
0,ASA Advanced,0.0,132,10
1,ASA Basic,0.0,303,23
2,Facebook Ads,2292.11,719,35
3,Facebook Ads EG,6446.19,2827,342
4,Organic,0.0,6481,355
5,googleadwords_int,3406.27,1844,63
6,pinterest_int,5990.49,7187,445


### Android Aggregate Data

In [28]:
# Rows of the merged Android table that fall inside the inclusive reporting window.
android_in_date_range = (merge_table_android["date"] >= start_date) & (merge_table_android["date"] <= end_date)

# Totals per advertiser over the window. numeric_only=True keeps the text
# columns (date, device_type) out of the sum regardless of the pandas version,
# leaving spend, installs and trial_starts_unique.
android_aarrr = (
    merge_table_android.loc[android_in_date_range, columns_to_keep]
    .groupby(["advertiser"], as_index=False)
    .sum(numeric_only=True)
)

# display() is needed here: a bare expression in the middle of a cell is not rendered.
display(android_aarrr.head(100))

# Persist the Android aggregate, with the window encoded in the file name.
android_aarrr.to_csv(folder_output_csvs + "/android_aarrr_" + start_date + "_to_" + end_date + ".csv", index=False, header=True)