In [3]:
import pandas as pd
from sqlalchemy import create_engine

# NOTES ON THIS VERSION

This version takes a Facebook, Pinterest, and Google ads CSV for the advertisers
It then takes an iOS and Android AppsFlyer CSV for the Installs/down the funnel events

The Facebook file has a column for "impression device" which allows us to split up the performance by platform
The next version of this script will get this information from the campaign name similar to how it does for Pinterest

# ADVERTISER DATA PREPARATION

### Extract Advertiser CSVs into DataFrames

In [133]:
facebook_file = "Resources/APRIL-FACEBOOK.csv"
facebook_df = pd.read_csv(facebook_file)
# facebook_df.head()

In [132]:
pinterest_file = "Resources/APRIL-PINTEREST.csv"
pinterest_df = pd.read_csv(pinterest_file)
# pinterest_df.head()

In [182]:
google_file = "Resources/APRIL-GOOGLE.csv"
google_df = pd.read_csv(google_file, skiprows=2)
google_df.head()

Unnamed: 0,Day,Campaign type,Campaign,Currency,Cost,Impressions,Views,Clicks
0,2019-04-24,Universal app,2019_04 | UAC INSTALLS - V1 | image - ex and l...,USD,514.63,95418,10756,279
1,2019-04-25,Universal app,2019_04 | UAC INSTALLS - V1 | image - ex and l...,USD,506.81,162903,13529,471
2,2019-04-26,Universal app,2019_04 | UAC INSTALLS - V1 | image - ex and l...,USD,81.79,46785,3566,137
3,2019-04-27,Universal app,2019_04 | UAC INSTALLS - V1 | image - ex and l...,USD,909.81,596122,23035,2005
4,2019-04-28,Universal app,2019_04 | UAC INSTALLS - V1 | image - ex and l...,USD,112.49,46903,2373,231


### Transform FACEBOOK DataFrame

In [134]:
# Create a filtered dataframe from specific columns
facebook_cols = ["Day", "Impression Device", "Amount Spent (USD)", 
                 "Impressions", "Reach", "Link Clicks", "Unique Link Clicks"]

facebook_transformed = facebook_df[facebook_cols].copy()

# Rename the column headers
facebook_transformed = facebook_transformed.rename(columns={"Day": "date",
                                                            "Impression Device": "impression_device",
                                                            "Amount Spent (USD)": "spend",
                                                            "Impressions": "impressions",                                                     
                                                            "Reach": "impressions_unique",
                                                            "Link Clicks": "clicks",
                                                            "Unique Link Clicks": "clicks_unique"})


# names for devices in each platform
device_names_ios = ["ipod","iphone","ipad"]
device_names_android = ["android_smartphone","android_tablet"]


# add column to identify which platform (IOS or ANDROID) was being delivered impressions
facebook_transformed["device_type"] = "other"
facebook_transformed.loc[facebook_transformed["impression_device"].isin(device_names_ios) ,["device_type"]] = "IOS"
facebook_transformed.loc[facebook_transformed["impression_device"].isin(device_names_android) ,["device_type"]] = "ANDROID"


# facebook_spend_reach_by_device_type = facebook_transformed.groupby(["date","device_type"]).sum()
facebook_spend_reach_by_device_type = facebook_transformed.groupby(["date","device_type"],as_index=False).sum()

facebook_spend_reach_by_device_type_final = facebook_spend_reach_by_device_type.loc[facebook_spend_reach_by_device_type["device_type"] != "other", :]

# display(facebook_spend_reach_by_device_type_final.head())



# add the AppsFlyer advertiser name and Split out IOS and ANDROID into separate dataframes
facebook_spend_reach_by_device_type_final["advertiser"] = "Facebook Ads"

facebook_spend_reach_clicks_ios = facebook_spend_reach_by_device_type_final.loc[facebook_spend_reach_by_device_type_final["device_type"] == "IOS",:]
facebook_spend_reach_clicks_android = facebook_spend_reach_by_device_type_final.loc[facebook_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]

# pinterest_spend_reach_clicks_android = 


display(facebook_spend_reach_clicks_ios.head())
display(facebook_spend_reach_clicks_android.head())




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
1,2019-04-01,IOS,782.842767,168814,164411,1454.0,1447.0,Facebook Ads
4,2019-04-02,IOS,783.349164,176108,171981,1364.0,1358.0,Facebook Ads
7,2019-04-03,IOS,781.32111,176535,171842,1311.0,1304.0,Facebook Ads
10,2019-04-04,IOS,778.177895,279965,278139,1570.0,1563.0,Facebook Ads
13,2019-04-05,IOS,779.793705,310411,303413,1707.0,1702.0,Facebook Ads


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
0,2019-04-01,ANDROID,579.947234,75660,69566,1113.0,1107.0,Facebook Ads
3,2019-04-02,ANDROID,578.020837,76334,71007,1210.0,1199.0,Facebook Ads
6,2019-04-03,ANDROID,624.988891,80596,74874,1239.0,1228.0,Facebook Ads
9,2019-04-04,ANDROID,507.512105,101187,99134,1373.0,1366.0,Facebook Ads
12,2019-04-05,ANDROID,509.286295,103187,96496,1250.0,1240.0,Facebook Ads


### Transform PINTEREST DataFrame

In [118]:
pinterest_cols = ["Campaign ID", "Campaign name", "Date", "Spend in account currency", 
                  "Impressions", "Impression Unique Users", "Clicks", "Click Unique Users"]

pinterest_transformed = pinterest_df[pinterest_cols].copy()

# Rename the column headers
pinterest_transformed = pinterest_transformed.rename(columns={"Campaign ID": "campaign_id",
                                                         "Campaign name": "campaign_name",
                                                         "Date": "date",
                                                         "Spend in account currency": "spend",
                                                         "Impressions": "impressions",
                                                         "Impression Unique Users": "impressions_unique",
                                                         "Clicks": "clicks",
                                                         "Click Unique Users": "clicks_unique"})

pinterest_transformed["device_type"] = "other"
pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)IOS'),"device_type"] = "IOS"
pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)ANDROID'),"device_type"] = "ANDROID"


pinterest_spend_reach_by_device_type = pinterest_transformed.groupby(["date","device_type"],as_index=False).sum()

pinterest_spend_reach_by_device_type_final = pinterest_spend_reach_by_device_type.loc[pinterest_spend_reach_by_device_type["device_type"] != "other", :]

pinterest_spend_reach_by_device_type_final.head(10)



# add the AppsFlyer advertiser name and Split out IOS and ANDROID into separate dataframes
pinterest_spend_reach_by_device_type_final["advertiser"] = "pinterest_int"

pinterest_spend_reach_clicks_ios = pinterest_spend_reach_by_device_type_final.loc[pinterest_spend_reach_by_device_type_final["device_type"] == "IOS",:]
pinterest_spend_reach_clicks_android = pinterest_spend_reach_by_device_type_final.loc[pinterest_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]

# pinterest_spend_reach_clicks_android = 


display(pinterest_spend_reach_clicks_ios.head())
display(pinterest_spend_reach_clicks_android.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
1,2019-04-01,IOS,716.5,779799.0,522546.0,4914.0,4823.0,pinterest_int
4,2019-04-02,IOS,695.33,813398.0,540764.0,4944.0,4858.0,pinterest_int
7,2019-04-03,IOS,703.57,800287.0,508911.0,4647.0,4556.0,pinterest_int
10,2019-04-04,IOS,691.98,805840.0,502487.0,4564.0,4448.0,pinterest_int
13,2019-04-05,IOS,659.05,764811.0,497640.0,4552.0,4441.0,pinterest_int


Unnamed: 0,date,device_type,spend,impressions,impressions_unique,clicks,clicks_unique,advertiser
0,2019-04-01,ANDROID,0.0,15.0,0.0,0.0,0.0,pinterest_int
3,2019-04-02,ANDROID,0.0,9.0,0.0,0.0,0.0,pinterest_int
6,2019-04-03,ANDROID,14.83,17481.0,12717.0,112.0,111.0,pinterest_int
9,2019-04-04,ANDROID,147.14,154479.0,92727.0,1034.0,1020.0,pinterest_int
12,2019-04-05,ANDROID,150.0,181698.0,111306.0,1117.0,1103.0,pinterest_int


### Transform GOOGLE ADS DataFrame

In [202]:
# Create a filtered dataframe from specific columns
google_cols = ["Day", "Campaign", "Cost", 
                 "Impressions", "Views", "Clicks"]

google_transformed = google_df[google_cols].copy()

# Rename the column headers
google_transformed = google_transformed.rename(columns={"Day": "date",
                                                            "Campaign": "campaign_name",
                                                            "Cost": "spend",
                                                            "Impressions": "impressions",                                                     
                                                            "Views": "views",
                                                            "Clicks": "clicks"})

display(google_transformed["spend"].dtypes)

google_transformed["impressions"] = google_transformed["impressions"].str.replace(",","").astype(int)
google_transformed["views"] = google_transformed["views"].str.replace(",","").astype(int)
google_transformed["clicks"] = google_transformed["clicks"].str.replace(",","").astype(int)

# pd.to_numeric(s, errors='ignore')

# pinterest_transformed["device_type"] = "other"
# pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)IOS'),"device_type"] = "IOS"
# pinterest_transformed.loc[pinterest_transformed['campaign_name'].str.contains('(?i)ANDROID'),"device_type"] = "ANDROID"


google_transformed["device_type"] = "IOS"

google_spend_reach_by_device_type = google_transformed.groupby(["date","device_type"],as_index=False).sum()

google_spend_reach_by_device_type_final = google_spend_reach_by_device_type.loc[google_spend_reach_by_device_type["device_type"] != "other", :]

display(google_spend_reach_by_device_type_final.head())



# add the AppsFlyer advertiser name and Split out IOS and ANDROID into separate dataframes
google_spend_reach_by_device_type_final["advertiser"] = "googleadwords_int"


google_spend_reach_clicks_ios = google_spend_reach_by_device_type_final.loc[google_spend_reach_by_device_type_final["device_type"] == "IOS",:]
google_spend_reach_clicks_android = google_spend_reach_by_device_type_final.loc[google_spend_reach_by_device_type_final["device_type"] == "ANDROID",:]

display(google_spend_reach_clicks_ios.head())
display(google_spend_reach_clicks_android.head())




dtype('float64')

Unnamed: 0,date,device_type,spend,impressions,views,clicks
0,2019-04-24,IOS,514.63,95418,10756,279
1,2019-04-25,IOS,506.81,162903,13529,471
2,2019-04-26,IOS,81.79,46785,3566,137
3,2019-04-27,IOS,909.81,596122,23035,2005
4,2019-04-28,IOS,112.49,46903,2373,231


Unnamed: 0,date,device_type,spend,impressions,views,clicks,advertiser
0,2019-04-24,IOS,514.63,95418,10756,279,googleadwords_int
1,2019-04-25,IOS,506.81,162903,13529,471,googleadwords_int
2,2019-04-26,IOS,81.79,46785,3566,137,googleadwords_int
3,2019-04-27,IOS,909.81,596122,23035,2005,googleadwords_int
4,2019-04-28,IOS,112.49,46903,2373,231,googleadwords_int


Unnamed: 0,date,device_type,spend,impressions,views,clicks,advertiser


In [203]:
# google_transformed["impressions"] = google_transformed["impressions"].str.replace(",","").astype(int)
# google_transformed["views"] = google_transformed["views"].str.replace(",","").astype(int)
# google_transformed["clicks"] = google_transformed["clicks"].str.replace(",","").astype(int)
# # google_transformed[["impressions", "views"]] = google_transformed[["impressions", "views"]].apply(pd.to_numeric)


# google_transformed.head()

# APPSFLYER DATA PREPARATION

### Extract APPSFLYER CSVs into DataFrames

In [131]:
appsflyer_ios_file = "Resources/APRIL-APPSFLYER-IOS.csv"
appsflyer_ios_df = pd.read_csv(appsflyer_ios_file)
# display(appsflyer_ios_df.head())


appsflyer_android_file = "Resources/APRIL-APPSFLYER-ANDROID.csv"
appsflyer_android_df = pd.read_csv(appsflyer_android_file)
# display(appsflyer_android_df.head())

### SETUP VARIABLES TO USE FOR APPSFLYER IOS/ANDROID DataFrame Transformations

In [121]:
most_recent_ltv_column_prefix = "ltv_20190504"
paid_acquisition_advertisers = ["Facebook Ads", "pinterest_int", "googleadwords_int", "Apple Search Ads", "Organic"]


appsflyer_cols = ["Date", "Media Source (pid)", "Installs", "Sessions", 
                      "new_workout_saved (Unique users)", 
                      "af_purchase (Unique users)",
                      "af_purchase (Event counter)", 
                      "af_start_trial (Unique users)", 
                      "af_start_trial (Event counter)",
                      most_recent_ltv_column_prefix + " (Unique users)",
                      most_recent_ltv_column_prefix + " (Event counter)",
                      most_recent_ltv_column_prefix + " (Sales in USD)"]

### Transform APPSFLYER IOS DataFrame

In [119]:
appsflyer_ios_transformed = appsflyer_ios_df[appsflyer_cols].copy()

# Rename the column headers
appsflyer_ios_transformed = appsflyer_ios_transformed.rename(columns={"Date": "date",
                                                         "Media Source (pid)": "advertiser",
                                                         "Installs": "installs",
                                                         "Sessions": "sessions",
                                                         "new_workout_saved (Unique users)": "new_workout_saved_unique",
                                                         "af_purchase (Unique users)": "af_purchase_unique",
                                                         "af_purchase (Event counter)": "af_purchase_all",
                                                         "af_start_trial (Unique users)": "af_start_trial_unique",
                                                         "af_start_trial (Event counter)": "af_start_trial_all",
                                                         most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
                                                         most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
                                                         most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue"})


appsflyer_ios_grouped = appsflyer_ios_transformed.groupby(["date","advertiser"],as_index=False).sum()

appsflyer_ios_grouped_final = appsflyer_ios_grouped.loc[appsflyer_ios_grouped["advertiser"].isin(paid_acquisition_advertisers), :]

appsflyer_ios_grouped_final.head()



Unnamed: 0,date,advertiser,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2019-04-01,Apple Search Ads,96,287,57,5,5,0,0,0,0,0.0
2,2019-04-01,Facebook Ads,658,2251,355,52,52,1,1,3,3,129.97
3,2019-04-01,Organic,877,0,468,38,41,0,0,0,0,0.0
8,2019-04-01,pinterest_int,864,2455,522,28,28,0,0,1,1,9.99
9,2019-04-02,Apple Search Ads,82,253,49,2,2,0,0,1,1,59.99


### Transform APPSFLYER ANDROID DataFrame

In [122]:
appsflyer_android_transformed = appsflyer_android_df[appsflyer_cols].copy()

# Rename the column headers
appsflyer_android_transformed = appsflyer_android_transformed.rename(columns={"Date": "date",
                                                         "Media Source (pid)": "advertiser",
                                                         "Installs": "installs",
                                                         "Sessions": "sessions",
                                                         "new_workout_saved (Unique users)": "new_workout_saved_unique",
                                                         "af_purchase (Unique users)": "af_purchase_unique",
                                                         "af_purchase (Event counter)": "af_purchase_all",
                                                         "af_start_trial (Unique users)": "af_start_trial_unique",
                                                         "af_start_trial (Event counter)": "af_start_trial_all",
                                                         most_recent_ltv_column_prefix + " (Unique users)": "ltv_subs_unique",
                                                         most_recent_ltv_column_prefix + " (Event counter)": "ltv_subs_all",
                                                         most_recent_ltv_column_prefix + " (Sales in USD)": "ltv_subs_revenue"})


appsflyer_android_grouped = appsflyer_android_transformed.groupby(["date","advertiser"],as_index=False).sum()

appsflyer_android_grouped_final = appsflyer_android_grouped.loc[appsflyer_android_grouped["advertiser"].isin(paid_acquisition_advertisers), :]

appsflyer_android_grouped_final.head()



Unnamed: 0,date,advertiser,installs,sessions,new_workout_saved_unique,af_purchase_unique,af_purchase_all,af_start_trial_unique,af_start_trial_all,ltv_subs_unique,ltv_subs_all,ltv_subs_revenue
0,2019-04-01,Facebook Ads,346,2308,223,20,20,2,2,10,10,449.90002
1,2019-04-01,Organic,142,0,73,5,7,0,0,0,0,0.0
4,2019-04-01,pinterest_int,1,3,1,0,0,0,0,0,0,0.0
5,2019-04-02,Facebook Ads,406,2330,252,17,18,1,1,8,8,379.92
6,2019-04-02,Organic,142,0,73,2,2,1,1,0,0,0.0


# COMBINE APPSFLYER AND ADVERTISER DATA TOGETHER

### IOS Combination

In [205]:
# facebook_spend_reach_clicks_ios
# pinterest_spend_reach_clicks_ios
# google_spend_reach_clicks_ios
# appsflyer_ios_grouped_final

advertisers_spend_reach_clicks_ios = pd.concat([facebook_spend_reach_clicks_ios, pinterest_spend_reach_clicks_ios, google_spend_reach_clicks_ios], ignore_index=True, sort=False).sort_values(by=['date'])

merge_table_ios = pd.merge(advertisers_spend_reach_clicks_ios, appsflyer_ios_grouped_final, on=["date","advertiser"], how="left")
# merge_table_ios = pd.merge(advertisers_spend_reach_clicks_ios, appsflyer_ios_grouped_final, on=["date","advertiser"], how="outer")

merge_table_ios.head(100)


merge_table_ios.to_csv("Output/merged_IOS_data.csv", index=False, header=True)

### ANDROID Combination

In [206]:
facebook_spend_reach_clicks_android.head()
pinterest_spend_reach_clicks_android.head()
appsflyer_android_grouped_final.head()


advertisers_spend_reach_clicks_android = pd.concat([facebook_spend_reach_clicks_android, pinterest_spend_reach_clicks_android], ignore_index=True, sort=False).sort_values(by=['date'])

merge_table_android = pd.merge(advertisers_spend_reach_clicks_android, appsflyer_android_grouped_final, on=["date","advertiser"], how="left")
# merge_table_android = pd.merge(advertisers_spend_reach_clicks_android, appsflyer_android_grouped_final, on=["date","advertiser"], how="outer")

merge_table_android.head(100)

merge_table_android.to_csv("Output/merged_ANDROID_data.csv", index=False, header=True)

# DATABASE OPPERATIONS

### Create database connection

In [6]:
connection_string = "root:password123@localhost/customer_db"
engine = create_engine(f'mysql://{connection_string}')

In [7]:
# Confirm tables
engine.table_names()

['customer_location', 'customer_name']

### Load DataFrames into database

In [8]:
premise_transformed.to_sql(name='premise', con=engine, if_exists='append', index=True)

In [9]:
county_transformed.to_sql(name='county', con=engine, if_exists='append', index=True)