# Travel Pony Ad Analyses
## This code analyses advertising campaign engagement data from Facebook
### By Cathy Robison

In [10]:
import pandas as pd
import numpy as np 

#Data can be found on github repo
tp = pd.read_csv('TravelPonyFacebook.csv')
print(tp.head(2))


  Start Date   End Date  Campaign Name   Reach  Frequency  Impressions  \
0  12/2/2013  12/2/2013  Cyber Weekend  278117   2.314303       643647   
1  12/1/2013  12/1/2013  Cyber Weekend  221255   2.079090       460009   

   Clicks  Unique Clicks  Amount Spent (USD)  Page Likes  Page Engagement  \
0     336            313               300.0           3              246   
1     376            348               300.0           6              215   

   Post Engagement  Post Likes  Post Comments  Post Shares  Photo Views  \
0              243           0              0            0            0   
1              209           0              0            0            0   

   Website Clicks  Cost per Page Engagement (USD)  
0             243                        1.219512  
1             209                        1.395349  


In [2]:
#Creating analytics 'cost per impression' by dividing Amount Spent / Impressions.
tp['cost per impression'] = tp['Amount Spent (USD)'] / tp['Impressions']


In [3]:
#When considering 'cost per impression', what day of the week works best? (What day is it cheapest to 
#generate impressions) What day works worst? (What day is most expensive)
import arrow
dayofweek = []

for row in range(len(tp)):
    date = arrow.get(tp['Start Date'][row], 'M/D/YY')
    dayofweek.append(date.weekday())
    
tp['Day']= dayofweek

In [4]:
group = tp.groupby('Day')
group.aggregate(np.mean)


Unnamed: 0_level_0,Reach,Frequency,Impressions,Clicks,Unique Clicks,Amount Spent (USD),Page Likes,Page Engagement,Post Engagement,Post Likes,Post Comments,Post Shares,Photo Views,Website Clicks,Cost per Page Engagement (USD),cost per impression
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,2281.755511,1.014517,3152.184369,10.014028,8.857715,9.625511,0.923848,9.573146,8.649299,1.102204,0.048096,0.212425,3.59519,3.691383,0.415395,0.002629
1,2203.596639,1.016073,2775.644958,9.981092,8.808824,9.676534,0.831933,8.794118,7.960084,0.962185,0.029412,0.218487,3.220588,3.529412,0.49108,0.003688
2,1842.864177,1.017603,2525.074703,7.536503,6.521222,7.427097,0.670628,7.634975,6.964346,1.095076,0.040747,0.225806,2.994907,2.60781,0.492924,0.00291
3,1889.21519,1.012193,1926.738397,9.419831,8.151899,9.455084,1.094937,9.679325,8.584388,1.280591,0.065401,0.202532,3.877637,3.158228,0.592907,0.00304
4,1494.235195,1.009059,1516.478849,7.781726,6.524535,7.956024,0.93401,8.13198,7.19797,1.241963,0.067682,0.187817,3.225042,2.475465,0.539237,0.002981
5,1413.081882,1.011668,1449.837979,8.054007,6.980836,7.640209,0.855401,8.634146,7.778746,1.37108,0.094077,0.200348,4.012195,2.101045,0.560598,0.003398
6,2238.474104,1.012024,3037.984064,10.14741,8.848606,10.014741,0.844622,10.635458,9.790837,1.424303,0.051793,0.250996,4.74502,3.318725,0.476606,0.004097


In [5]:
#another way to do it...
def get_weekday(date):
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    return weekdays[arrow.get(date, 'M/D/YY').weekday()]


tp['weekday'] = tp['Start Date'].apply(get_weekday)

print(tp.groupby('weekday')['cost per impression'].mean().sort_values())



weekday
Mon    0.002629
Wed    0.002910
Fri    0.002981
Thu    0.003040
Sat    0.003398
Tue    0.003688
Sun    0.004097
Name: cost per impression, dtype: float64


In [6]:
#Calculate the correlation between Amount Spent & Reach, Frequency, Unique Clicks, and Page Likes
AandR = np.corrcoef(tp['Amount Spent (USD)'], tp['Reach']) #Correlation is 0.703
AandF = np.corrcoef(tp['Amount Spent (USD)'], tp['Frequency']) #Correlation is 0.130
AandU = np.corrcoef(tp['Amount Spent (USD)'], tp['Unique Clicks']) #Correlation is 0.883
AandPL = np.corrcoef(tp['Amount Spent (USD)'], tp['Page Likes']) #Correlation is 0.758

In [7]:
print (AandR)
print (AandF)
print (AandU)
print (AandPL)

[[1.         0.70312381]
 [0.70312381 1.        ]]
[[1.         0.13020087]
 [0.13020087 1.        ]]
[[1.         0.88299318]
 [0.88299318 1.        ]]
[[1.         0.75761193]
 [0.75761193 1.        ]]


In [8]:
#Perform a simple multiple regression analysis (Links to an external site.)Links to an external site. 
#where Unique Clicks is the dependent variable and Reach and Frequency are the independent (predictor) variables.
import statsmodels.api as sm

y=tp['Unique Clicks']
x=tp[['Reach', 'Frequency']]

x = sm.add_constant(x)
model = sm.OLS(y,x).fit()
predictions = model.predict(x)

print (model.summary())

                            OLS Regression Results                            
Dep. Variable:          Unique Clicks   R-squared:                       0.534
Model:                            OLS   Adj. R-squared:                  0.534
Method:                 Least Squares   F-statistic:                     2124.
Date:                Thu, 18 Oct 2018   Prob (F-statistic):               0.00
Time:                        15:16:08   Log-Likelihood:                -15907.
No. Observations:                3705   AIC:                         3.182e+04
Df Residuals:                    3702   BIC:                         3.184e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         29.2277      2.525     11.575      0.0