In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 18 13:52:20 2018

@author: Freddie Zhang
"""

import pandas as pd
import arrow
import statsmodels.api as sm

# Read the 'Travel Pony Facebook.csv' dataset into a DataFrame, then print a
# structural overview of it (columns, dtypes, non-null counts) as a first check.
DATA_FILE = "Travel Pony Facebook.csv"
df = pd.read_csv(DATA_FILE)
df.info()

# 1. Create the analytic 'cost per impression' (CPI) and rank weekdays by it.
# What day of the week works best?
# What day of the week works worst?
# CPI is a cost, so the smaller the average CPI is, the better the day is.
# NOTE(review): rows with 0 impressions would produce inf/NaN here — confirm
# the data never contains them.
df['Cost Per Impression'] = df['Amount Spent (USD)'] / df['Impressions']

# Derive the weekday name in one vectorized pass instead of a per-row loop.
# Dates are expected as month/day/two-digit-year, the same layout the previous
# 'M/D/YY' pattern parsed.
df['Day of Week'] = pd.to_datetime(
    df['Start Date'].astype(str), format='%m/%d/%y'
).dt.day_name()

# Average only the CPI column: calling .mean() on the whole grouped frame also
# tries to average text columns such as 'Start Date', which raises on
# pandas >= 2.0.
ranking = (
    df.groupby('Day of Week')['Cost Per Impression']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
print(ranking)

# ranking is sorted from most to least expensive, so the last row is the best
# day and the first row is the worst. Positional access avoids assuming that
# all seven weekdays are present (a hard-coded index 6 would).
print(ranking['Day of Week'].iloc[-1],"works best.")
print(ranking['Day of Week'].iloc[0],"works worst.")

In [None]:
# 2. Compute the correlation
# Pairwise correlation matrix (pandas default method) of spend and the
# engagement metrics.
corr_columns = ['Amount Spent (USD)','Reach','Frequency','Unique Clicks','Page Likes']
corr_matrix = df[corr_columns].corr()
print(corr_matrix)

# Pick the metric most strongly correlated with Amount Spent from the matrix
# itself, so the printed conclusion can never drift from the printed numbers.
# The self-correlation (always 1) is dropped before searching.
# NOTE(review): "strongest" is taken as largest absolute correlation with
# Amount Spent — confirm that is the intended comparison.
spend_corr = corr_matrix['Amount Spent (USD)'].drop('Amount Spent (USD)')
strongest_metric = spend_corr.abs().idxmax()
print("The correlation between Amount Spent and {0} is the strongest with the value of {1:.6f}. Practically speaking, Amount Spent and {0} move together more closely than Amount Spent does with any other metric; correlation alone does not show which one drives the other.".format(strongest_metric, spend_corr[strongest_metric]))

# 3. Perform a simple multiple regression analysis
# Response: Unique Clicks; predictors: Reach and Frequency.
DV = df["Unique Clicks"]
IV = df[["Reach","Frequency"]]

# NOTE(review): sm.OLS does not add an intercept on its own, so this model is
# fitted through the origin. If an intercept is wanted, fit on
# sm.add_constant(IV) instead — left unchanged here because it would alter the
# fitted coefficients and the conclusions drawn from them.
model = sm.OLS(DV, IV).fit()
predictions = model.predict(IV) # in-sample predictions made by the model

# summary() only builds the report; it must be printed to appear, because it
# is not the last expression of the cell.
print(model.summary())

# Report the predictor with the largest absolute coefficient, read from the
# fitted model rather than hard-coded, so the text always matches the summary.
# NOTE(review): raw coefficients depend on each predictor's units — confirm
# this is the intended notion of "most strongly predicts".
strongest_iv = model.params.abs().idxmax()
coef = model.params[strongest_iv]
direction = "increases" if coef >= 0 else "decreases"
print("{0} most strongly predicts unique clicks. It means as the {0} increases by 1, the predicted value of Unique Clicks {1} by {2:.4f}.".format(strongest_iv, direction, abs(coef)))