In [None]:
# Environment setup: refresh the apt package index, then install the Chromium
# web driver and the selenium / python-pptx Python packages (all in quiet mode).
# NOTE(review): none of these packages are imported in the cells visible here;
# presumably they are needed by 01_utils/hlpr or by other notebooks - confirm.
!apt -qq update
!apt -qq install chromium-chromedriver
!pip -qq install selenium
!pip install -qq python-pptx

In [1]:
# Switch the working directory to the project folder; the relative
# '01_utils' entry appended to sys.path below only resolves from here.
%cd /content/drive/My Drive/Colab Notebooks/AMEX/ResponseEngineDataUpdate_v2

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import calendar
import time
import glob
import os

import sys
# Relative path: valid only while the cwd is the project folder set by %cd above.
sys.path.append('01_utils')
# Star import from the local helper module. The google_* functions called later
# in this notebook are presumably defined there - TODO confirm and import by name.
from hlpr import *

/content/drive/My Drive/Colab Notebooks/AMEX/ResponseEngineDataUpdate_v2


# Static

In [14]:
# run stamps: epoch seconds and a YYYYMMDD date string
ts = int(time.time())
dt = datetime.now().strftime('%Y%m%d')

# project folders (everything hangs off ROOT)
ROOT = '/content/drive/My Drive/Colab Notebooks/AMEX/ResponseEngineDataUpdate_v2'
UTILS_PATH, OUTPUTS_PATH, GOOGLE_PATH = (
    os.path.join(ROOT, sub) for sub in ('01_utils', '03_outputs', '02_data/google')
)
GOOGLE_YTD_PATH, GOOGLE_HIST_PATH, GOOGLE_CITIES_PATH = (
    os.path.join(GOOGLE_PATH, sub) for sub in ('current', 'historical', 'cities')
)

# term lookup: first three columns of the 'terms' sheet, with the term text
# normalised (trimmed, lower-cased) and exposed under the name 'variable'
cat = (
    pd.read_excel(os.path.join(UTILS_PATH, 'map.xlsx'), sheet_name='terms', usecols=[0,1,2])
    .assign(term=lambda terms: terms['term'].str.strip().str.lower())
    .rename(columns={'term':'variable'})
)

# market lookup: abbreviation -> market name for rows flagged priority == 1,
# plus both spellings of the United Kingdom code
mkt = pd.read_excel(os.path.join(UTILS_PATH, 'map.xlsx'), sheet_name='markets')
mkt_country = mkt.loc[mkt['priority']==1].set_index('market_abbr')['market'].to_dict()
mkt_country.update({'GB': 'United Kingdom', 'UK': 'United Kingdom'})

In [15]:
# Display the abbreviation -> market-name lookup built in the previous cell.
mkt_country

{'AU': 'Australia',
 'CA': 'Canada',
 'GB': 'United Kingdom',
 'MX': 'Mexico',
 'UK': 'United Kingdom',
 'US': 'United States',
 'US-CA-803': 'Los Angeles',
 'US-CA-807': 'San Francisco',
 'US-IL-602': 'Chicago',
 'US-NY-501': 'New York City',
 'US-VA-511': 'Washington, DC'}

# Pre-Processing

In [4]:
# load: take the most recently modified CSV from each source folder and stack them
paths = [GOOGLE_HIST_PATH, GOOGLE_YTD_PATH, GOOGLE_CITIES_PATH]
# paths = [GOOGLE_CITIES_PATH]
data_paths = [
    max(glob.glob(os.path.join(folder, '*.csv')), key=os.path.getmtime)
    for folder in paths
]
df = pd.concat([pd.read_csv(csv_file) for csv_file in data_paths], sort=False, ignore_index=True)

# normalise: real datetimes, trimmed lower-case variable names
df['variable'] = df['variable'].str.strip().str.lower()
df['date'] = pd.to_datetime(df['date'])

# calendar helpers derived from the date column
df['month'] = df['date'].apply(lambda stamp: datetime(stamp.year, stamp.month, 1))
df['month_no'] = df['date'].dt.month
df['month_name'] = df['month_no'].map(lambda number: calendar.month_abbr[number])
df['year_no'] = df['date'].dt.year

# monthly mean per market / variable, plus the de-duplicated key frame
df_grp = (
    df.groupby(['month', 'month_no', 'month_name', 'year_no', 'market', 'variable'])['value']
    .mean()
    .reset_index()
)
frame = df_grp.loc[:, ['month_no', 'month_name', 'market', 'variable']].drop_duplicates()

In [5]:
# widen the key frame: one left-merged value_<year> column per calendar year
metrics = []
for y in df_grp['year_no'].unique():
  df_year = (
      df_grp.loc[df_grp['year_no']==y]
      .reset_index(drop=True)
      .rename(columns={'value':f'value_{y}'})
      .drop(columns=['month', 'year_no'])
  )
  frame = frame.merge(df_year, how='left')
  metrics.append(f'value_{y}')

# same frame without any of the per-year value columns
frame_base = frame.drop(metrics, axis=1)

# Processing

In [10]:
# Keep the whitespace-stripped market code in its own column; the next cell
# overwrites 'market' with display names.
# NOTE: df_covid is only assigned further down (df_covid = google_covid(df_grp)),
# so that cell has to be run before this one.
df_covid['market_abbr'] = df_covid['market'].str.strip()

In [11]:
# Translate market codes to display names; codes that are not in mkt_country
# become None so the next cell can drop them.
# The lookup reads the stripped 'market_abbr' column created in the previous cell
# instead of 'market' itself: this matches on the cleaned code and keeps the cell
# idempotent (re-running it no longer maps already-translated names to None).
df_covid['market'] = df_covid['market_abbr'].apply(lambda code: mkt_country.get(code))

In [20]:
# Keep only rows whose market was mapped to a display name. Uses .notna()
# rather than arithmetic negation of the isna() mask: unary minus on booleans
# is not a logical NOT (NumPy rejects it outright) and only works on a pandas
# Series through special-casing.
df_covid = df_covid[df_covid['market'].notna()].reset_index(drop=True)

In [21]:
# Display the frame after rows with unmapped markets were removed.
df_covid

Unnamed: 0,month_no,month_name,year_no,market,variable,index,metric,market_abbr
0,3,Mar,2020,Australia,3x points,,% Chg vs. Covid,AU
1,3,Mar,2020,Australia,acl,0.027476,% Chg vs. Covid,AU
2,3,Mar,2020,Australia,airbnb,-0.355667,% Chg vs. Covid,AU
3,3,Mar,2020,Australia,airport lounges,-0.628484,% Chg vs. Covid,AU
4,3,Mar,2020,Australia,alanis morrissette,,% Chg vs. Covid,AU
...,...,...,...,...,...,...,...,...
21671,2,Feb,2021,United States,wimbledon tickets,-0.887513,% Chg vs. Covid,US
21672,2,Feb,2021,United States,wine delivery,2.470050,% Chg vs. Covid,US
21673,2,Feb,2021,United States,winery near me,-0.039220,% Chg vs. Covid,US
21674,2,Feb,2021,United States,winter vacation,-0.439044,% Chg vs. Covid,US


In [8]:
# Build the individual metric tables with the google_* helpers (not defined in
# this notebook; presumably provided by the star import from hlpr - confirm),
# then combine them with the market/category lookups and the run date string.
# NOTE(review): the recorded run of this cell ended in a KeyError; which call
# raised it is not visible from the saved output.
df_smly = google_smly(df, frame, frame_base, metrics)
df_mom = google_mom(df_grp)
df_abs = google_abs(df_grp)
df_covid = google_covid(df_grp)
df_all = google_combine_mom(df_smly, df_mom, df_abs, df_covid, mkt_country, cat, dt)

# export the combined table as CSV (without the index) to the outputs folder
df_all.to_csv(os.path.join(OUTPUTS_PATH, 'df_google_cities_mom.csv'), index=False)

KeyError: ignored

In [None]:
# Debug cell: the combine step is left commented out while the market lookup
# is inspected.
# NOTE(review): the saved output below lists different keys than the earlier
# mkt_country display, so it presumably comes from a different kernel state.
# df_all = google_combine_mom(df_smly, df_mom, df_abs, df_covid, mkt_country, cat, dt)

mkt_country

{'AU': 'Australia',
 'CA': 'Canada',
 'CHI-IL': 'US-IL-602',
 'CHI-IN': 'US-IN-602',
 'DC-MD': 'US-MD-511',
 'DC-VA': 'US-VA-511',
 'DC-WV': 'US-WV-511',
 'GB': 'United Kingdom',
 'LA': 'US-CA-803',
 'MX': 'Mexico',
 'NYC': 'US-NY-501',
 'SF': 'US-CA-807',
 'UK': 'United Kingdom',
 'US': 'United States'}

In [None]:
def process_google_mom() -> None:
    """Rebuild the Google month-over-month export from the newest source CSVs.

    Steps:
      1. Read the term and market lookups from ``01_utils/map.xlsx``.
      2. Load the most recently modified ``*.csv`` from the ``historical`` and
         ``current`` Google folders and stack them into one frame.
      3. Average ``value`` per month / market / variable and left-merge one
         ``value_<year>`` column per calendar year onto the shared key frame.
      4. Pass the prepared tables to the ``google_*`` helpers and write the
         combined result to ``03_outputs/df_google_cities_mom.csv``.

    Returns:
        None. The result is written to disk only.

    Raises:
        FileNotFoundError: if a source folder contains no ``*.csv`` file
            (previously surfaced as an opaque ``ValueError`` from ``max()``).
    """
    # run date used by the combine step (YYYYMMDD)
    dt = datetime.now().strftime('%Y%m%d')

    # paths
    ROOT = '/content/drive/My Drive/Colab Notebooks/AMEX/ResponseEngineDataUpdate_v2'
    UTILS_PATH = os.path.join(ROOT, '01_utils')
    OUTPUTS_PATH = os.path.join(ROOT, '03_outputs')
    GOOGLE_PATH = os.path.join(ROOT, '02_data/google')
    GOOGLE_YTD_PATH = os.path.join(GOOGLE_PATH, 'current')
    GOOGLE_HIST_PATH = os.path.join(GOOGLE_PATH, 'historical')

    # open the mapping workbook once and read both sheets from the same handle
    with pd.ExcelFile(os.path.join(UTILS_PATH, 'map.xlsx')) as workbook:
        cat = pd.read_excel(workbook, sheet_name='terms', usecols=[0, 1, 2])
        mkt = pd.read_excel(workbook, sheet_name='markets')

    # category mapping: normalised term text, exposed as 'variable'
    cat = (
        cat.assign(term=cat['term'].str.strip().str.lower())
        .rename(columns={'term': 'variable'})
    )

    # market mapping: abbreviation -> market name for priority-1 rows,
    # plus both spellings of the United Kingdom code
    mkt_country = mkt[mkt['priority'] == 1].set_index('market_abbr')['market'].to_dict()
    mkt_country['GB'] = 'United Kingdom'
    mkt_country['UK'] = 'United Kingdom'

    # import data: newest CSV (by modification time) from each source folder
    # NOTE(review): the 'cities' folder is not read here although the export is
    # named df_google_cities_mom.csv - confirm whether it should be included.
    data_paths = []
    for folder in (GOOGLE_HIST_PATH, GOOGLE_YTD_PATH):
        candidates = glob.glob(os.path.join(folder, '*.csv'))
        if not candidates:
            raise FileNotFoundError(f'no CSV files found in {folder}')
        data_paths.append(max(candidates, key=os.path.getmtime))
    df = pd.concat([pd.read_csv(path) for path in data_paths], sort=False).reset_index(drop=True)

    # clean columns
    df['date'] = pd.to_datetime(df['date'])
    df['variable'] = df['variable'].str.strip().str.lower()

    # add date variables
    df['month'] = df['date'].apply(lambda x: datetime(x.year, x.month, 1))
    df['month_no'] = df['date'].dt.month
    df['month_name'] = df['month_no'].apply(lambda x: calendar.month_abbr[x])
    df['year_no'] = df['date'].dt.year

    # group data: monthly mean per market / variable
    key_cols = ['month_no', 'month_name', 'market', 'variable']
    df_grp = (
        df.groupby(['month', 'month_no', 'month_name', 'year_no', 'market', 'variable'])['value']
        .mean()
        .to_frame()
        .reset_index()
    )
    frame = df_grp[key_cols].drop_duplicates()

    # add year data to frame: one value_<year> column per calendar year
    metrics = []
    for y in df_grp['year_no'].unique():
        value_col = f'value_{y}'
        df_year = (
            df_grp[df_grp['year_no'] == y]
            .reset_index(drop=True)
            .rename(columns={'value': value_col})
            .drop(columns=['month', 'year_no'])
        )
        # explicit keys: these are the only columns the two frames share
        frame = frame.merge(df_year, how='left', on=key_cols)
        metrics.append(value_col)

    frame_base = frame.drop(columns=metrics)

    # helper calls kept in their original order (helpers are defined outside this cell)
    df_smly = google_smly(df, frame, frame_base, metrics)
    df_mom = google_mom(df_grp)
    df_abs = google_abs(df_grp)
    df_covid = google_covid(df_grp)
    df_all = google_combine_mom(df_smly, df_mom, df_abs, df_covid, mkt_country, cat, dt)

    # export
    df_all.to_csv(os.path.join(OUTPUTS_PATH, 'df_google_cities_mom.csv'), index=False)