# Create Features

## Libraries, options, paths, and functions

In [2]:
#import libraries
import pandas as pd
import datetime as dt
import featuretools as ft

# set options
pd.set_option('display.float_format', '{:,.2f}'.format)

# define paths

# define functions
def qqagr(df,mev,mev_g):
    for i in range(1,len(df)):
        df.loc[i,mev_g] = ((df.loc[i,mev] / df.loc[i-1,mev])**4 - 1)*100

## Load data

In [3]:
# load raw_mevs_ca data
with pd.HDFStore('./stats_can_data/raw_mevs_ca.hdf5') as data:
    mevs_ca = data['raw_mevs_ca']
    metadata = data.get_storer('raw_mevs_ca').attrs.metadata

display(mevs_ca.tail())

Unnamed: 0,ref_date,geo,pop_ca,gdp_ca
244,2022-01-01,Canada,38516138,2155250
245,2022-04-01,Canada,38644920,2174582
246,2022-07-01,Canada,38929902,2186724
247,2022-10-01,Canada,39292355,2185910
248,2023-01-01,Canada,39566248,2202921


## Dates

In [4]:
mevs_ca['year'] = mevs_ca['ref_date'].dt.year
mevs_ca['quarter'] = mevs_ca['ref_date'].dt.quarter

display(mevs_ca.tail())

Unnamed: 0,ref_date,geo,pop_ca,gdp_ca,year,quarter
244,2022-01-01,Canada,38516138,2155250,2022,1
245,2022-04-01,Canada,38644920,2174582,2022,2
246,2022-07-01,Canada,38929902,2186724,2022,3
247,2022-10-01,Canada,39292355,2185910,2022,4
248,2023-01-01,Canada,39566248,2202921,2023,1


## Growth rates

In [5]:
qqagr(mevs_ca,'gdp_ca','gdp_ca_g')
qqagr(mevs_ca,'pop_ca','pop_ca_g')
    
display(mevs_ca.head())
display(mevs_ca.tail())

Unnamed: 0,ref_date,geo,pop_ca,gdp_ca,year,quarter,gdp_ca_g,pop_ca_g
0,1961-01-01,Canada,18092000,342687,1961,1,,
1,1961-04-01,Canada,18172000,351364,1961,2,10.52,1.78
2,1961-07-01,Canada,18271000,359914,1961,3,10.09,2.2
3,1961-10-01,Canada,18363000,364811,1961,4,5.55,2.03
4,1962-01-01,Canada,18442000,374518,1962,1,11.08,1.73


Unnamed: 0,ref_date,geo,pop_ca,gdp_ca,year,quarter,gdp_ca_g,pop_ca_g
244,2022-01-01,Canada,38516138,2155250,2022,1,2.59,0.94
245,2022-04-01,Canada,38644920,2174582,2022,2,3.64,1.34
246,2022-07-01,Canada,38929902,2186724,2022,3,2.25,2.98
247,2022-10-01,Canada,39292355,2185910,2022,4,-0.15,3.78
248,2023-01-01,Canada,39566248,2202921,2023,1,3.15,2.82


## Generate features

In [6]:
dataframes = {
   "mevs_ca" : (customers_df, "customer_id"),
   "sessions" : (sessions_df, "session_id", "session_start"),
   "transactions" : (transactions_df, "transaction_id", "transaction_time")
}

feature_matrix, features_defs = ft.dfs(dataframes=dataframes,
                                                 relationships=relationships,
                                                 target_dataframe_name="customers")
feature_matrix

NameError: name 'customers_df' is not defined

## Combine and save

In [None]:
save = pd.HDFStore('./stats_can_data/mevs_ca.hdf5')
save.put('mevs_ca', mevs_ca)
metadata = {'scale':0.1,'offset':15}
save.get_storer('mevs_ca').attrs.metadata = metadata
save.close()