In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
import config 

In [2]:
# Project root directory, taken from the local `config` module (importable via the
# sys.path.append('../') above); used below to build the raw/processed storage paths.
ROOT_DIR = config.ROOT_DIR

#### Transform AdventureWorks_Calendar

In [3]:
# load the raw AdventureWorks calendar extract from the raw storage layer
raw_calendar_path = rf"{ROOT_DIR}/../storage/raw/AdventureWorks_Calendar.csv"
aw_calendar_dim = pd.read_csv(raw_calendar_path)

In [4]:
# preview the first rows of the calendar data exactly as loaded from the CSV
aw_calendar_dim.head()

Unnamed: 0,Date
0,1/1/2015
1,1/2/2015
2,1/3/2015
3,1/4/2015
4,1/5/2015


In [5]:
# check column dtypes and non-null counts of the freshly loaded frame
# (Date is read in as object, i.e. not yet parsed as a datetime)
aw_calendar_dim.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 912 entries, 0 to 911
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    912 non-null    object
dtypes: object(1)
memory usage: 7.2+ KB


In [6]:
# parse the month/day/year text in the Date column into datetime64[ns] values
parsed_dates = pd.to_datetime(aw_calendar_dim["Date"], format="%m/%d/%Y")
aw_calendar_dim["Date"] = parsed_dates

In [7]:
# derive calendar attributes from the parsed Date column
dates = aw_calendar_dim["Date"]

# descriptive labels and the calendar year
aw_calendar_dim["DayName"] = dates.dt.day_name()
aw_calendar_dim["MonthName"] = dates.dt.month_name()
aw_calendar_dim["Year"] = dates.dt.year

# first day of the year / month / week that each date falls in
aw_calendar_dim["StartOfYear"] = dates.dt.to_period("Y").dt.to_timestamp()
aw_calendar_dim["StartOfMonth"] = dates.dt.to_period("M").dt.to_timestamp()
aw_calendar_dim["StartOfWeek"] = dates.dt.to_period("W").dt.start_time

# weekday number shifted to be 1-based: Monday = 1, ..., Sunday = 7
day_of_week = dates.dt.dayofweek + 1
aw_calendar_dim["DayOfWeek"] = day_of_week
# Saturday (6) and Sunday (7) are labelled "Weekend", every other day "Weekday"
aw_calendar_dim["Weekend"] = np.where(day_of_week.isin([6, 7]), "Weekend", "Weekday")

In [8]:
# three-letter upper-case abbreviations (e.g. THU, JAN) of the day and month names
aw_calendar_dim["ShortDay"] = aw_calendar_dim["DayName"].str[:3].str.upper()
aw_calendar_dim["ShortMonth"] = aw_calendar_dim["MonthName"].str[:3].str.upper()

In [9]:
# re-check the schema after the transformations: Date and the StartOf* columns
# should now be datetime64[ns], alongside the other derived columns
aw_calendar_dim.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 912 entries, 0 to 911
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          912 non-null    datetime64[ns]
 1   DayName       912 non-null    object        
 2   MonthName     912 non-null    object        
 3   Year          912 non-null    int64         
 4   StartOfYear   912 non-null    datetime64[ns]
 5   StartOfMonth  912 non-null    datetime64[ns]
 6   StartOfWeek   912 non-null    datetime64[ns]
 7   DayOfWeek     912 non-null    int64         
 8   Weekend       912 non-null    object        
 9   ShortDay      912 non-null    object        
 10  ShortMonth    912 non-null    object        
dtypes: datetime64[ns](4), int64(2), object(5)
memory usage: 78.5+ KB


In [10]:
# preview the first rows of the transformed calendar dimension
aw_calendar_dim.head()

Unnamed: 0,Date,DayName,MonthName,Year,StartOfYear,StartOfMonth,StartOfWeek,DayOfWeek,Weekend,ShortDay,ShortMonth
0,2015-01-01,Thursday,January,2015,2015-01-01,2015-01-01,2014-12-29,4,Weekday,THU,JAN
1,2015-01-02,Friday,January,2015,2015-01-01,2015-01-01,2014-12-29,5,Weekday,FRI,JAN
2,2015-01-03,Saturday,January,2015,2015-01-01,2015-01-01,2014-12-29,6,Weekend,SAT,JAN
3,2015-01-04,Sunday,January,2015,2015-01-01,2015-01-01,2014-12-29,7,Weekend,SUN,JAN
4,2015-01-05,Monday,January,2015,2015-01-01,2015-01-01,2015-01-05,1,Weekday,MON,JAN


In [11]:
# persist the transformed calendar dimension to the processed storage layer for Power BI;
# index=False keeps the row index out of the CSV
processed_calendar_path = rf"{ROOT_DIR}/../storage/processed/aw_calendar_dim.csv"
aw_calendar_dim.to_csv(processed_calendar_path, index=False)