# Creating a time dimension table

In [1]:
# importing data analysis library
import pandas as pd
import numpy as np

In [2]:
# first column: one row per minute of the day
# base range in one-minute steps; only the time-of-day part is kept below
time_range = pd.date_range('00:00:00', '23:59:59', freq='min')

# single-column frame, each minute rendered as an HH:MM:SS string
time_dim = pd.DataFrame({'time': time_range})
time_dim['time'] = time_dim['time'].dt.strftime('%H:%M:%S')

# displaying the df so far
time_dim

Unnamed: 0,time
0,00:00:00
1,00:01:00
2,00:02:00
3,00:03:00
4,00:04:00
...,...
1435,23:55:00
1436,23:56:00
1437,23:57:00
1438,23:58:00


In [3]:
# extracting features from the time of day
# parse the HH:MM:SS strings once for the whole column instead of
# re-parsing every row inside a separate lambda for each feature
parsed_time = pd.to_datetime(time_dim['time'], format='%H:%M:%S')

# numeric components and running counters
time_dim['hour'] = parsed_time.dt.hour
time_dim['minute'] = parsed_time.dt.minute
time_dim['AM_PM'] = parsed_time.dt.strftime('%p')
time_dim['min_of_day'] = time_dim['hour'] * 60 + time_dim['minute']
time_dim['sec_of_day'] = time_dim['min_of_day'] * 60 + parsed_time.dt.second
# quarter of the hour, numbered 1-4
time_dim['QuarterHour'] = time_dim['minute'] // 15 + 1

# hour-based labels: the first matching range wins, anything left over
# falls through to the default label
hour = time_dim['hour']
time_dim['TimePeriod'] = np.select(
    [(hour >= 5) & (hour < 12),
     (hour >= 12) & (hour < 17),
     (hour >= 17) & (hour < 21)],
    ['Morning', 'Afternoon', 'Evening'],
    default='Night')
time_dim['Shift'] = np.select(
    [(hour >= 6) & (hour < 14),
     (hour >= 14) & (hour < 22)],
    ['Morning Shift', 'Evening Shift'],
    default='Night Shift')
# NOTE(review): both bounds are inclusive, so 17:00-17:59 counts as a work
# hour, unlike the half-open ranges used above -- confirm this is intended
time_dim['IsWorkHour'] = hour.between(9, 17)

# display strings: 12-hour clock with AM/PM, and 24-hour "HHhMM"
time_dim['AM-PM_string'] = parsed_time.dt.strftime('%I:%M %p')
time_dim['24h_string'] = parsed_time.dt.strftime('%Hh%M')

time_dim.sample(5)

Unnamed: 0,time,hour,minute,AM_PM,min_of_day,sec_of_day,QuarterHour,TimePeriod,Shift,IsWorkHour,AM-PM_string,24h_string
1301,21:41:00,21,41,PM,1301,78060,3,Night,Evening Shift,False,09:41 PM,21h41
1228,20:28:00,20,28,PM,1228,73680,2,Evening,Evening Shift,False,08:28 PM,20h28
1054,17:34:00,17,34,PM,1054,63240,3,Evening,Evening Shift,True,05:34 PM,17h34
237,03:57:00,3,57,AM,237,14220,4,Night,Night Shift,False,03:57 AM,03h57
825,13:45:00,13,45,PM,825,49500,4,Afternoon,Morning Shift,True,01:45 PM,13h45


## Exporting to .csv for use in other data modelling software

In [4]:
# write the finished table to CSV, leaving out the row index
# NOTE(review): the file is named date_dim.csv although the table holds
# time-of-day rows -- confirm the name expected by downstream tools
time_dim.to_csv('date_dim.csv', index= False)