In [None]:
pip install holidays



In [3]:
#Import Statement
from datetime import date
import holidays
import pandas as pd
import warnings
import io
from google.colab import files
warnings.filterwarnings("ignore")

In [None]:
#Identify the holidays in Australia (province code 'VIC') for 2021
australia_holidays = holidays.Australia(years = 2021, prov = 'VIC')
holidaylist = []
# The loop variable is deliberately NOT called `date`: that name would overwrite
# the `datetime.date` class imported at the top of the notebook and break any
# later cell that calls date(...).
for holiday_date, holiday_name in australia_holidays.items():
  # Same "%m/%d/%Y" text format as the `date` column built later, so the two
  # can be compared with isin().
  holidaylist.append(holiday_date.strftime("%m/%d/%Y"))
  print(holiday_date, holiday_name)
print(holidaylist)

2021-01-01 New Year's Day
2021-01-26 Australia Day
2021-04-02 Good Friday
2021-04-03 Easter Saturday
2021-04-04 Easter Sunday
2021-04-05 Easter Monday
2021-04-25 Anzac Day
2021-06-14 Queen's Birthday
2021-03-08 Labour Day
2021-09-24 Grand Final Day
2021-11-02 Melbourne Cup
2021-12-25 Christmas Day
2021-12-27 Christmas Day (Observed)
2021-12-26 Boxing Day
2021-12-28 Boxing Day (Observed)
['01/01/2021', '01/26/2021', '04/02/2021', '04/03/2021', '04/04/2021', '04/05/2021', '04/25/2021', '06/14/2021', '03/08/2021', '09/24/2021', '11/02/2021', '12/25/2021', '12/27/2021', '12/26/2021', '12/28/2021']


In [None]:
#Import statement
from datetime import timedelta, date

#Generate a list of dates March 2021 to August 2021
def daterange(date1, date2):
    """Yield every date from ``date1`` through ``date2``, both ends included.

    Nothing is yielded when ``date2`` is earlier than ``date1``.
    """
    day_count = (date2 - date1).days + 1
    for offset in range(day_count):
        yield date1 + timedelta(days=offset)

start_dt = date(2021, 3, 1)
end_dt = date(2021, 8, 31)
# Dates are kept as "%m/%d/%Y" strings; later cells match on this exact text.
datelist = [dt.strftime("%m/%d/%Y") for dt in daterange(start_dt, end_dt)]

In [None]:
#Create new dataframe with a single `date` column holding the generated date strings
dataset = pd.DataFrame({'date': datelist})
dataset.head()

Unnamed: 0,date
0,03/01/2021
1,03/02/2021
2,03/03/2021
3,03/04/2021
4,03/05/2021


In [None]:
#Add new columns school_day and holiday
# Every row starts as a school day ('Y') and a non-holiday ('N'); the following
# cells flip the rows that match holidays, term breaks and weekends.
dataset = dataset.assign(school_day='Y', holiday='N')
dataset.head()

Unnamed: 0,date,school_day,holiday
0,03/01/2021,Y,N
1,03/02/2021,Y,N
2,03/03/2021,Y,N
3,03/04/2021,Y,N
4,03/05/2021,Y,N


In [None]:
#Modify the label of holiday column depending on identified holidays
# Use one .loc[rows, column] assignment per column so the write goes to `dataset`
# itself. The chained form dataset[col].loc[rows] = ... assigns into an
# intermediate Series and is not guaranteed to update the frame (it is a no-op
# under pandas Copy-on-Write).
is_public_holiday = dataset['date'].isin(holidaylist)
dataset.loc[is_public_holiday, 'holiday'] = 'Y'
dataset.loc[is_public_holiday, 'school_day'] = 'N'
dataset.loc[is_public_holiday]

Unnamed: 0,date,school_day,holiday
7,03/08/2021,N,Y
32,04/02/2021,N,Y
33,04/03/2021,N,Y
34,04/04/2021,N,Y
35,04/05/2021,N,Y
55,04/25/2021,N,Y
105,06/14/2021,N,Y


In [None]:
#List for school term holidays
# Two date spans (both ends included), formatted as "%m/%d/%Y" strings like
# the `date` column of the dataset.
term_breaks = [
    (date(2021, 4, 2), date(2021, 4, 18)),
    (date(2021, 6, 26), date(2021, 7, 11)),
]
datelist = [
    dt.strftime("%m/%d/%Y")
    for break_start, break_end in term_breaks
    for dt in daterange(break_start, break_end)
]

In [None]:
#Modify the label for school term holidays
# Single .loc[rows, column] assignment: the chained dataset[col].loc[rows] = ...
# form writes to an intermediate Series and may leave `dataset` unchanged.
dataset.loc[dataset['date'].isin(datelist), 'school_day'] = 'N'

In [None]:
#Generate each sunday in year 2021
from datetime import date, timedelta
def allsundays(year):
   """Yield every Sunday of ``year`` as a ``datetime.date``, in calendar order."""
   new_years_day = date(year, 1, 1)
   # date.weekday() is 0 for Monday .. 6 for Sunday, so this offset is 0..6 days.
   sunday = new_years_day + timedelta(days = 6 - new_years_day.weekday())
   one_week = timedelta(days = 7)
   while sunday.year == year:
      yield sunday
      sunday = sunday + one_week

# Same "%m/%d/%Y" text format as the dataset's `date` column.
datelist = [sunday.strftime("%m/%d/%Y") for sunday in allsundays(2021)]

In [None]:
#Modify the label of the school day for Sundays
# Single .loc[rows, column] assignment: the chained dataset[col].loc[rows] = ...
# form writes to an intermediate Series and may leave `dataset` unchanged.
dataset.loc[dataset['date'].isin(datelist), 'school_day'] = 'N'

In [None]:
#Generate each Saturday in year 2021
from datetime import date, timedelta
def allsaturday(year):
   """Yield every Saturday of ``year`` as a ``datetime.date``, in calendar order."""
   d = date(year, 1, 1)                          # January 1st
   # date.weekday(): Monday == 0 ... Saturday == 5, Sunday == 6. The modulo keeps
   # the offset in 0..6; without it a year that starts on a Sunday gets an offset
   # of -1, lands in the previous year and the loop below yields nothing.
   d += timedelta(days = (5 - d.weekday()) % 7)  # First Saturday
   while d.year == year:
      yield d
      d += timedelta(days = 7)

# Same "%m/%d/%Y" text format as the dataset's `date` column.
datelist = [d.strftime("%m/%d/%Y") for d in allsaturday(2021)]

In [None]:
#Modify the label of the school day for Saturdays
# Single .loc[rows, column] assignment: the chained dataset[col].loc[rows] = ...
# form writes to an intermediate Series and may leave `dataset` unchanged.
dataset.loc[dataset['date'].isin(datelist), 'school_day'] = 'N'

In [None]:
# Show the first 7 rows (seven consecutive days) to spot-check the weekend labels.
dataset.head(7)

Unnamed: 0,date,school_day,holiday
0,03/01/2021,Y,N
1,03/02/2021,Y,N
2,03/03/2021,Y,N
3,03/04/2021,Y,N
4,03/05/2021,Y,N
5,03/06/2021,N,N
6,03/07/2021,N,N


In [None]:
#Converting dataframe to csv file and downloading the file
# The file is written with the default to_csv settings, so the row index is
# included as the first column.
schoolday_csv = 'Schooldayholiday.csv'
dataset.to_csv(schoolday_csv)
files.download(schoolday_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
#Uploading files in colab
# `uploaded` is indexed by file name further down, and the value is wrapped in
# io.BytesIO before being passed to pd.read_csv.
uploaded = files.upload()

Saving PRICE_AND_DEMAND_202103_VIC1.csv to PRICE_AND_DEMAND_202103_VIC1.csv


In [5]:
#Function to calculate the RRP attribute for March month
def update_dataset(data):
  """Return the mean of the numeric columns per calendar day, minus TOTALDEMAND.

  Parameters
  ----------
  data : pandas.DataFrame
      Must contain a string column ``SETTLEMENTDATE`` of the form
      "<date> <time>" (exactly one space) and a ``TOTALDEMAND`` column.

  Returns
  -------
  pandas.DataFrame
      Indexed by the date part of ``SETTLEMENTDATE`` (index name ``DATE``),
      holding the per-day mean of every numeric column except ``TOTALDEMAND``.
      The input frame is left unmodified.
  """
  # Work on a copy so the caller's frame does not silently gain DATE/TIME columns.
  data = data.copy()
  data[['DATE','TIME']] = data.SETTLEMENTDATE.str.split(" ",expand=True)
  # count() is the number of non-null SETTLEMENTDATE values; the slice below
  # therefore drops the final row when no values are missing.
  # NOTE(review): presumably that row is the 00:00:00 record belonging to the
  # next day/month - confirm against the source export.
  rowcount = data['SETTLEMENTDATE'].count()
  data = data.iloc[:(rowcount - 1)]
  # numeric_only=True: the frame still carries text columns (SETTLEMENTDATE,
  # TIME, ...); pandas >= 2.0 raises TypeError when asked to average them.
  data = data.groupby('DATE').mean(numeric_only=True)
  data = data.drop(columns=['TOTALDEMAND'])
  return data

In [6]:
#Read the CSV file for March month
# The uploaded content is looked up by file name and wrapped in a BytesIO
# buffer so that read_csv can consume it.
march_csv_bytes = uploaded['PRICE_AND_DEMAND_202103_VIC1.csv']
data_MarchRRP = pd.read_csv(io.BytesIO(march_csv_bytes))

In [7]:
#Update the RRP attribute in the data set
# NOTE: this rebinds data_MarchRRP from the raw CSV frame to the per-day result,
# so the cell cannot be re-run on its own: update_dataset reads the
# SETTLEMENTDATE column, which the per-day frame no longer has. Re-run the
# read_csv cell first.
data_MarchRRP = update_dataset(data_MarchRRP)
data_MarchRRP.tail()

Unnamed: 0_level_0,RRP
DATE,Unnamed: 1_level_1
2021/03/27,31.953958
2021/03/28,33.573333
2021/03/29,30.380417
2021/03/30,37.243958
2021/03/31,35.296042


In [8]:
#Download the data set as csv file
# Default to_csv settings: the DATE index is written as the first column.
rrp_csv = 'RRPAttribute.csv'
data_MarchRRP.to_csv(rrp_csv)
files.download(rrp_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>