In [95]:
# https://preppindata.blogspot.com/2023/07/2023-week-27-cost-of-running-prep-school.html

import pandas as pd
import numpy as np
from datetime import date

### Input the data - Consolidate the data from the 4 different Excel sheets into a single data set

In [96]:
# Read every sheet of the workbook (sheet_name=None returns a dict of
# DataFrames keyed by sheet name) and stack them into one frame.
# The path uses a forward slash so it resolves on Windows, macOS and Linux;
# a backslash is only treated as a separator on Windows.
df = pd.concat(
    pd.read_excel('data/PD 2021 Wk 27 Input.xlsx', sheet_name=None)
)
# concat puts the dict keys on the outer index level; drop that level so only
# each sheet's own row numbers remain as the index.
df = df.reset_index(level=0, drop=True)
df

Unnamed: 0,School Name,Year,Month,Name,Value
0,School A,2021,January,Electricity Cost,5000
1,School A,2021,January,Water Cost,1000
2,School A,2021,January,Gas Cost,800
3,School A,2021,January,Maintenance Cost,1500
4,School A,2021,February,Electricity Cost,4800
...,...,...,...,...,...
43,School D,2021,November,Maintenance Cost,1220
44,School D,2021,December,Electricity Cost,4250
45,School D,2021,December,Water Cost,780
46,School D,2021,December,Gas Cost,630


### The executives would like the data transformed so that each row holds one month's costs for one school, with a separate column for each cost category

In [97]:
# Check the column labels of the combined data before reshaping it
df.columns

Index(['School Name', 'Year', 'Month', 'Name', 'Value'], dtype='object')

In [98]:
# Spread the cost categories stored in 'Name' into their own columns, leaving
# one row per school / year / month combination.
row_keys = ['School Name', 'Year', 'Month']
df = df.pivot(index=row_keys, columns='Name', values='Value')
# Turn the row keys back into ordinary columns
df = df.reset_index()
df.head(10)

Name,School Name,Year,Month,Electricity Cost,Gas Cost,Maintenance Cost,Water Cost
0,School A,2021,April,5200,820,1550,1050
1,School A,2021,August,5200,820,1550,1000
2,School A,2021,December,5200,820,1550,1000
3,School A,2021,February,4800,750,1400,900
4,School A,2021,January,5000,800,1500,1000
5,School A,2021,July,4800,780,1400,950
6,School A,2021,June,5000,750,1500,900
7,School A,2021,March,5100,780,1450,950
8,School A,2021,May,5100,800,1520,1000
9,School A,2021,November,4800,780,1400,950


In [99]:
# Check the column labels after the pivot
df.columns

Index(['School Name', 'Year', 'Month', 'Electricity Cost', 'Gas Cost',
       'Maintenance Cost', 'Water Cost'],
      dtype='object', name='Name')

### Calculate the total monthly cost for each school

In [100]:
# Total monthly cost is the sum of the four category columns.
# Plain addition propagates NaN, so a row missing any category gets a NaN total.
df['Total Cost'] = (
    df['Electricity Cost']
    + df['Gas Cost']
    + df['Maintenance Cost']
    + df['Water Cost']
)

# Build a first-of-month date from the year and the full month name.
# The explicit format ('%B' = full month name) avoids relying on pandas'
# format inference for strings such as '2021-January-01'.
df['Date'] = pd.to_datetime(
    df['Year'].astype(str) + '-' + df['Month'] + '-01',
    format='%Y-%B-%d',
)
# Month number (1-12) used as the sort key; taken from the parsed date rather
# than parsing the month name a second time.
df['Sort'] = df['Date'].dt.month

### Make it so that the dataset is ordered by month, for each school

In [103]:
# Final column layout, with the month-number sort key first
output_columns = [
    'Sort',
    'School Name',
    'Date',
    'Total Cost',
    'Electricity Cost',
    'Gas Cost',
    'Maintenance Cost',
    'Water Cost',
]

# Keep only those columns, order the rows by month within each school,
# then renumber the rows from zero.
df = (
    df[output_columns]
    .sort_values(by=['School Name', 'Sort'])
    .reset_index(level=0, drop=True)
)
df.head(15)

Name,Sort,School Name,Date,Total Cost,Electricity Cost,Gas Cost,Maintenance Cost,Water Cost
0,1,School A,2021-01-01,8300,5000,800,1500,1000
1,2,School A,2021-02-01,7850,4800,750,1400,900
2,3,School A,2021-03-01,8280,5100,780,1450,950
3,4,School A,2021-04-01,8620,5200,820,1550,1050
4,5,School A,2021-05-01,8420,5100,800,1520,1000
5,6,School A,2021-06-01,8150,5000,750,1500,900
6,7,School A,2021-07-01,7930,4800,780,1400,950
7,8,School A,2021-08-01,8570,5200,820,1550,1000
8,9,School A,2021-09-01,8470,5100,800,1520,1050
9,10,School A,2021-10-01,8150,5000,750,1500,900


### Output the data

In [104]:
# Write the final table to CSV. index=False leaves out the positional row
# index, which would otherwise be written as an extra unnamed first column.
# NOTE: to_csv does not create missing folders, so 'output' must already exist.
df.to_csv(r'output/2021-week27-output.csv', index=False)