# Gorilla assessment

In [1]:
import pandas as pd

### Import xlsx from current directory as df

For some reason, `read_excel` fails on the provided xlsx file as-is. After opening the file in LibreOffice and re-saving it as Office 2007-2013 XML, `read_excel` works flawlessly.

In [2]:
# Read all three sheets in a single pass over the workbook instead of
# re-opening and re-parsing the file once per sheet. With a list of sheet
# names, read_excel returns a dict keyed by sheet name.
# The first column of every sheet becomes the index: the date for the
# forecast and rates sheets (enables time-series operations later on) and
# the Meter ID for the meter list (enables direct lookup by meter).
sheets = pd.read_excel(
    'data.xlsx',
    sheet_name=['Forecasted Consumption', 'Meter List', 'Rates'],
    index_col=0,
)
forecast = sheets['Forecasted Consumption']
meters = sheets['Meter List']
rates = sheets['Rates']

forecast.dtypes

Meter ID      int64
kWh         float64
dtype: object

In [3]:
# Preview the first five forecast rows.
forecast.head(n=5)

Unnamed: 0_level_0,Meter ID,kWh
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-01,1000000603,28.782474
2020-10-01,10588707,126.367711
2020-10-01,10626610,326.240595
2020-10-02,1000000603,26.690797
2020-10-02,10588707,118.322449


In [4]:
# Preview the first (up to five) rows of the meter list.
meters.head(n=5)

Unnamed: 0_level_0,Exit Zone,Annual Quantity (kWh)
Meter ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1000000603,SC1,12337
10588707,EM2,75123
10626610,NW1,151779


In [5]:
# Preview the first five rows of the rate table.
rates.head(n=5)

Unnamed: 0_level_0,Exit Zone,Annual Quantity (Min),Annual Quantity (Max),Rate (p/kWh)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-01,EA1,0,73200.0,0.0287
2020-04-01,EA2,0,73200.0,0.0287
2020-04-01,EA3,0,73200.0,0.0287
2020-04-01,EA4,0,73200.0,0.0287
2020-04-01,EM1,0,73200.0,0.0287


### Helper functions for getting annual qty and exit zone

In [6]:
def get_qty(meter_ID):
    """
    Look up the annual quantity recorded for a meter.

    Reads the module-level ``meters`` frame, which is indexed by meter ID.

    in: meter_ID (int) - index label of the meter in ``meters``
    returns: qty (int) - value of the 'Annual Quantity (kWh)' column
    raises: KeyError if meter_ID is not present in the ``meters`` index
    """
    # Select the meter's row first, then pick the column from that row.
    return meters.loc[meter_ID]['Annual Quantity (kWh)']

def get_exit(meter_ID):
    """
    Look up the exit zone recorded for a meter.

    Reads the module-level ``meters`` frame, which is indexed by meter ID.

    in: meter_ID (int) - index label of the meter in ``meters``
    returns: exit zone (str) - value of the 'Exit Zone' column
    raises: KeyError if meter_ID is not present in the ``meters`` index
    """
    # Select the meter's row first, then pick the column from that row.
    zone = meters.loc[meter_ID]['Exit Zone']
    return zone

In [7]:
#test
# meter_ID = 1000000603
# print(get_exit(meter_ID))
# print(get_qty(meter_ID))

### Function to calculate day rate

In [8]:
def get_rate_date(date, rate_table=None):
    """Find the latest rate date that is applicable on ``date``.

    A rate whose date equals ``date`` is treated as already in force, so the
    comparison is inclusive (``<=``). The previous strict ``<`` priced the
    first day of every rate period with the preceding period's rate.

    date: anything ``pd.to_datetime`` accepts (Timestamp, 'YYYYMMDD' string, ...)
    rate_table: optional frame indexed by rate date; defaults to the
        module-level ``rates`` frame
    return: the most recent index value not after ``date`` (pd.Timestamp),
        or ``pd.NaT`` when every rate date lies after ``date``
    """
    table = rates if rate_table is None else rate_table
    when = pd.to_datetime(date)

    # Keep only rate dates on or before the requested day; the newest wins.
    in_force = table.index[table.index <= when]
    return in_force.max()

#test
#print(get_rate_date('20210202'))

def calc_rate(meter_ID, date): 
    """Get the rate that applies to a meter on a given date.

    The rate is taken from the module-level ``rates`` frame: among the rows
    for the meter's exit zone and the applicable rate date, the row with the
    highest 'Annual Quantity (Min)' that is still below the meter's annual
    quantity is used.

    meter_ID (int)
    date (pd.dt object)
    return: rate (float), the 'Rate (p/kWh)' value of the matching row
    raises: IndexError when no row of ``rates`` matches the meter's exit
        zone, the applicable rate date and the annual-quantity band
    """
    zone = get_exit(meter_ID)
    qty = get_qty(meter_ID)
    rate_date = get_rate_date(date)

    # One boolean mask instead of chained DataFrame.get(mask) calls: .get is a
    # key lookup that silently returns None on failure, not a row filter.
    # NOTE(review): the lower band bound is exclusive (Min < qty), so a meter
    # whose annual quantity equals a band's minimum falls into the band below
    # (and a quantity of 0 matches nothing) - confirm this is intended.
    band_floor = rates['Annual Quantity (Min)']
    candidates = rates.loc[
        (rates['Exit Zone'] == zone)
        & (rates.index == rate_date)
        & (band_floor < qty)
    ]

    if candidates.empty:
        raise IndexError(
            f"no rate found for meter {meter_ID} (exit zone {zone!r}, "
            f"annual quantity {qty}) on rate date {rate_date}"
        )

    # The applicable band is the one with the largest minimum still below qty.
    highest_floor = candidates['Annual Quantity (Min)'].max()
    in_band = candidates['Annual Quantity (Min)'] == highest_floor

    return candidates.loc[in_band, 'Rate (p/kWh)'].iloc[0]

#test
#print(calc_rate(10626610,'20210202'))
    

### Add day rate to forecast

In [9]:
# Work on a real copy of the forecast: plain assignment would only create a
# second name for the same DataFrame, so adding columns below would also
# modify `forecast`.
forecast_rate = forecast.copy()

# Add a column with the day rate, looked up per (meter, date) row.
forecast_rate['Day rate'] = [
    calc_rate(meter_ID, date)
    for meter_ID, date in zip(forecast_rate['Meter ID'], forecast_rate.index)
]

forecast_rate.head()


Unnamed: 0_level_0,Meter ID,kWh,Day rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-10-01,1000000603,28.782474,0.0333
2020-10-01,10588707,126.367711,0.0228
2020-10-01,10626610,326.240595,0.0285
2020-10-02,1000000603,26.690797,0.0333
2020-10-02,10588707,118.322449,0.0228


### Calculate daily cost

In [10]:
# Daily cost = forecast consumption (kWh) x day rate (taken from 'Rate (p/kWh)').
forecast_rate['Day cost'] = forecast_rate['kWh'].mul(forecast_rate['Day rate'])

forecast_rate.head()

Unnamed: 0_level_0,Meter ID,kWh,Day rate,Day cost
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-10-01,1000000603,28.782474,0.0333,0.958456
2020-10-01,10588707,126.367711,0.0228,2.881184
2020-10-01,10626610,326.240595,0.0285,9.297857
2020-10-02,1000000603,26.690797,0.0333,0.888804
2020-10-02,10588707,118.322449,0.0228,2.697752


### Calculate total cost

In [11]:
# Start from an empty result frame that has one row per meter (same index as `meters`).
total_consumption = pd.DataFrame(data=None, index=meters.index)

def sum_meter(meter_ID, col, frame=None):
    """
    Sum one column over all forecast rows that belong to a meter.

    Intended for totalling the forecasted consumption or cost of a meter.

    meter_ID = (int) value to match in the 'Meter ID' column
    col = name of column to be summed (string)
    frame = optional DataFrame with a 'Meter ID' column and column ``col``;
            defaults to the module-level ``forecast_rate`` frame
    return: sum of filtered column (float); 0 when no row matches the meter
    """
    source = forecast_rate if frame is None else frame

    # Boolean-mask selection with .loc; DataFrame.get is a key lookup that
    # silently returns None on failure and is not meant for row filtering.
    return source.loc[source['Meter ID'] == meter_ID, col].sum()
    
    
meter_ids = list(total_consumption.index)

# Estimated total consumption per meter: sum of the daily forecast (kWh).
kwh_totals = [sum_meter(meter_ID, 'kWh') for meter_ID in meter_ids]
total_consumption['Total Estimated Consumption (kWh)'] = kwh_totals

# Estimated total cost per meter: sum of the daily cost, which is in pence.
pence_totals = [sum_meter(meter_ID, 'Day cost') for meter_ID in meter_ids]
# Convert pence to £.
total_consumption['Total Cost (£)'] = [pence / 100 for pence in pence_totals]

total_consumption = total_consumption.round(2)
total_consumption.head()

Unnamed: 0_level_0,Total Estimated Consumption (kWh),Total Cost (£)
Meter ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1000000603,24674.0,8.27
10588707,122450.0,28.49
10626610,303558.0,88.23
