In [None]:
import numpy as np
import pandas as pd
import random
import datetime
import time

# Recorded execution times for different input sizes:
#   1 meter      &    1 day :     160 ms
#   1000 meters  &    1 day :    3349 ms
#   1 meter      & 1000 days:     170 ms
#   1000 meters  & 1000 days:    5885 ms
#   10000 meters &  100 days:   40523 ms
#   10000 meters & 1000 days:  809100 ms

# Reference point for the elapsed-time line printed at the end of the script.
start_time = time.time()


def list_of_meters(number_of_meters):
    """Build a table of randomly generated meters.

    Args:
        number_of_meters: How many meters (rows) to generate.

    Returns:
        A DataFrame with one row per meter and the columns
        ``'Meter ID'`` (distinct 8-digit integers),
        ``'Estimated Consumption (kWh)'`` (integers from 0 to 2,120,000
        inclusive) and ``'Exit'`` (one of the exit-zone codes listed below).

    Raises:
        ValueError: If ``number_of_meters`` is negative or exceeds the number
            of available 8-digit IDs.
    """
    exits_available = ['EA1', 'EA2', 'EA3', 'EA4', 'EM1', 'EM2', 'EM3', 'EM4', 'LC', 'LO', 'LS', 'LT', 'LW', 'NE1',
                       'NE2',
                       'NE3', 'NO1', 'NO2', 'NT1', 'NT2', 'NT3', 'NW1', 'NW2', 'SC1', 'SC2', 'SC4',
                       'SE1', 'SE2', 'SO1', 'SO2',
                       'SW1', 'SW2', 'SW3', 'WA1', 'WA2', 'WM1', 'WM2', 'WM3']

    # Sample without replacement: forecasts are matched to meters by ID, so a
    # repeated ID would merge the consumption of two different meters.
    # Sampling from a range does not materialise the 90 million candidates.
    meter_ids = random.sample(range(10000000, 100000000), number_of_meters)
    exits_list = [random.choice(exits_available) for _ in range(number_of_meters)]
    aq_random = np.random.randint(0, 2120001, number_of_meters)

    return pd.DataFrame({'Meter ID': meter_ids, 'Estimated Consumption (kWh)': aq_random,
                         'Exit': exits_list}, columns=['Meter ID', 'Estimated Consumption (kWh)', 'Exit'])


def random_forecast_table(listOfMeters, start_date, duration):
    """Create a random daily consumption forecast for every meter.

    Args:
        listOfMeters: Meter IDs; must support ``len()`` and ``.repeat()``
            (e.g. a pandas Series or a NumPy array).
        start_date: First forecast day; ``datetime.timedelta`` is added to it.
        duration: Number of consecutive days to forecast per meter.

    Returns:
        A DataFrame with columns ``'Meter_ID'``, ``'Date'`` and ``'kwh'``
        holding ``duration`` rows per meter, grouped meter by meter, with
        ``kwh`` drawn uniformly from [0, 2000).
    """
    meter_count = len(listOfMeters)

    # One run of consecutive days, then the same run once per meter.
    one_window = np.array([start_date + datetime.timedelta(days=offset)
                           for offset in range(duration)])
    all_dates = np.tile(one_window, meter_count)

    # Each meter ID repeated for every day of its window, lining up with all_dates.
    ids_per_row = listOfMeters.repeat(duration)

    random_kwh = np.random.uniform(low=0.0, high=2000.0, size=(duration * meter_count,))

    column_order = ['Meter_ID', 'Date', 'kwh']
    table_data = {
        'Meter_ID': list(ids_per_row),
        'Date': list(all_dates),
        'kwh': random_kwh,
    }
    return pd.DataFrame(table_data, columns=column_order)


# Import data from excel to get the Rate Table.
#
# CHANGE THE PATH TO WHERE YOUR EXCEL IS LOCATED
RATE_TABLE_PATH = r'/Users/juliankautsch/PycharmProjects/GorillaAssesment/gorilla_test_data.xlsx'

# The context manager closes the workbook as soon as the sheet has been read.
with pd.ExcelFile(RATE_TABLE_PATH) as file:
    RateTable = pd.read_excel(file, 'rate_table')
# Collects one [meter id, total consumption, total cost] row per meter.
MetersList = []

# First day of the consumption data.
YYYY = 2022
MM = 12
DD = 19

# Number of days of consumption data generated per meter.
Duration = 100

# Random meter population with exit zones and aq; the argument sets its size.
MeterList = list_of_meters(1000)
RandomMeters = MeterList['Meter ID']

# Random daily consumption for every meter over the whole window.
ForecastTable = random_forecast_table(
    listOfMeters=RandomMeters,
    start_date=pd.Timestamp(year=YYYY, month=MM, day=DD),
    duration=Duration,
)
def _cost_of_forecast(forecast, rates):
    """Return the cost in pounds, rounded to 2 decimals, of one meter's forecast.

    Each row of ``rates`` is in force from its ``date`` up to (but excluding)
    the next row's ``date``; the last row stays in force indefinitely.
    Forecast days dated before the first rate are not priced.

    Args:
        forecast: Forecast rows of a single meter, with ``Date`` and ``kwh``
            columns.
        rates: Non-empty rate rows that apply to the meter, with ``date`` and
            ``rate_p_per_kwh`` (pence per kWh) columns.

    Returns:
        Sum over the forecast days of ``kwh * rate in force that day / 100``.
    """
    if forecast.empty:
        return 0.0

    # The lookup below needs ascending dates; mergesort is stable, so rows
    # sharing a date keep their table order and the later row takes effect.
    rates = rates.sort_values('date', kind='mergesort')
    rate_dates = rates['date'].values
    pence_per_kwh = rates['rate_p_per_kwh'].values

    # For every forecast day: index of the last rate dated on or before it
    # (-1 when the day precedes every rate).
    period = np.searchsorted(rate_dates, forecast['Date'].values, side='right') - 1
    priced = period >= 0

    daily_cost = forecast['kwh'].values[priced] * pence_per_kwh[period[priced]] / 100
    return round(daily_cost.sum(), 2)


# Split the forecast once so that each meter's rows are a dictionary lookup
# instead of a scan over the whole table.
forecast_by_meter = {meter_id: rows for meter_id, rows in ForecastTable.groupby('Meter_ID', sort=False)}
no_forecast = ForecastTable.iloc[0:0]

for meterid, aq, ExitZone in zip(MeterList['Meter ID'],
                                 MeterList['Estimated Consumption (kWh)'],
                                 MeterList['Exit']):
    # Rates of the meter's exit zone whose AQ band contains the meter's AQ;
    # a missing upper bound means the band is open-ended.
    # NOTE(review): both bounds are inclusive, so an AQ exactly on a band
    # boundary matches two bands - confirm which band is intended.
    Rates1 = RateTable[(RateTable.exit_zone == ExitZone) & (RateTable.aq_min_kwh <= aq) &
                       ((RateTable.aq_max_kwh >= aq) | (RateTable.aq_max_kwh.isnull()))]
    if Rates1.empty:
        raise ValueError("No rate found for meter %s (exit zone %s, AQ %s kWh)" % (meterid, ExitZone, aq))

    Forecast1 = forecast_by_meter.get(meterid, no_forecast)
    TotalCost = _cost_of_forecast(Forecast1, Rates1)
    TotalConsumption = Forecast1['kwh'].sum()
    MetersList.append([meterid, TotalConsumption, TotalCost])

df = pd.DataFrame(MetersList, columns=['Meter ID', 'Total Estimated Consumption (kWh)', 'Total Cost (£)'])
print(df)

print("--- %s seconds ---" % (time.time() - start_time))
