In [1]:
import pandas as pd
from datetime import datetime

# Read the three sheets in a single pass over the workbook instead of
# re-opening and re-parsing the file once per sheet.
DATA_FILE = 'gorilla_test_data.xlsx'
_sheets = pd.read_excel(DATA_FILE, sheet_name=['meter_list', 'forecast_table', 'rate_table'])
meter_list = _sheets['meter_list']
forecast_data = _sheets['forecast_table']
rate_data = _sheets['rate_table']

# Make sure both date columns are datetime64; values that cannot be parsed
# become NaT (errors='coerce').
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where consistent-format inference is the default behaviour.
forecast_data['date'] = pd.to_datetime(forecast_data['date'], errors='coerce')
rate_data['date'] = pd.to_datetime(rate_data['date'], errors='coerce')

In [2]:
# Meter_list functions 

def return_aq_kwh(df_list, meter_id):
    """Return the ``aq_kwh`` value stored for ``meter_id``.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table; its ``meter_id`` and ``aq_kwh`` columns are read.
    meter_id
        Identifier to look up in the ``meter_id`` column.

    Returns
    -------
    The ``aq_kwh`` entry of the first row whose ``meter_id`` matches.

    Raises
    ------
    ValueError
        If ``meter_id`` does not occur in ``df_list['meter_id']``.
    """
    if meter_id in df_list['meter_id'].values:
        is_requested_meter = df_list['meter_id'] == meter_id
        # Only the first match is used if the id appears on several rows.
        return df_list.loc[is_requested_meter, 'aq_kwh'].values[0]

    raise ValueError(f"Meter ID {meter_id} not found in the meter_list.")

def return_exit_zone(df_list, meter_id):
    """Return the ``exit_zone`` value stored for ``meter_id``.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table; its ``meter_id`` and ``exit_zone`` columns are read.
    meter_id
        Identifier to look up in the ``meter_id`` column.

    Returns
    -------
    The ``exit_zone`` entry of the first row whose ``meter_id`` matches.

    Raises
    ------
    ValueError
        If ``meter_id`` does not occur in ``df_list['meter_id']``.
    """
    if meter_id in df_list['meter_id'].values:
        is_requested_meter = df_list['meter_id'] == meter_id
        # Only the first match is used if the id appears on several rows.
        return df_list.loc[is_requested_meter, 'exit_zone'].values[0]

    raise ValueError(f"Meter ID {meter_id} not found in the meter_list.")


# Forecast data functions 

# Convert input date in the correct datetime format
def convert_input_date(input_date):
    """Parse a date given as ``DD/MM/YYYY`` or ``YYYY-MM-DD``.

    The formats are tried in that order and the first one that parses wins.

    Parameters
    ----------
    input_date
        Date string in one of the two supported formats. Anything else that
        ``pd.to_datetime`` accepts for the given formats is returned as parsed.

    Returns
    -------
    The value produced by ``pd.to_datetime`` (a ``pandas.Timestamp`` for a
    date string).

    Raises
    ------
    ValueError
        If the input is ``None``/``NaT`` or matches neither format.
    """
    # Missing values are rejected up front: pd.to_datetime passes them through
    # instead of raising, which would only surface later as a confusing
    # comparison error in the callers.
    if input_date is None or input_date is pd.NaT:
        raise ValueError("Invalid date format. Please provide a valid date.")

    for date_format in ('%d/%m/%Y', '%Y-%m-%d'):
        try:
            return pd.to_datetime(input_date, format=date_format)
        except ValueError:
            # Not this format; try the next one.
            continue

    raise ValueError("Invalid date format. Please provide a valid date.")

def return_kwh(df_forecast, meter_id, input_date):
    """Return the forecast ``kwh`` of one meter on one date.

    Parameters
    ----------
    df_forecast : pandas.DataFrame
        Forecast table; its ``meter_id``, ``date`` and ``kwh`` columns are read.
    meter_id
        Identifier to match against the ``meter_id`` column.
    input_date
        Date in a format accepted by ``convert_input_date``.

    Returns
    -------
    The ``kwh`` entry of the first row matching both the meter and the date.

    Raises
    ------
    ValueError
        If ``input_date`` cannot be parsed by ``convert_input_date``.
    IndexError
        If no row matches the given meter and date.
    """
    target_date = convert_input_date(input_date)
    is_match = (df_forecast['meter_id'] == meter_id) & (df_forecast['date'] == target_date)
    matching_kwh = df_forecast.loc[is_match, 'kwh']

    if matching_kwh.empty:
        # Kept as IndexError (what `.values[0]` raises on an empty selection)
        # so existing handlers still work, but with a message that names the
        # missing meter/date instead of "index 0 is out of bounds".
        raise IndexError(f"No forecast found for meter ID {meter_id} on {target_date.date()}.")

    return matching_kwh.values[0]


# Rate data functions 

# Function to find the closest past date
def find_closest_past_date(df_rate, input_date):
    """Return the latest date in ``df_rate['date']`` that is not after ``input_date``.

    Parameters
    ----------
    df_rate : pandas.DataFrame
        Rate table; only its ``date`` column is read.
    input_date
        Date in a format accepted by ``convert_input_date``.

    Returns
    -------
    The most recent ``date`` value that is ``<= input_date``, or ``None`` when
    every date in ``df_rate`` lies after ``input_date``.

    Raises
    ------
    ValueError
        If ``input_date`` cannot be parsed by ``convert_input_date``.
    """
    input_date = convert_input_date(input_date)

    # Build the mask from the frame that was passed in, not from a
    # module-level table, so the function also works on filtered rate frames.
    rate_dates = df_rate['date']
    past_dates = rate_dates[rate_dates <= input_date]

    if past_dates.empty:
        return None

    # All candidates are <= input_date, so the closest one is the maximum.
    # This avoids mixing index labels with positional lookups.
    return past_dates.max()
    


In [4]:
# Table filters

# Date range filter
def daterange_filter(df_forecast, df_rate, start_date="01/04/2020", end_date="2030-12-31"):
    """Restrict the forecast and rate tables to a date window.

    Parameters
    ----------
    df_forecast : pandas.DataFrame
        Forecast table with a ``date`` column.
    df_rate : pandas.DataFrame
        Rate table with a ``date`` column.
    start_date, end_date
        Window bounds (inclusive), in a format accepted by
        ``convert_input_date``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The forecast rows dated within ``[start_date, end_date]`` and the rate
        rows dated from the date returned by ``find_closest_past_date`` for
        ``start_date`` up to ``end_date``.

    Raises
    ------
    ValueError
        If a date cannot be parsed, if the start date precedes the earliest
        rate date, lies after the end date, or lies after the latest forecast
        date.
    """
    window_start = convert_input_date(start_date)
    window_end = convert_input_date(end_date)

    # Sanity checks on the requested window, in this order.
    if window_start < df_rate['date'].min():
        raise ValueError("Start date is earlier than the available data.")
    if window_start > window_end:
        raise ValueError("Start date is later than end date.")
    if window_start > df_forecast['date'].max():
        raise ValueError("Start date is later than the latest date available in the forecast table.")

    # Forecast rows: plain inclusive window.
    forecast_dates = df_forecast['date']
    forecast_in_window = (forecast_dates >= window_start) & (forecast_dates <= window_end)
    df_forecast_filtered = df_forecast[forecast_in_window]

    # Rate rows: the window opens at the closest rate date on or before the
    # start date rather than at the start date itself.
    rates_from = find_closest_past_date(df_rate, window_start)
    rate_dates = df_rate['date']
    rates_in_window = (rate_dates >= rates_from) & (rate_dates <= window_end)
    df_rate_filtered = df_rate[rates_in_window]

    return df_forecast_filtered, df_rate_filtered

# meter_id filter
def meter_id_filter(df_list, meter_id, df_forecast, df_rate):
    """Keep only the rows relevant to a single meter.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table, used to look up the meter's exit zone.
    meter_id
        Identifier of the meter of interest.
    df_forecast : pandas.DataFrame
        Forecast table with a ``meter_id`` column.
    df_rate : pandas.DataFrame
        Rate table with an ``exit_zone`` column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Forecast rows belonging to ``meter_id`` and rate rows whose
        ``exit_zone`` equals that of the meter.

    Raises
    ------
    ValueError
        Propagated from ``return_exit_zone`` when the meter is unknown.
    """
    meter_zone = return_exit_zone(df_list, meter_id)

    rates_for_zone = df_rate[df_rate['exit_zone'] == meter_zone]
    forecast_for_meter = df_forecast[df_forecast['meter_id'] == meter_id]

    return forecast_for_meter, rates_for_zone

# aq_kwh filter
def aq_kwh_filter(df_rate, aq_kwh):
    """Keep the rate rows of the band that ``aq_kwh`` falls into.

    The rows are taken to repeat in groups of three (one row per band), so the
    selected band is extracted by taking every third row starting at the
    band's offset. Band limits are read by position from the first three rows:
    column 3 of rows 0 and 1 as exclusive upper limits of bands 1 and 2, and
    column 2 of row 2 as the inclusive lower limit of band 3.
    NOTE(review): columns 2 and 3 are presumably ``aq_min_kwh`` and
    ``aq_max_kwh`` -- confirm against the rate sheet layout.

    Parameters
    ----------
    df_rate : pandas.DataFrame
        Rate table laid out as described above.
    aq_kwh : number
        Value used to pick the band.

    Returns
    -------
    pandas.DataFrame
        Every third row of ``df_rate`` starting at the selected band's offset.

    Raises
    ------
    ValueError
        If ``aq_kwh`` is negative or matches none of the three bands.
    """
    # Reject negative values first; otherwise they would skip the first band
    # (which requires aq_kwh >= 0) and be accepted by the second band's
    # upper-limit check.
    if aq_kwh < 0:
        raise ValueError("Invalid aq_kwh value.")

    if aq_kwh < df_rate.iloc[0, 3]:
        band_offset = 0
    elif aq_kwh < df_rate.iloc[1, 3]:
        band_offset = 1
    elif aq_kwh >= df_rate.iloc[2, 2]:
        # Inclusive lower limit, matching the half-open [min, max) convention
        # of the first two bands, so a value sitting exactly on the boundary
        # is not rejected.
        band_offset = 2
    else:
        raise ValueError("Invalid aq_kwh value.")

    return df_rate.iloc[band_offset::3]

def total_filter(df_list, meter_id, df_forecast, df_rate, start_date="01/04/2020", end_date="2030-12-31"):
    """Apply the date, meter and ``aq_kwh`` filters in sequence.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table.
    meter_id
        Identifier of the meter of interest.
    df_forecast, df_rate : pandas.DataFrame
        Forecast and rate tables to filter.
    start_date, end_date
        Window bounds passed on to ``daterange_filter``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The forecast rows after the date and meter filters, and the rate rows
        after the date, meter and ``aq_kwh`` filters.
    """
    # Step 1: date window.
    forecast_in_range, rates_in_range = daterange_filter(df_forecast, df_rate, start_date, end_date)

    # Step 2: rows for this meter / its exit zone.
    forecast_for_meter, rates_for_zone = meter_id_filter(
        df_list, meter_id, forecast_in_range, rates_in_range
    )

    # Step 3: rate rows of the band selected by the meter's aq_kwh.
    meter_aq_kwh = return_aq_kwh(df_list, meter_id)
    rates_for_band = aq_kwh_filter(rates_for_zone, meter_aq_kwh)

    return forecast_for_meter, rates_for_band

In [5]:
# Calculation of totals 

# Create new dataframe based on fc_filtered and rate_filtered
def results_meter_id(fc_filtered, rate_filtered):
    """Attach the applicable rate to every forecast row.

    The forecast is left-joined to the rate table on ``date``, so only rows
    dated exactly on a rate date get a rate from the join. The remaining rows
    are filled in two steps:

    1. forward fill, so each row takes the most recent rate above it;
    2. rows before the first matched rate date fall back to the first row of
       ``rate_filtered``.

    The forward fill must run first. Filling with the first rate row first
    would fill every gap at once and price all later periods at the first rate.

    NOTE(review): the forward fill follows row order, so ``fc_filtered`` is
    assumed to be in chronological order -- confirm for the input data.

    Parameters
    ----------
    fc_filtered : pandas.DataFrame
        Forecast rows with a ``date`` column.
    rate_filtered : pandas.DataFrame
        Rate rows with ``date``, ``exit_zone``, ``aq_min_kwh``, ``aq_max_kwh``
        and ``rate_p_per_kwh`` columns; must contain at least one row.

    Returns
    -------
    pandas.DataFrame
        The forecast rows plus ``exit_zone`` and ``rate_p_per_kwh``; the
        ``aq_min_kwh`` and ``aq_max_kwh`` columns are dropped.
    """
    rate_columns = ['exit_zone', 'rate_p_per_kwh']

    df_result = pd.merge(fc_filtered, rate_filtered, how='left', on='date')
    df_result = df_result.drop(columns=['aq_min_kwh', 'aq_max_kwh'])

    # Step 1: carry each rate forward until the next rate date.
    df_result[rate_columns] = df_result[rate_columns].ffill()

    # Step 2: leading rows that precede the first matched rate date.
    first_rate = rate_filtered.iloc[0]
    df_result[rate_columns] = df_result[rate_columns].fillna(
        {column: first_rate[column] for column in rate_columns}
    )
    return df_result

# Calculate daily charges in pounds and round
def calculate_daily_charge(df_results):
    """Add a ``daily_charge_pounds`` column to ``df_results``.

    The charge is ``kwh * rate_p_per_kwh * 0.01`` rounded to two decimals.
    The column is written onto the frame that was passed in, and that same
    frame is returned.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Frame with ``kwh`` and ``rate_p_per_kwh`` columns; modified in place.

    Returns
    -------
    pandas.DataFrame
        ``df_results`` itself, with the new column.
    """
    kwh_times_rate = df_results['kwh'] * df_results['rate_p_per_kwh']
    charge_pounds = kwh_times_rate * 0.01
    df_results['daily_charge_pounds'] = charge_pounds.round(2)
    return df_results

# Aggregated by meter_id
def results_aggregated(df_results):
    """Sum ``kwh`` and ``daily_charge_pounds`` per ``meter_id``.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Frame with ``meter_id``, ``kwh`` and ``daily_charge_pounds`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per ``meter_id`` (as the index) with the columns
        ``total_kwh`` and ``total_charge_pounds``.
    """
    total_names = {'kwh': 'total_kwh', 'daily_charge_pounds': 'total_charge_pounds'}
    summed_columns = list(total_names)

    totals_per_meter = df_results.groupby('meter_id')[summed_columns].sum()
    return totals_per_meter.rename(columns=total_names)

def get_results(df_list, meter_id, df_forecast, df_rate, start_date="01/04/2020", end_date="2030-12-31"):
    """Compute the total ``kwh`` and total charge for one meter.

    Runs the full pipeline: filter the tables, attach rates to the forecast
    rows, compute the daily charges and aggregate them per meter.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table.
    meter_id
        Identifier of the meter of interest.
    df_forecast, df_rate : pandas.DataFrame
        Forecast and rate tables.
    start_date, end_date
        Window bounds passed on to ``total_filter``.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``results_aggregated`` for this meter.
    """
    forecast_rows, rate_rows = total_filter(
        df_list, meter_id, df_forecast, df_rate, start_date, end_date
    )
    rated_forecast = results_meter_id(forecast_rows, rate_rows)
    charged_forecast = calculate_daily_charge(rated_forecast)
    return results_aggregated(charged_forecast)

In [7]:
# All meter_id results using a loop (not vectorized!)
def get_results_for_multiple_ids(df_list, meter_ids, df_forecast, df_rate, start_date="01/04/2020", end_date="2030-12-31"):
    """Compute the totals for several meters and stack them into one frame.

    ``get_results`` is called once per meter id, in iteration order.

    Parameters
    ----------
    df_list : pandas.DataFrame
        Meter table.
    meter_ids : iterable
        Meter identifiers to process.
    df_forecast, df_rate : pandas.DataFrame
        Forecast and rate tables.
    start_date, end_date
        Window bounds passed on to ``get_results``.

    Returns
    -------
    pandas.DataFrame
        The per-meter results concatenated, with the index reset so that
        ``meter_id`` becomes a regular column.
    """
    per_meter_totals = [
        get_results(df_list, current_id, df_forecast, df_rate, start_date, end_date)
        for current_id in meter_ids
    ]
    return pd.concat(per_meter_totals).reset_index()