In [1]:
# Module Importations
import datetime
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
# Project Module Importations
from data_analytics.src.data import cosmos_client_manager
from data_analytics.src.data import load_data
from data_analytics.src.data import test_unit
from data_analytics.src.features import data_munging
from web_app.business_logic import data_insights

In [3]:
# Load data from local store
# The project loader returns the Dash dataframe that every later cell starts
# from; the load message and schema summary in this cell's output are printed
# by this call, not by the notebook itself.
df_new = load_data.load_dash_dataframe()

Loaded Dash dataframe ...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97647 entries, 0 to 97646
Data columns (total 16 columns):
id             97647 non-null object
docType        97647 non-null object
busType        97647 non-null object
proType        97647 non-null object
timeId         97647 non-null object
quantity       97647 non-null float64
curveType      97647 non-null object
resolution     97647 non-null object
setDate        97647 non-null object
setPeriod      97647 non-null float64
powType        97647 non-null object
actFlag        97647 non-null object
docId          97647 non-null object
docRevNum      97647 non-null object
_ts            97647 non-null int64
setDatetime    97647 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(1), object(12)
memory usage: 11.9+ MB
None


In [7]:
# Build a per-generation-type summary (min, mean, max, sum, share of total)
# for the records that fall inside a fixed date window.

# sort_values returns a new frame, so df_new is left untouched without an
# explicit copy or an in-place mutation.
df_sorted = df_new.sort_values(by=['setDatetime'])

# Mask dataframe between start and end dates
# NOTE(review): both bounds are exclusive, so a row stamped exactly at
# start_date or end_date is dropped - confirm that is intended.
start_date = datetime.datetime(2020, 3, 1, 0, 0, 0)
end_date = datetime.datetime(2021, 1, 1, 0, 0, 0)

in_window = (
    (df_sorted['setDatetime'] > start_date)
    & (df_sorted['setDatetime'] < end_date)
)
df_timeseries = df_sorted[in_window]

# Calculate total generation across whole time series
total_generation = data_insights.return_total_sum(df_timeseries)

# Generation types to summarise, in the order they are reported
generation_types = [
    "Solar",
    "Wind Offshore",
    "Wind Onshore",
    "Hydro Run-of-river and poundage",
    "Hydro Pumped Storage",
    "Other",
    "Nuclear",
    "Fossil Oil",
    "Fossil Gas",
    "Fossil Hard coal",
    "Biomass",
]

# Labels for the five statistics stored per generation type, in list order
summary_stats = ["min", "mean", "max", "sum", "percent"]

# Populate one [min, mean, max, sum, percent] list per generation type
data_summary = {}
for key in generation_types:

    # Determine statistics for each generation type
    generation_min = data_insights.return_min(key, df_timeseries)
    generation_mean = data_insights.return_mean(key, df_timeseries)
    generation_max = data_insights.return_max(key, df_timeseries)
    generation_sum = data_insights.return_sum(key, df_timeseries)

    # The share of total is undefined when nothing was generated in the
    # window; report NaN rather than dividing by zero.
    if total_generation == 0:
        generation_percent = float("nan")
    else:
        generation_percent = (generation_sum / total_generation) * 100

    data_summary[key] = [
        generation_min,
        generation_mean,
        generation_max,
        generation_sum,
        generation_percent,
    ]

# Create df from dict: one column per generation type, one row per statistic
df_summary = pd.DataFrame(data_summary, index=summary_stats)

# Tests
print(data_summary)

{'Solar': [0.0, 1904.8563542136098, 9872.0, 16907505.0, 7.7050679748800395], 'Wind Offshore': [0.0, 2461.8414811852185, 6500.587, 21851304.987, 9.958050597333562], 'Wind Onshore': [0.0, 2451.949815457413, 7456.31, 21763506.562, 9.918039204007792], 'Hydro Run-of-river and poundage': [0.0, 378.06140153222174, 1050.0, 3355673.0, 1.5292432896793244], 'Hydro Pumped Storage': [0.0, 147.92012167643082, 2128.0, 1312939.0, 0.5983309921760203], 'Other': [0.0, 163.00608382154124, 424.0, 1446842.0, 0.6593531073278633], 'Nuclear': [0.0, 4886.8925191527715, 6265.0, 43376058.0, 19.767285319290995], 'Fossil Oil': [0.0, 0.0, 0.0, 0.0, 0.0], 'Fossil Gas': [0.0, 10113.23918431726, 25460.0, 89765111.0, 40.90764911958635], 'Fossil Hard coal': [0.0, 157.8262730959892, 4385.0, 1400866.0, 0.6384009795471479], 'Biomass': [0.0, 2056.5294051374494, 3161.0, 18253755.0, 8.31857941617089]}
