In [1]:
# Module Importations
import datetime
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
# Project Module Importations
from data_analytics.src.data import cosmos_client_manager
from data_analytics.src.data import load_data
from data_analytics.src.data import test_unit
from data_analytics.src.features import data_munging
from web_app.business_logic import data_insights

In [3]:
# Load the Dash dataframe from the local store via the project's load_data helper.
# Running this cell also prints a "Loaded Dash dataframe ..." banner followed by
# the frame's info() summary (see the captured output below).
df_new = load_data.load_dash_dataframe()

Loaded Dash dataframe ...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97647 entries, 0 to 97646
Data columns (total 16 columns):
id             97647 non-null object
docType        97647 non-null object
busType        97647 non-null object
proType        97647 non-null object
timeId         97647 non-null object
quantity       97647 non-null float64
curveType      97647 non-null object
resolution     97647 non-null object
setDate        97647 non-null object
setPeriod      97647 non-null float64
powType        97647 non-null object
actFlag        97647 non-null object
docId          97647 non-null object
docRevNum      97647 non-null object
_ts            97647 non-null int64
setDatetime    97647 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(1), object(12)
memory usage: 11.9+ MB
None


In [9]:
# Build a time-ordered frame; sort_values returns a new object, so df_new is untouched
df_timeseries = df_new.sort_values(by=['setDatetime'])

# Analysis window
start_date = datetime.datetime(2020, 3, 1, 0, 0, 0)
end_date = datetime.datetime(2021, 1, 1, 0, 0, 0)

# Keep only rows strictly inside the window.
# NOTE(review): both comparisons are strict, so rows stamped exactly at
# start_date or end_date are dropped -- confirm that is intended.
inside_window = (
    (df_timeseries['setDatetime'] > start_date)
    & (df_timeseries['setDatetime'] < end_date)
)
df_timeseries = df_timeseries[inside_window]

# Total generation over the whole window; denominator for the percentage share
total_generation = data_insights.return_total_sum(df_timeseries)

# Generation types to summarise (this order is the row order of df_summary)
generation_types = [
    "Solar",
    "Wind Offshore",
    "Wind Onshore",
    "Hydro Run-of-river and poundage",
    "Hydro Pumped Storage",
    "Other",
    "Nuclear",
    "Fossil Oil",
    "Fossil Gas",
    "Fossil Hard coal",
    "Biomass",
]

# One row per generation type: [min, mean, max, sum, percent of total]
data_summary = {}
for generation_type in generation_types:
    lowest = data_insights.return_min(generation_type, df_timeseries)
    average = data_insights.return_mean(generation_type, df_timeseries)
    highest = data_insights.return_max(generation_type, df_timeseries)
    subtotal = data_insights.return_sum(generation_type, df_timeseries)
    share = (subtotal / total_generation) * 100

    data_summary[generation_type] = [lowest, average, highest, subtotal, share]

# Turn the dict into a frame indexed by generation type.
# Columns are positional: 0=min, 1=mean, 2=max, 3=sum, 4=percent of total.
df_summary = pd.DataFrame.from_dict(data_summary, orient='index')

# Show the summary table
print(df_summary)

                                   0             1          2             3  \
Solar                            0.0   1904.856354   9872.000  1.690750e+07   
Wind Offshore                    0.0   2461.841481   6500.587  2.185130e+07   
Wind Onshore                     0.0   2451.949815   7456.310  2.176351e+07   
Hydro Run-of-river and poundage  0.0    378.061402   1050.000  3.355673e+06   
Hydro Pumped Storage             0.0    147.920122   2128.000  1.312939e+06   
Other                            0.0    163.006084    424.000  1.446842e+06   
Nuclear                          0.0   4886.892519   6265.000  4.337606e+07   
Fossil Oil                       0.0      0.000000      0.000  0.000000e+00   
Fossil Gas                       0.0  10113.239184  25460.000  8.976511e+07   
Fossil Hard coal                 0.0    157.826273   4385.000  1.400866e+06   
Biomass                          0.0   2056.529405   3161.000  1.825376e+07   

                                         4  
Solar 