In [1]:
# Module Importations
import datetime
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

In [2]:
# Project Module Importations
from data_analytics.src.data import cosmos_client_manager
from data_analytics.src.data import load_data
from data_analytics.src.data import test_unit
from data_analytics.src.features import data_munging
from web_app.business_logic import data_insights

In [3]:
# Load the Dash dataframe via the project's load_data helper.
# Per the recorded output below, the call prints a "Loaded Dash dataframe ..."
# message followed by the frame's info summary (97647 rows, 16 columns,
# including a datetime64 'setDatetime' column that later cells filter on).
# NOTE(review): the original comment says the data comes from a local store;
# the actual source is decided inside load_data - confirm there.
df_new = load_data.load_dash_dataframe()

Loaded Dash dataframe ...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97647 entries, 0 to 97646
Data columns (total 16 columns):
id             97647 non-null object
docType        97647 non-null object
busType        97647 non-null object
proType        97647 non-null object
timeId         97647 non-null object
quantity       97647 non-null float64
curveType      97647 non-null object
resolution     97647 non-null object
setDate        97647 non-null object
setPeriod      97647 non-null float64
powType        97647 non-null object
actFlag        97647 non-null object
docId          97647 non-null object
docRevNum      97647 non-null object
_ts            97647 non-null int64
setDatetime    97647 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(1), object(12)
memory usage: 11.9+ MB
None


In [5]:
# Work on a time-ordered copy so df_new itself is left untouched
df_timeseries = df_new.copy().sort_values(by=['setDatetime'])

# Analysis window; both bounds are exclusive (strict comparisons below)
start_date = datetime.datetime(2020, 3, 1)
end_date = datetime.datetime(2021, 1, 1)

# Keep only rows whose settlement datetime lies strictly inside the window
df_timeseries = df_timeseries[
    (df_timeseries['setDatetime'] > start_date)
    & (df_timeseries['setDatetime'] < end_date)
]

# One three-slot list per generation type: slot 0 = min, slot 1 = mean.
# TODO(review): slot 2 is never written and stays at its 0 placeholder
# (visible in the printed output) - decide which statistic belongs there.
data_summary = {
    generation_type: [0, 0, 0]
    for generation_type in ("Solar", "Wind Offshore", "Wind Onshore")
}

# Fill in the statistics for every generation type
for key in data_summary:

    # Ask the data_insights helpers for this generation type's figures
    generation_min = data_insights.return_min(key, df_timeseries)
    generation_mean = data_insights.return_mean(key, df_timeseries)

    # Write both values into the first two slots in one go
    data_summary[key][:2] = [generation_min, generation_mean]

# TODO: build a DataFrame from data_summary (not implemented yet)

# Tests
print(data_summary)

{'Solar': [0.0, 1904.8563542136098, 0], 'Wind Offshore': [0.0, 2461.8414811852185, 0], 'Wind Onshore': [0.0, 2451.949815457413, 0]}
