# GRAPHICAL ANALYSIS OF THE TeFE MODEL

## Prepping up

In [1]:
import pandas as pd
import numpy as np
import csv
import pickle
from pyspark.sql import SparkSession
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from operator import add
import kaleido
import plotly.io as pio

In [2]:
def load_into_df(_file):
    """Load a tabular data file into a pandas DataFrame.

    The format is chosen from the extension(s) of the file name itself
    (case-insensitive), so a folder or file stem that merely contains the
    letters ``pkl`` or ``csv`` no longer selects the wrong reader.
    Multi-part names such as ``data.csv.gz`` are still recognised.

    Parameters
    ----------
    _file : str or os.PathLike
        Path of the file to load. Recognised extensions: ``pkl``, ``csv``,
        ``feather``, ``parquet`` (checked in that order).

    Returns
    -------
    The loaded object (a DataFrame for csv/feather/parquet, whatever was
    pickled for pkl). For an unrecognised extension a message is printed and
    a two-row placeholder DataFrame is returned instead of raising.
    """
    from pathlib import Path

    # Only the final path component is inspected: directory names must not
    # influence the choice of reader.
    extensions = {suffix.lower().lstrip('.') for suffix in Path(_file).suffixes}

    if 'pkl' in extensions:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(_file, 'rb') as f:
            df = pickle.load(f)
    elif 'csv' in extensions:
        df = pd.read_csv(_file)
    elif 'feather' in extensions:
        df = pd.read_feather(_file)
    elif 'parquet' in extensions:
        df = pd.read_parquet(_file)
    else:
        # Best-effort behaviour kept on purpose: report and return a placeholder.
        print('not loaded, format unsupported')
        df = pd.DataFrame(['Not loaded', 'format unsupported'])

    return df

In [3]:
# Load the four CSV result tables used by the rest of the notebook.
# Each one is loaded in its own statement so that a failure on a later
# file leaves the earlier frames available.
mix_df = load_into_df(_file="normal_mix____ALL_NO_NO.csv")
contracts_df = load_into_df(_file="normal_contracts____ALL_NO_NO.csv")
technologic_df = load_into_df(_file="normal_technologic____ALL_NO_NO.csv")
agents_df = load_into_df(_file="normal_agents____ALL_NO_NO.csv")

MemoryError: Unable to allocate 128. KiB for an array with shape (16384,) and data type int64

In [None]:
# Overview of mix_df: row count, column count, column names and the number
# of distinct values in its `entry` column, followed by a preview.
print('lines, ', len(mix_df.index))
print('columns, ', len(mix_df.columns))
columns_mix_df = list(mix_df.columns)
print('columns are:', columns_mix_df)
print('Number of unique rows:', len(mix_df['entry'].unique()))
mix_df.head()

In [None]:
# Sum `Lumps` within every (period, seed) pair, then take the 0.5 quantile
# of those sums for each period.
(
    mix_df
    .groupby(['period', 'seed'], as_index=False)['Lumps']
    .sum()
    .groupby(['period'], as_index=False)
    .quantile(0.5)
)

In [None]:
# Overview of agents_df: row count, column count, column names and the number
# of distinct values in its `entry` column, followed by a preview.
print('lines, ', len(agents_df.index))
print('columns, ', len(agents_df.columns))
columns_agents_df = list(agents_df.columns)
print('columns are:', columns_agents_df)
print('Number of unique rows:', len(agents_df['entry'].unique()))
agents_df.head()

In [None]:
# Overview of contracts_df: row count, column count, column names and the
# number of distinct values in its `entry` column, followed by a preview.
print('lines, ', len(contracts_df.index))
print('columns, ', len(contracts_df.columns))
columns_contracts_df = list(contracts_df.columns)
print('columns are:', columns_contracts_df)
print('Number of unique rows:', len(contracts_df['entry'].unique()))
contracts_df.head()

In [None]:
# Overview of technologic_df: row count, column count, column names and the
# number of distinct values in its `entry` column, followed by a preview.
print('lines, ', len(technologic_df.index))
print('columns, ', len(technologic_df.columns))
columns_technologic_df = list(technologic_df.columns)
print('columns are:', columns_technologic_df)
print('Number of unique rows:', len(technologic_df['entry'].unique()))
technologic_df.head()

In [None]:
# Largest and smallest value of the `period` column of mix_df.
max_period = mix_df['period'].max()
min_period = mix_df['period'].min()

In [None]:
# Summary tables of agents_df per (period, genre): median, mean, first and
# third quartile of the columns listed in columns_agents_df. The first three
# are previewed with print; the last preview is the cell's displayed value.
main_agents_df_median = (
    agents_df
    .groupby(["period", 'genre'], as_index=False)[columns_agents_df]
    .median()
)
print(main_agents_df_median.head())

main_agents_df_mean = (
    agents_df
    .groupby(["period", 'genre'], as_index=False)[columns_agents_df]
    .mean()
)
print(main_agents_df_mean.head())

main_agents_df_25quart = (
    agents_df
    .groupby(["period", 'genre'], as_index=False)[columns_agents_df]
    .quantile(.25)
)
print(main_agents_df_25quart.head())

main_agents_df_75quart = (
    agents_df
    .groupby(["period", 'genre'], as_index=False)[columns_agents_df]
    .quantile(.75)
)
main_agents_df_75quart.head()

In [None]:
# The `period` column of the per-period maxima of agents_df, in reverse row order.
(
    agents_df
    .groupby(['period'], as_index=False)
    .max()['period']
    .iloc[::-1]
)

In [None]:
def iqr_plotly(var, dataframe, x_axis, groupby=None, remove_outliers=True, _sum=False, _old=False):
    """Compute the series needed to draw a median line with IQR and min/max bands.

    Parameters
    ----------
    var : str
        Column of ``dataframe`` to summarise.
    dataframe : pandas.DataFrame
        Must contain the columns ``x_axis``, ``'seed'`` and ``var``.
    x_axis : str
        Column whose distinct values form the x axis (and the grouping key).
    groupby : list, optional
        Keys of the across-seed aggregation used when ``_sum`` or ``_old`` is
        set. Defaults to ``[x_axis]``. Not used in the other mode.
    remove_outliers : bool
        When truthy, the max/min envelope is clipped to
        ``Q3 + 1.5 * IQR`` / ``Q1 - 1.5 * IQR``.
    _sum : bool
        When truthy, ``var`` is summed per (x_axis, seed) and the statistics
        are taken across seeds.
    _old : bool
        When truthy (and ``_sum`` is not), ``var`` is averaged per
        (x_axis, seed) and the statistics are taken across seeds.

    With neither flag set, every statistic is computed inside each
    (x_axis, seed) group and then averaged over seeds.

    Returns
    -------
    tuple
        ``(x, x_rev, y_upper, y_median, y_mean, y_bottom, y_max, y_min)`` as
        lists. ``y_bottom`` and ``y_min`` are returned REVERSED so that
        ``x + x_rev`` paired with ``y_upper + y_bottom`` (or
        ``y_max + y_min``) traces a closed polygon.
    """
    if groupby is None:
        groupby = [x_axis]

    # Sorted distinct x values. Counting group sizes avoids aggregating every
    # other column, which is costly and can fail on unorderable object columns.
    x = dataframe.groupby(x_axis).size().index.tolist()
    x_rev = x[::-1]

    # Built once and reused for every statistic below.
    per_seed = dataframe.groupby([x_axis, 'seed'], as_index=False)[var]

    if _sum or _old:
        seed_level = per_seed.sum() if _sum else per_seed.mean()
        across_seeds = seed_level.groupby(groupby, as_index=False)

        _y_max = list(across_seeds.quantile(1)[var])
        y_upper = list(across_seeds.quantile(.75)[var])
        y_median = list(across_seeds.median()[var])
        y_mean = list(across_seeds.mean()[var])
        y_bottom = list(across_seeds.quantile(.25)[var])
        _y_min = list(across_seeds.quantile(0)[var])

    else:

        def _seed_average(per_seed_stat):
            # Average a per-(x_axis, seed) statistic over the seeds.
            return list(per_seed_stat.groupby([x_axis], as_index=False).mean()[var])

        _y_max = _seed_average(per_seed.quantile(1))
        y_upper = _seed_average(per_seed.quantile(.75))
        y_median = _seed_average(per_seed.median())
        y_mean = _seed_average(per_seed.mean())
        y_bottom = _seed_average(per_seed.quantile(.25))
        _y_min = _seed_average(per_seed.quantile(0))

    if remove_outliers:
        y_max = []
        y_min = []
        for hi, q3, med, q1, lo in zip(_y_max, y_upper, y_median, y_bottom, _y_min):
            whisker = 1.5 * (q3 - q1)
            y_max.append(min(max(hi, q3, med, q1, lo), q3 + whisker))
            y_min.append(max(min(hi, q3, med, q1, lo), q1 - whisker))
    else:
        y_max = _y_max
        y_min = _y_min

    # Lower bounds are reversed so they close the filled polygon (see Returns).
    y_min = y_min[::-1]
    y_bottom = y_bottom[::-1]

    return x, x_rev, y_upper, y_median, y_mean, y_bottom, y_max, y_min

In [None]:
def simple_graph(name_o_var, var, dataframe, x_axis, groupby=None, remove_outliers=True, show=True, log_y=False, log_x=False, color='232,126,4', _sum=False):
    """Plot the median of ``var`` over ``x_axis`` with IQR and min/max bands.

    The figure is always written to ``Figures/<title>.html`` (the folder is
    created when missing) and optionally displayed.

    Parameters
    ----------
    name_o_var : str
        Human-readable name used in the legend, axis title, figure title and
        output file name.
    var, dataframe, x_axis, groupby, remove_outliers, _sum
        Forwarded to ``iqr_plotly``.
    show : bool
        Display the figure when truthy.
    log_y, log_x : bool
        Use a logarithmic y / x axis when truthy.
    color : str
        ``"R,G,B"`` components interpolated into ``rgba(...)`` colours.

    Returns
    -------
    None
    """
    import os

    x, x_rev, y_upper, y_median, y_mean, y_bottom, y_max, y_min = iqr_plotly(var, dataframe, x_axis, groupby, remove_outliers, _sum=_sum)

    invisible_outline = dict(color='rgba(255,255,255,0)')

    # Bands are closed polygons: forward along the upper bound, then backward
    # along the lower bound (iqr_plotly returns the lower bounds reversed).
    fig = go.Figure(go.Scatter(name="IQR (%s)" % name_o_var,
                               x=x + x_rev,
                               y=y_upper + y_bottom,
                               fill='toself',
                               fillcolor='rgba(%s,0.4)' % color,
                               line=invisible_outline))

    fig.add_trace(go.Scatter(name="Max and min (%s)" % name_o_var,
                             x=x + x_rev,
                             y=y_max + y_min,
                             fill='toself',
                             fillcolor='rgba(%s,0.2)' % color,
                             line=invisible_outline))

    # The median is a plain line: one x value per y value (not the polygon x).
    fig.add_trace(go.Scatter(name="Median (%s)" % name_o_var,
                             x=x,
                             y=y_median,
                             mode='lines',
                             line=dict(color='rgba(%s,1)' % color, dash='dot')))

    if log_y:
        fig.update_yaxes(type="log")
    if log_x:
        fig.update_xaxes(type="log")

    title = str(name_o_var) + ' over ' + str(x_axis)

    fig.update_layout(
        title=title,
        xaxis_title=str(x_axis),
        yaxis_title=str(name_o_var),
        barmode='overlay',
        template="simple_white")

    file_name = title + ".html"
    pathfile = 'Figures/'

    # write_html does not create missing folders.
    os.makedirs(pathfile, exist_ok=True)
    fig.write_html(pathfile + file_name)

    if show:
        fig.show()
    return None

In [None]:
# Default "R,G,B" colour string used for the plots; printed as a quick check.
color = '232,126,4'
print('%s' % (color,))

In [None]:
# One-off band plot of the `LSS_tot` column of agents_df over `period`,
# using the summed mode of iqr_plotly with outlier clipping.
name_o_var = 'LSS_tot'

dataframe = agents_df  # .loc[agents_df['genre'] == 'TP']

(x, x_rev,
 y_upper, y_median, y_mean, y_bottom,
 y_max, y_min) = iqr_plotly('LSS_tot', dataframe, 'period', remove_outliers=True, _sum=True)

fig = go.Figure(data=[
    # Inter-quartile band (closed polygon).
    go.Scatter(
        name="IQR (%s)" % name_o_var,
        x=x + x_rev,
        y=list(y_upper) + list(y_bottom),
        fill='toself',
        fillcolor='rgba(232,126,4,0.2)',
        line={'color': 'rgba(255,255,255,0)'},
    ),
    # Max/min band (closed polygon).
    go.Scatter(
        name="Max and min (%s)" % name_o_var,
        x=x + x_rev,
        y=list(y_max) + list(y_min),
        fill='toself',
        fillcolor='rgba(232,126,4,0.4)',
        line={'color': 'rgba(255,255,255,0)'},
    ),
    # Median line.
    go.Scatter(
        name="Median (%s)" % name_o_var,
        x=x,
        y=y_median,
        mode='lines',
        line={'color': 'rgba(232,126,4,1)', 'dash': 'dot'},
    ),
])

fig.update_layout(barmode='overlay', template="simple_white")
fig.show()

file_name = str(name_o_var) + ".html"
pathfile = 'Figures/'

# Saving is disabled for this exploratory figure.
# fig.write_html(pathfile + file_name)

In [None]:
# Filter each agent genre ONCE and share the resulting frames between the
# graph definitions below; repeating the filter per entry keeps many copies
# of the same rows alive in graphs_list. The frames are only read by
# simple_graph, so sharing them is safe.
_agents_tp = agents_df.loc[agents_df['genre'] == 'TP']
_agents_ep = agents_df.loc[agents_df['genre'] == 'EP']
_agents_dd = agents_df.loc[agents_df['genre'] == 'DD']

# One entry per figure. Optional keys: 'log_y', 'log_x' (default False).
graphs_list = [
    {
        "name_o_var": 'Number of adaptations',
        "var": 'LSS_tot',
        "dataframe": agents_df,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Number of adaptations of Technology producers',
        "var": 'LSS_tot',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Number of adaptations of Energy providers',
        "var": 'LSS_tot',
        "dataframe": _agents_ep,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Full number of adaptations',
        "var": 'LSS_weak',
        "dataframe": agents_df,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Full number of adaptations of Technology producers',
        "var": 'LSS_weak',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Full number of adaptations of Energy providers',
        "var": 'LSS_weak',
        "dataframe": _agents_ep,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Money',
        "var": 'wallet',
        "dataframe": agents_df,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Money of Technology producers',
        "var": 'wallet',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Money of Energy providers',
        "var": 'wallet',
        "dataframe": _agents_ep,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Ammount to shareholders',
        "var": 'shareholder_money',
        "dataframe": agents_df,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Ammount to shareholders of Technology producers',
        "var": 'shareholder_money',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Ammount to shareholders of Energy providers',
        "var": 'shareholder_money',
        "dataframe": _agents_ep,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Investment in capacity of Technology producers',
        "var": 'capacity',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Investment in R&D to shareholders of Technology producers',
        "var": 'RandD',
        "dataframe": _agents_tp,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Remaining demand',
        "var": 'Remaining_demand',
        "dataframe": _agents_dd,
        "x_axis": 'period',
    },
]

for graph in graphs_list:
    # Axis scaling is optional per entry and defaults to linear.
    log_y = graph.get('log_y', False)
    log_x = graph.get('log_x', False)
    simple_graph(graph['name_o_var'], graph['var'], graph['dataframe'], graph['x_axis'], groupby=None, remove_outliers=True, show=True, log_y=log_y, log_x=log_x)

# Same variable as the last entry, but summed per (period, seed).
simple_graph('Remaining demand (sum)', 'Remaining_demand', _agents_dd, 'period', groupby=None, remove_outliers=True, show=True, _sum=True)

In [None]:
# Row mask shared by all "Electricity produced" entries; computed once.
_contracted = mix_df['status'] == 'contracted'

# One entry per figure. Optional keys: 'sum', 'log_y', 'log_x' (default False).
# NOTE(review): the "lumps" entries label source 1 as wind and source 2 as
# solar, while the "Electricity produced" entries label source 1 as solar and
# source 2 as wind. One of the two sets of labels is presumably wrong; confirm
# the source coding before trusting the wind/solar figures.
graphs_list = [
    {
        "name_o_var": 'Avoided emissions',
        "var": 'avoided_emissions',
        "dataframe": mix_df,
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Number of lumps',
        "var": 'Lumps',
        "dataframe": mix_df,
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Number of lumps of wind',
        "var": 'Lumps',
        "dataframe": mix_df.loc[mix_df['source'] == 1],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Number of lumps of solar',
        "var": 'Lumps',
        "dataframe": mix_df.loc[mix_df['source'] == 2],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Number of lumps of thermal',
        "var": 'Lumps',
        "dataframe": mix_df.loc[mix_df['source'] == 0],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Greeness of the system',
        "var": 'green',
        "dataframe": mix_df,
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Price',
        "var": 'price',
        "dataframe": mix_df,
        "x_axis": 'period',
    },
    {
        "name_o_var": 'Electricity produced',
        "var": 'MWh',
        "dataframe": mix_df.loc[_contracted],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Electricity produced by solar',
        "var": 'MWh',
        "dataframe": mix_df.loc[_contracted & (mix_df['source'] == 1)],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Electricity produced by wind',
        "var": 'MWh',
        "dataframe": mix_df.loc[_contracted & (mix_df['source'] == 2)],
        "x_axis": 'period',
        "sum": True,
    },
    {
        "name_o_var": 'Electricity produced by thermal',
        "var": 'MWh',
        "dataframe": mix_df.loc[_contracted & (mix_df['source'] == 0)],
        "x_axis": 'period',
        "sum": True,
    },
]


for graph in graphs_list:
    log_y = graph.get('log_y', False)
    log_x = graph.get('log_x', False)
    # Honour the VALUE of "sum": an explicit `"sum": False` must not enable it.
    _sum = graph.get('sum', False)

    simple_graph(graph['name_o_var'], graph['var'], graph['dataframe'], graph['x_axis'], groupby=None, remove_outliers=True, show=True, log_y=log_y, log_x=log_x, _sum=_sum)

    print('DONE')
print('All DONE')

In [None]:
#simple_graph('Electricity produced by thermal', 'MWh', mix_df.loc[(mix_df['status'] == 'contracted') & (mix_df['source'] == 0)], 'period', groupby=None, remove_outliers=True, show=True, _sum=True)

In [None]:
def _growth_rates(values):
    """Return the relative change of each value with respect to the previous one.

    The result has the same length as ``values``: the first element is 0 (no
    previous value), and a change from a previous value of 0 is reported as 0.
    """
    if not values:
        return []
    rates = [0]
    for previous, current in zip(values, values[1:]):
        rates.append((current - previous) / previous if previous != 0 else 0)
    return rates


def scatter_graph(full_x, full_y, speed=False, groupby=None, show=True):
    """Scatter the per-period median of one variable against another.

    Each variable is first aggregated per (period, seed) (sum or mean), then
    the median across seeds is taken for every ``groupby`` value. Points are
    coloured by period. Both axes are logarithmic. The figure is written to
    ``Figures/<title>.html`` (the folder is created when missing).

    Parameters
    ----------
    full_x, full_y : sequence
        ``[column_name, dataframe, use_sum]`` for the x and y variable. The
        dataframe needs the columns ``'period'``, ``'seed'`` and the named
        column; ``use_sum`` selects sum (truthy) or mean per (period, seed).
    speed : bool
        When truthy, plot period-over-period relative changes instead of
        levels. The title and file name then get a ``" (speed)"`` suffix so
        the speed plot does not overwrite the level plot of the same pair.
    groupby : str or list, optional
        Key of the across-seed median. Defaults to ``'period'``.
    show : bool
        Display the figure when truthy, otherwise print a completion message.

    Returns
    -------
    None
    """
    import os

    if groupby is None:
        groupby = 'period'

    x_var, DF_x, _sum_x = full_x[0], full_x[1], full_x[2]
    y_var, DF_y, _sum_y = full_y[0], full_y[1], full_y[2]

    per_seed_x = DF_x.groupby(['period', 'seed'], as_index=False)[x_var]
    per_seed_y = DF_y.groupby(['period', 'seed'], as_index=False)[y_var]
    DF_x = per_seed_x.sum() if _sum_x else per_seed_x.mean()
    DF_y = per_seed_y.sum() if _sum_y else per_seed_y.mean()

    x = list(DF_x.groupby(groupby, as_index=False).median()[x_var])
    y = list(DF_y.groupby(groupby, as_index=False).median()[y_var])

    if speed:
        # Keep one value per period (first period = 0) so that points stay
        # aligned with the per-period colours below.
        x = _growth_rates(x)
        y = _growth_rates(y)

    fig = go.Figure(go.Scatter(x=x,
                               y=y,
                               mode='markers',
                               marker=dict(
                                   color=list(DF_x.groupby(['period'], as_index=False).max()['period']),
                                   colorscale='Viridis',
                                   line_width=1,
                                   showscale=True)))

    # NOTE(review): with log axes, zero or negative values (common in speed
    # mode) cannot be drawn; confirm that log scaling is wanted there.
    fig.update_yaxes(type="log")
    fig.update_xaxes(type="log")

    title = str(x_var) + " in relation to " + str(y_var)
    if speed:
        title += " (speed)"

    fig.update_layout(
        title=title,
        xaxis_title=str(x_var),
        yaxis_title=str(y_var),
        barmode='overlay',
        template="simple_white")

    file_name = title + ".html"
    pathfile = 'Figures/'

    # write_html does not create missing folders.
    os.makedirs(pathfile, exist_ok=True)
    fig.write_html(pathfile + file_name)

    if show:
        fig.show()
    else:
        print(str(file_name) + ' is done')
    return None

In [None]:
# Filter the energy providers once; the same frame feeds both axes of both
# plots (scatter_graph only reads it).
_agents_ep_scatter = agents_df.loc[agents_df['genre'] == 'EP']

# LSS_tot against wallet for energy providers (means per period and seed):
# first as period-over-period changes, then as levels.
scatter_graph(['LSS_tot', _agents_ep_scatter, False],
              ['wallet', _agents_ep_scatter, False],
              speed=True,
              show=False)

scatter_graph(['LSS_tot', _agents_ep_scatter, False],
              ['wallet', _agents_ep_scatter, False],
              speed=False,
              show=False)


