In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import numpy as np
pio.renderers.default = "notebook"

In [None]:
# Load the per-country energy and socio-economic tables, then key each one by
# (Entity, Continent, Year) so the two frames can be joined on a shared index.
data_pcc_country = (
    pd.read_csv("../../data/processed/pcc_energy_extrapolated_5_country.csv", index_col=0)
    .set_index(['Entity', 'Continent', 'Year'])
)
data_socio_country = (
    pd.read_csv("../../data/processed/socio_extrapolated_5_country.csv", index_col=0)
    .set_index(['Entity', 'Continent', 'Year'])
)

In [None]:
# Outer-join energy and socio data on the shared index, flatten the index back
# into columns, and order rows by year, then entity, with a fresh 0..n-1 index.
data_pcc_socio_country = (
    data_pcc_country
    .join(data_socio_country, how='outer')
    .reset_index()
    .sort_values(['Year', 'Entity'])
    .reset_index(drop=True)
)

# Share of per-capita energy that is low-carbon.
data_pcc_socio_country['Fraction of Low-carbon energy per capita'] = (
    data_pcc_socio_country['Low-carbon energy per capita (kWh)']
    .div(data_pcc_socio_country['Energy per capita (kWh)'])
)
data_pcc_socio_country.head(5)

In [None]:
x = "GDP_Per_Capita ($)"
size = "Population"

# Animated bubble chart (one frame per year) of the low-carbon fraction against
# the chosen x column, with a LOWESS trendline per trace. Rows missing either
# plotted value are dropped first; the x-range is taken from the full frame so
# the axis stays fixed across animation frames.
# (Optional extras left switched off: color="Continent",
#  trendline_color_override='black'.)
fig = px.scatter(
    data_pcc_socio_country.dropna(subset=[x, "Fraction of Low-carbon energy per capita"]),
    x=x,
    y="Fraction of Low-carbon energy per capita",
    size=size,
    animation_frame="Year",
    animation_group="Entity",
    hover_name="Entity",
    log_x=False,
    size_max=60,
    range_x=[np.min(data_pcc_socio_country[x]), np.max(data_pcc_socio_country[x]) * 1.1],
    range_y=[-0.2, 1.2],
    trendline="lowess",
    trendline_scope='trace',
    trendline_options=dict(frac=0.33),
)
fig.show()

# New stuff

In [None]:
# Short alias for the joined frame; the Dash callback below reads `df`.
df = data_pcc_socio_country

# Socio-economic columns offered in the dashboard dropdown.
col_int = [
    'GDP per capita ($)',
    'Child mortality rate (%)',
    'HDI',
    'Life expectancy (years)',
    'Tertiary education (%)',
    'Internet users (%)',
    'Tax revenue of total GDP (%)',
]

In [None]:
x = 'Tertiary education (%)'
y = "Fraction of Low-carbon energy per capita"

# Keep only rows where both plotted columns are present.
# `df` is the joined frame bound in the cell above. This cell previously read
# `df_social_energy`, which is only created much further down the notebook, so
# it failed with NameError on a fresh kernel (Restart & Run All).
df_int = df.dropna(subset=[x, y]).reset_index(drop=True)

# Walk the years in ascending order and discard each one until the first year
# in which exactly six distinct Continent values are present; stop there.
for year in np.sort(df_int['Year'].unique()):
    n_continents = df_int.loc[df_int['Year'] == year, 'Continent'].nunique(dropna=False)
    if n_continents == 6:
        break
    df_int = df_int[df_int['Year'] != year].reset_index(drop=True)

In [None]:
# Display the filtered frame built in the previous cell.
df_int

In [None]:
import pandas as pd
import dash
from dash import dcc
from dash import html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import plotly.express as px

In [None]:
# Create the Dash application, styled with the Bootstrap theme from
# dash_bootstrap_components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

In [None]:
def dropdown():
    """Build the column selector.

    Returns an ``html.Div`` (CSS class ``dropdown``) wrapping a
    ``dcc.Dropdown`` with id ``dropdown``. One option is offered per entry of
    the module-level ``col_int`` list, and "GDP per capita ($)" is
    pre-selected.
    """
    choices = [{'label': column, 'value': column} for column in col_int]
    selector = dcc.Dropdown(id='dropdown',
                            options=choices,
                            value="GDP per capita ($)")
    return html.Div([selector], className="dropdown")

def graph_scatter():
    """Return the ``dcc.Graph`` placeholder for the scatter-plot tab.

    The component id ``graph_scatter`` is the one the ``update_graph``
    callback writes its first figure to. Previously this function had a bare
    ``return`` (i.e. ``None``), so no component with that id was ever placed
    in the layout.
    """
    return dcc.Graph(id="graph_scatter")

def graph_scatter_overall_trend():
    """Return the ``dcc.Graph`` placeholder with id ``graph_scatter_overall_trend``."""
    placeholder = dcc.Graph(id="graph_scatter_overall_trend")
    return placeholder

def graph_continent_trend():
    """Return the ``dcc.Graph`` placeholder with id ``graph_continent_trend``."""
    placeholder = dcc.Graph(id="graph_continent_trend")
    return placeholder

In [None]:
# Page layout: heading row, column-selector row, then three tabs that each
# hold one graph placeholder followed by a line break. The tabs are generated
# from a (label, value, graph factory) table so they stay structurally alike.
app.layout = dbc.Container([
    dbc.Row([dbc.Col([html.Div("Relationship between energy types and social data", className="heading")])]),
    dbc.Row([dbc.Col(dropdown())]),
    dcc.Tabs(
        id="tabs-selector",
        value="tab-1",
        className="custom-tabs-container",
        children=[
            dcc.Tab(
                label=tab_label,
                value=tab_value,
                className="custom-tab",
                children=[html.Div([make_graph(), html.Br()])],
            )
            for tab_label, tab_value, make_graph in (
                ("ScatterPlot", "tab-1", graph_scatter),
                ("ScatterPlot_OverallTrend", "tab-2", graph_scatter_overall_trend),
                ("Continent_trend", "tab-3", graph_continent_trend),
            )
        ],
    ),
])

In [None]:
@app.callback([Output('graph_scatter', 'figure'),
               Output('graph_scatter_overall_trend', 'figure'),
               Output("graph_continent_trend", "figure")],
              [Input('dropdown', 'value'),
               Input('tabs-selector', 'value')])
def update_graph(dropdown, tab):
    """Redraw the figure of the active tab for the selected column.

    Parameters
    ----------
    dropdown : str or None
        Column chosen in the ``dropdown`` component; used as the x axis.
        ``None`` when the user clears the selection.
    tab : str
        Value of the active tab (``'tab-1'``, ``'tab-2'`` or ``'tab-3'``).

    Returns
    -------
    tuple
        Three figures, in the order of the callback outputs. Only the slot of
        the active tab carries a scatter plot; the others are empty figures.

    Notes
    -----
    * The tab input listens to ``tabs-selector``, the id the ``dcc.Tabs``
      component is given in the layout (it was ``'tab'``, which matches no
      component).
    * Placeholders are ``go.Figure()``; ``plotly.express`` has no ``plot``
      function, so the former ``px.plot()`` calls raised AttributeError.
    * All three tabs currently draw the same chart, as in the original
      copy-pasted branches; it is now built in one place.
    """
    # Local import keeps this cell self-contained; plotly is already a
    # dependency of the notebook via plotly.express.
    import plotly.graph_objects as go

    tab_order = ('tab-1', 'tab-2', 'tab-3')
    figures = [go.Figure() for _ in tab_order]

    x = dropdown
    y = "Fraction of Low-carbon energy per capita"

    # Nothing to draw for a cleared dropdown or an unknown tab value.
    if x is None or tab not in tab_order:
        return tuple(figures)

    # Rows missing either plotted value cannot be placed on the chart.
    df_int = df.dropna(subset=[x, y]).reset_index(drop=True)

    figures[tab_order.index(tab)] = px.scatter(
        df_int,
        x=x, y=y,
        size="Energy per capita (kWh)",
        color="Continent",
        animation_frame="Year", animation_group="Entity",
        hover_name="Entity", log_x=False, size_max=60,
        # Range taken from the unfiltered frame so the axis is stable across
        # animation frames; 10% headroom on the right.
        range_x=[np.min(df[x]), np.max(df[x]) * 1.1],
        range_y=[-0.2, 1.2],
        trendline="lowess",
        trendline_scope='trace',
        trendline_options=dict(frac=0.33),
    )

    return tuple(figures)


In [None]:
# Start the Dash server from the notebook, with debug mode and the reloader
# switched off.
# NOTE(review): newer Dash releases favour `app.run(...)` over `run_server`;
# confirm against the installed Dash version.
app.run_server(debug=False, use_reloader=False)

# OLD ANALYSIS

In [None]:
# Columns of interest for the old time analysis. This rebinds the `col_int`
# list that the dashboard dropdown reads.
# NOTE(review): these names differ from the spellings used earlier in the
# notebook (e.g. 'GDP per capita ($)') — confirm against the CSV header.
col_int = ['GDP_Per_Capita','HDI','Life_Expectancy','Child_Mortality_Rate']

In [None]:
# Narrow the joined frame to identifiers, the two energy measures, and the
# socio-economic columns of interest.
df_time_anal = data_pcc_socio_country[
    [
        'Entity',
        'Continent',
        'Year',
        'Energy per capita (kWh)',
        'Fraction of Low-carbon energy per capita',
        *col_int,
    ]
]

In [None]:
# Display the column subset selected in the previous cell.
df_time_anal

In [None]:
# Order rows by entity, then year, and renumber the index 0..n-1. The
# run-length loop further down addresses rows by that position.
df_time_anal = (
    df_time_anal
    .sort_values(['Entity', 'Year'])
    .reset_index(drop=True)
)

In [None]:
# Display the frame after sorting by Entity and Year.
df_time_anal

In [None]:
# Display only the identifier columns.
df_time_anal[['Entity','Year','Continent']]

In [None]:
# Build, for every quantile level 0.0 .. 0.9 and every column in `col_int`, a
# long table that records per row a running count ("Years") of how often an
# entity's value has stayed at or above that column's quantile threshold.
# Results for all quantile levels are collected in `qt_dfs`.
qt_dfs = []
n = df_time_anal.shape[0]  # rows per stacked copy of df_time_anal
ents = df_time_anal['Entity'].unique()

for qt in np.arange(0,1.0,0.1):
    print(qt)  # progress output: current quantile level
    # Stack len(col_int) copies of the identifier/energy columns, one block of
    # n rows per column of interest; block i_col occupies rows i_col*n .. i_col*n+n-1.
    qt_df = df_time_anal[['Entity','Year','Continent','Energy per capita (kWh)','Fraction of Low-carbon energy per capita']]
    for i in range(len(col_int)-1):
        qt_df = pd.concat([qt_df,df_time_anal[['Entity','Year','Continent','Energy per capita (kWh)','Fraction of Low-carbon energy per capita']]],axis=0)
    qt_df = qt_df.reset_index().drop(columns='index')

    # Result columns start out empty; rows left NaN in 'Years' are dropped below.
    qt_df['Q_Value'] = np.nan
    qt_df['Years'] = np.nan
    qt_df['Column'] = np.nan
    for i_col, col in enumerate(col_int):
        vals = df_time_anal[col] 
        # Threshold: the qt-quantile of the non-null values, rounded to 2 decimals.
        qt_val = np.round(np.quantile(vals[vals.isnull()==False],qt),2)
        
        count_years = 0
        # i_val is used both as a position and as an index label, so this
        # relies on df_time_anal carrying a 0..n-1 index.
        for i_val, val in enumerate(vals):
            if i_val > 0:
                # Restart the counter when the row belongs to a new entity.
                if (df_time_anal.loc[i_val,'Entity'] != df_time_anal.loc[i_val-1,'Entity']):
                    count_years = 0
            if (val >= qt_val):
                if i_val > 0:
                    # Count this row only if the previous row of the same
                    # entity was also at or above the threshold.
                    # NOTE(review): the counter is not reset when a value drops
                    # below the threshold, only on entity change — confirm intended.
                    if ((vals[i_val-1] >= qt_val) & (df_time_anal.loc[i_val,'Entity'] == df_time_anal.loc[i_val-1,'Entity'])):
                        count_years += 1
                        
                # Write the result into this column's block of the stacked frame.
                qt_df.loc[i_col*n+i_val,'Q_Value'] = qt_val
                qt_df.loc[i_col*n+i_val,'Column'] = col
                qt_df.loc[i_col*n+i_val,'Years'] = count_years

        # Add extra data for removing viz when data no longer exists
        # The first 2*len(ents) rows are copied only as a template; every
        # column of those rows is overwritten below.
        qt_extra = qt_df[0:len(ents)*2].copy()
        
        # Two placeholder rows per entity, with Years = -2000 and -1000,
        # Year 0, Continent 'NULL' and zeroed energy values.
        for indx, ent in enumerate(ents):
            for indy,years in enumerate([-2000,-1000]):
                qt_extra.loc[indx*2+indy,'Q_Value'] = qt_val
                qt_extra.loc[indx*2+indy,'Column'] = col
                qt_extra.loc[indx*2+indy,'Entity'] = ent
                qt_extra.loc[indx*2+indy,'Year'] = 0
                qt_extra.loc[indx*2+indy,'Years'] = years
                qt_extra.loc[indx*2+indy,'Continent'] = 'NULL'
                qt_extra.loc[indx*2+indy,'Energy per capita (kWh)'] = 0
                qt_extra.loc[indx*2+indy,'Fraction of Low-carbon energy per capita'] = 0

        # Placeholders are appended after the stacked blocks, so the block
        # offsets i_col*n used for the next column remain valid.
        qt_df = pd.concat([qt_df,qt_extra],axis=0).reset_index().drop(columns='index') 

    # Keep only rows that received a 'Years' value (at/above threshold, or placeholder).
    qt_df = qt_df[qt_df['Years'].isnull()==False].reset_index().drop(columns='index')       
    qt_dfs.append(qt_df)

# Combine all quantile levels into one frame, ordered by Year then Entity.
qt_dfs = pd.concat(qt_dfs,axis=0).reset_index().drop(columns='index')
#qt_dfs['Q_Value'] = qt_dfs['Q_Value'].astype(int)
qt_dfs = qt_dfs.sort_values(['Year','Entity']).reset_index().drop(columns='index')


In [None]:
x = "GDP_Per_Capita"
dat = qt_dfs[qt_dfs['Column'] == x]

# One line per entity: low-carbon fraction against the running "Years" count,
# animated over the quantile threshold value. Rows with any missing field are
# dropped before plotting.
# (Left switched off: filtering to a single entity such as 'Sweden', bubble
#  size by "Energy per capita (kWh)", and an overall LOWESS trendline.)
fig = px.line(
    dat.dropna().reset_index(drop=True),
    x='Years',
    y="Fraction of Low-carbon energy per capita",
    color="Entity",
    animation_frame="Q_Value",
    animation_group="Entity",
    hover_name="Entity",
    log_x=False,
    range_x=[-1, np.max(dat['Years']) + 1],
    range_y=[-0.1, 1.1],
)
fig.show()

In [None]:
# Rows of the quantile table that belong to the GDP column, renumbered 0..n-1.
is_gdp_row = qt_dfs['Column'] == 'GDP_Per_Capita'
test = qt_dfs[is_gdp_row].reset_index(drop=True)

In [None]:
# Display the GDP subset of the quantile table.
test

In [None]:
# Reload both source tables and key them by (Entity, Continent, Year).
data_pcc_country = (
    pd.read_csv("../../data/processed/pcc_energy_extrapolated_5_country.csv", index_col=0)
    .set_index(['Entity', 'Continent', 'Year'])
)
data_socio_country = (
    pd.read_csv("../../data/processed/socio_extrapolated_5_country.csv", index_col=0)
    .set_index(['Entity', 'Continent', 'Year'])
)

# Outer-join the two tables and order by year, then entity.
df_social_energy = (
    data_pcc_country
    .join(data_socio_country, how='outer')
    .reset_index()
    .sort_values(['Year', 'Entity'])
    .reset_index(drop=True)
)

# Share of per-capita energy that is low-carbon.
df_social_energy['Fraction of Low-carbon energy per capita'] = (
    df_social_energy['Low-carbon energy per capita (kWh)']
    .div(df_social_energy['Energy per capita (kWh)'])
)

# Socio-economic columns of interest (rebinds the earlier `col_int`).
col_int = [
    'GDP per capita ($)',
    'Child mortality rate (under 5 years - %)',
    'HDI',
    'Life expectancy (years)',
    'Tertiary education (%)',
    'Internet users (%)',
    'Tax revenue of total GDP (%)',
]

# Final ordering: year, then continent, then entity.
df_social_energy = (
    df_social_energy
    .sort_values(['Year', 'Continent', 'Entity'])
    .reset_index(drop=True)
)

In [None]:
# Rebind the short alias `df` (read by the Dash callback) to the rebuilt frame.
df = df_social_energy

In [None]:
x = col_int[0]
y = "Fraction of Low-carbon energy per capita"

# Keep rows where both the first column of interest and the low-carbon
# fraction are present, renumbered 0..n-1.
df_int = (
    df_social_energy
    .dropna(subset=[x, y])
    .reset_index(drop=True)
)

In [None]:
# Display the filtered frame built in the previous cell.
df_int

In [None]:
# Prototype of the dashboard figure, driven by three switches:
#   cont    - colour the points by continent
#   trend   - overlay a LOWESS trendline
#   scatter - size the markers by energy per capita (otherwise size is zero)
cont = False
trend = True
scatter = False

x = col_int[0]
y = "Fraction of Low-carbon energy per capita"

# Rows missing either plotted value are dropped before plotting.
df_int = df_social_energy.dropna(subset=[x, y]).reset_index(drop=True)

color = 'Continent' if cont else None

if scatter:
    size = "Energy per capita (kWh)"
else:
    # An all-zero size series, aligned with df_int.
    size = df_int[x]*0

# The trendline kind used to be stored in a global called `type`, which
# shadowed the builtin `type` for every later cell in the kernel session.
if trend:
    scope = 'trace'
    trendline_kind = 'lowess'
    frac = 0.6
else:
    scope = None
    trendline_kind = None
    frac = None

fig1 = px.scatter(df_int,
                  x=x, y=y,
                  size=size,
                  color=color,
                  animation_frame="Year", animation_group="Entity",
                  hover_name="Entity", log_x=False, size_max=60,
                  # Fixed axis range across frames, 10% headroom on the right.
                  range_x=[np.min(df_int[x]), np.max(df_int[x])*1.1],
                  range_y=[-0.2, 1.2],
                  trendline_scope=scope,
                  trendline=trendline_kind,
                  trendline_options=dict(frac=frac))

# Remove the outer margins around the figure.
fig1.update_layout(
    margin={"t": 0, "l": 0, "r": 0, "b": 0}
)

fig1.show()