In [19]:
import os
import numpy as np
import pandas as pd
from neo4j import GraphDatabase
from dotenv import load_dotenv
from pathlib import Path 
import matplotlib.pyplot as plt
import psycopg2
from dash import Dash, html, dash_table, dcc
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Sanity check: yields one row whose IsNodesPresent flag tells whether the
# graph contains at least one node.
check_query = """
OPTIONAL MATCH (n) WITH n LIMIT 1 
RETURN n IS NOT NULL AS IsNodesPresent
"""
# Code, zone and name of up to 25 `country` nodes.
country_query = """
MATCH (n:country) RETURN n.code as Code, n.zone as Zone, n.name as Name LIMIT 25
"""

# One row per (country, IPCC code) emission record for a single country.
# `{key}` is a str.format placeholder (filled in by getRecords) that is compared
# against the country's `code` property.
emission_by_country_query = """
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
WHERE c.code = '{key}'
RETURN
    c.code as Code,
    c.zone as Zone,
    c.name as Name,
    i.ipcc_code as IPCC_Code,
    i.ipcc_name as IPCC_Code_Name,
    e.total_emission as Total_Emission
    ORDER BY Code
"""
# The five countries with the largest summed `total_emission` across all their
# IPCC codes, largest first. Takes no placeholders.
emission_by_all_country_query = """
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
RETURN
    c.code as Code,
    c.name as Name,
    SUM(e.total_emission) as Total_Emission
    ORDER BY Total_Emission DESC LIMIT 5
"""

# Emission rows for one IPCC code, exposing one extra property of the emission
# node. Two str.format placeholders: `{key1}` is compared against `ipcc_code`;
# `{key2}` is used both as the name of the emission property to read and as
# the suffix of the returned column alias (`Y_<key2>`).
emission_by_country_year_query = """
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
WHERE i.ipcc_code = '{key1}'
RETURN
    c.code as Code,
    c.zone as Zone,
    c.name as Name,
    i.ipcc_code as IPCC_Code,
    i.ipcc_name as IPCC_Code_Name,
    e.`{key2}` as Y_{key2},
    e.total_emission as Total_Emission
    ORDER BY Code
"""

# Top ten emission rows (by `total_emission`, descending) for one IPCC code.
# `{key}` is a str.format placeholder compared against `ipcc_code`.
emission_by_secotrs_query = """
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
WHERE i.ipcc_code = '{key}'
RETURN
    c.name as Country_Name,
    i.ipcc_code as Code,
    i.ipcc_name as Name, 
    e.total_emission as Total_Emission
    ORDER BY Total_Emission DESC LIMIT 10
"""

# Same columns as above for every country / IPCC code pair: no filter, no
# ordering, no limit, no placeholders.
emission_by_all_secotrs_query = """
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
RETURN
    c.name as Country_Name,
    i.ipcc_code as Code,
    i.ipcc_name as Name, 
    e.total_emission as Total_Emission
"""
# Every country / IPCC code pair together with the full property map of its
# emission node (returned in the `emissions` column), ordered by country code.
complete_emission_query="""
MATCH (c:country)-[r1:UNDER_IPCC_CODE]->(i:ipcc)-[r2:HAS_PRODUCED]->(e:emission)
WITH c,i,e,r1,r2
RETURN
    c.code as Code,
    c.zone as Zone,
    c.name as Name,
    i.ipcc_code as IPCC_Code,
    i.ipcc_name as IPCC_Code_Name,
    properties(e) as emissions
ORDER BY Code
"""

In [21]:
def connectToDataBases():
    """Create a Neo4j driver from the ``neo_*`` connection settings.

    ``../.env`` is loaded first, then ``neo_host``, ``neo_username`` and
    ``neo_password`` are read from the process environment, so variables that
    are already exported are honoured even when the file is absent.

    Returns:
        The object returned by ``GraphDatabase.driver``.

    Raises:
        RuntimeError: if any of the three settings is not defined. Previously
            this case printed a message and then failed on an unbound local.
    """
    env_path = Path('..') / '.env'
    # The return value is intentionally ignored: what matters is whether the
    # settings end up in the environment, which is checked explicitly below.
    load_dotenv(dotenv_path=env_path)

    settings = {
        name: os.environ.get(name)
        for name in ("neo_host", "neo_username", "neo_password")
    }
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            ".env file does not have required values! Missing: " + ", ".join(missing)
        )

    return GraphDatabase.driver(
        settings["neo_host"],
        auth=(settings["neo_username"], settings["neo_password"]),
    )

In [22]:
def getCountry(query,name=""):
    """Run ``query`` on the module-level Neo4j ``driver`` and tabulate the result.

    Args:
        query: Cypher text, executed as given.
        name: accepted for compatibility; the body does not use it.

    Returns:
        A pandas DataFrame with one row per returned record.
    """
    with driver.session() as db_session:
        # Materialise every record while the session is still open.
        rows = list(map(dict, db_session.run(query)))
    return pd.DataFrame(rows)
    
def getRecords(query, values=None):
    """Fill a Cypher template, run it on the module-level ``driver``, return a DataFrame.

    Args:
        query: Cypher text, optionally containing ``str.format`` placeholders.
        values: values for the placeholders. One value fills ``{key}``; two
            values fill ``{key1}`` and ``{key2}``; with no values (or more than
            two) the query is run exactly as given, without formatting.

    Returns:
        A pandas DataFrame with one row per returned record.

    Raises:
        ValueError: if a value about to be interpolated contains a quote,
            backtick or backslash.
    """
    params = [] if values is None else list(values)

    if len(params) == 1:
        bindings = {"key": params[0]}
    elif len(params) == 2:
        bindings = {"key1": params[0], "key2": params[1]}
    else:
        bindings = {}

    # NOTE(security): values are spliced into the query text, and some arrive
    # from Dash callbacks. The robust fix is driver-side parameters
    # (session.run(query, key=...) with $key in the template), which requires
    # changing the templates too. Until then, refuse characters that could
    # terminate the quoted / back-ticked context the value is placed in.
    unsafe_chars = "'\"`\\"
    for placeholder, value in bindings.items():
        if any(ch in unsafe_chars for ch in str(value)):
            raise ValueError(
                f"Unsafe character in value for '{placeholder}': {value!r}"
            )

    f_query = query.format(**bindings) if bindings else query

    with driver.session() as session:
        result = session.run(f_query)
        return pd.DataFrame([dict(record) for record in result])

In [23]:
# Module-level Neo4j driver; getCountry and getRecords open their sessions from it.
driver = connectToDataBases()

In [24]:
# Run each Neo4j query once when the cell executes. The list arguments fill the
# templates' placeholders: a country code, an IPCC code, and an
# (IPCC code, emission property name) pair respectively.
emission_by_country = getRecords(emission_by_country_query,["ABW"])
emission_by_secotrs= getRecords(emission_by_secotrs_query,["1.A.3.d"])
emission_by_all_sectors = getRecords(emission_by_all_secotrs_query)
emission_by_country_year = getRecords(emission_by_country_year_query,["1.A.3.d","2020"])
emission_by_all_countries = getRecords(emission_by_all_country_query)
# Raw form: the emission properties are still nested in an `emissions` column.
init_complete_emission = getRecords(complete_emission_query)

In [25]:
def createCompleteData(df):
    """Expand the nested ``emissions`` mapping of each row into flat columns.

    Args:
        df: DataFrame with the columns ``Code``, ``Zone``, ``Name``,
            ``IPCC_Code``, ``IPCC_Code_Name`` and ``emissions`` (a mapping).

    Returns:
        A new DataFrame with the five identifying columns followed by one
        column per key of ``emissions``, added in sorted key order.
    """
    identity_columns = ('Code', 'Zone', 'Name', 'IPCC_Code', 'IPCC_Code_Name')

    def flatten(row):
        # Identifying fields first, then the emission properties by sorted key.
        flat = {column: row[column] for column in identity_columns}
        properties = row['emissions']
        flat.update((f"{key}", properties[key]) for key in sorted(properties.keys()))
        return flat

    return pd.DataFrame([flatten(row) for _, row in df.iterrows()])

In [26]:
# Flattened emission table: one column per emission property instead of a nested map.
complete_emission = createCompleteData(init_complete_emission)

In [27]:
# Columns whose names consist only of digits (presumably the per-year emission
# columns; this excludes names such as total_emission).
year_cols = [col for col in complete_emission.columns if col.isdigit()]

## PostgreSQL connection

In [29]:
def connectToDB(db = "postgres"):
    """Open a PostgreSQL connection from the ``pg_*`` connection settings.

    ``../.env`` is loaded first, then ``pg_host``, ``pg_username``,
    ``pg_password`` and ``pg_port`` are read from the process environment, so
    variables that are already exported work even without the file.

    Args:
        db: name of the database to connect to.

    Returns:
        The psycopg2 connection, or ``None`` when a setting is missing or the
        connection attempt fails. The reason is printed in both cases; this
        function does not raise.
    """
    try:
        env_path = Path('..') / '.env'
        load_dotenv(dotenv_path=env_path)

        names = ("pg_host", "pg_username", "pg_password", "pg_port")
        settings = {name: os.environ.get(name) for name in names}
        missing = [name for name in names if settings[name] is None]
        if missing:
            # Name the missing settings instead of failing inside int(None).
            print("Missing PostgreSQL settings: " + ", ".join(missing))
            return None

        return psycopg2.connect(database=db,
                host=settings["pg_host"],
                user=settings["pg_username"],
                password=settings["pg_password"],
                port=int(settings["pg_port"]))
    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so one clause
        # covers both database failures and a non-numeric pg_port.
        print(error)
        return None

In [30]:
# Shared PostgreSQL connection and cursor used by the SQL cells and callbacks below.
conn = connectToDB("co2_emission")
if conn is None:
    # connectToDB reports failures by printing and returning None; stop here
    # with a clear message rather than an AttributeError on None.cursor().
    raise RuntimeError(
        "Could not connect to the 'co2_emission' PostgreSQL database; "
        "check the pg_* settings."
    )
cursor = conn.cursor()

In [2]:
# Code and name of every country, ordered alphabetically by name.
all_county_query = """SELECT code,name FROM country ORDER BY name ASC"""

# Per-country sum of one emission column. `{year}` is a str.format placeholder
# that completes the column name (Y_<year>); it is filled in by getEmissionByYear.
emission_by_year = """SELECT c.name,sum(e.Y_{year}) AS emission_rate FROM emission e 
INNER JOIN country c ON e.country_id = c.country_id GROUP BY c.name;"""

# Code and name of every IPCC entry, ordered by code.
get_all_ipcc = """ SELECT ipcc_code,ipcc_name from ipcc ORDER BY ipcc_code ASC"""

In [32]:
# Dropdown options: each row is (code, name), so the label is the country name
# and the value is its code.
cursor.execute(all_county_query)
country_list = [{"label":str(country[1]),"value":str(country[0])} for country in cursor.fetchall()]

In [33]:
# Load the IPCC reference table; each fetched row is (ipcc_code, ipcc_name).
cursor.execute(get_all_ipcc)
ipcc_codes = cursor.fetchall()
# Column-oriented copy of the rows with every value converted to str.
map_ipcc={
        "ipcc_code":[str(x[0]) for x in ipcc_codes],
        "ipcc_name":[str(x[1]) for x in ipcc_codes]
}
# DataFrame form, rendered by the DataTable in layout_01.
df_ipcc_list = pd.DataFrame(map_ipcc)

In [34]:
def getEmissionByYear(query,value="2000"):
    """Run a year-parameterised SQL template on the module-level ``cursor``.

    Args:
        query: SQL text containing a ``{year}`` ``str.format`` placeholder.
        value: the year to substitute; must consist solely of ASCII digits.

    Returns:
        The list of rows returned by ``cursor.fetchall()``.

    Raises:
        ValueError: if ``value`` is not all digits. The year becomes part of a
            column identifier, which cannot be passed as a bound parameter, so
            it is validated before any SQL is sent. This also keeps a bad
            value (e.g. ``None`` from a cleared dropdown) from producing a
            failing statement on the shared cursor.
    """
    year = str(value)
    if not (year.isascii() and year.isdigit()):
        raise ValueError(f"year must contain only digits, got {value!r}")
    cursor.execute(query.format(year=year))
    return cursor.fetchall()

In [35]:
# Per-country emission sums for the default year ("2000"); rows are (name, sum).
emissionByYear = getEmissionByYear(emission_by_year)
# Column-oriented copy with every value converted to str.
# NOTE(review): update_country_emission builds the same structure but converts
# the emission to int; confirm whether str is intended here.
map_plot={
        "country":[str(x[0]) for x in emissionByYear],
        "emission":[str(x[1]) for x in emissionByYear]
}

In [None]:
# cursor.execute(get_all_ipcc)
# ipcc_codes = cursor.fetchall()

In [36]:
# One total per year column: each year column of complete_emission is summed
# over all rows.
plot_data = {
    "Year":year_cols,
    'Emissions':complete_emission[year_cols].sum().values
}
# DataFrame form, plotted by the histogram and line chart in layout_01.
plot_df=pd.DataFrame(plot_data)

In [5]:
def layout_01():
    """Build the component tree of the first dashboard page.

    Top to bottom the page contains: the IPCC code table, a histogram and a
    line chart of emissions per year, a pie chart by IPCC code next to a bar
    chart of the top countries, a country dropdown with two graphs filled by
    callbacks, and year / continent dropdowns with a map filled by a callback.

    Reads the module-level ``df_ipcc_list``, ``plot_df``,
    ``emission_by_all_sectors``, ``emission_by_all_countries`` and
    ``country_list``.

    Returns:
        An ``html.Div`` with class name ``layout-1``.
    """
    ipcc_table = dash_table.DataTable(data=df_ipcc_list.to_dict('records'),
        style_table={
            'maxHeight': '300px',
            'overflowY': 'auto',
            'border': 'thin lightgrey solid'
        },
        style_header={
            'backgroundColor': 'rgb(30, 30, 30)',
            'color': 'white',
            'fontWeight': 'bold'
        },
        style_cell={
            'padding': '10px',
            'textAlign': 'left',
            'border': '1px solid lightgrey'
        },
        style_data_conditional=[
            {
                'if': {'row_index': 'odd'},
                'backgroundColor': 'rgb(248, 248, 248)'
            }
    ])

    # Titles previously misspelled "fuel" as "fule".
    yearly_histogram = px.histogram(plot_df, x='Year', y='Emissions', histfunc='avg',
                                    title="Global CO2 Emission from fossil fuel 2000-2023")
    yearly_trend = px.line(plot_df, x='Year', y='Emissions',
                           title="Trend of global CO2 Emission from fossil fuel 2000-2023")

    sector_pie = px.pie(emission_by_all_sectors, values='Total_Emission', names='Code',
                        title="Total CO2 Emission by IPCC code")
    sector_pie.update_traces(textposition='inside', textinfo='percent+label')

    # Sorted ascending so the largest emitter ends up at the top of the
    # horizontal bar chart.
    top_countries_bar = px.bar(emission_by_all_countries.sort_values('Total_Emission', ascending=True),
                               y='Name', x='Total_Emission', orientation='h',
                               title="Top 5 Countries of Total CO2 Emissions from 2000 - 2023")

    return html.Div([
    html.H3(children="Intergovernmental Panel on Climate Change codes and its description"),
    ipcc_table,
    dcc.Graph(figure=yearly_histogram),
    dcc.Graph(figure=yearly_trend),
    html.Div([
        dcc.Graph(figure=sector_pie)
    ],style={"display":"inline-block","width":"47%"}),
    html.Div([
        dcc.Graph(figure=top_countries_bar)
    ],style={"display":"inline-block","width":"47%", "float":"right"}),
    dcc.Dropdown(country_list, "BRA", id="country-dropdown"),
    html.Div(id="dd-output-container"),
    # The three graphs below start empty; the registered callbacks fill them.
    dcc.Graph(id="country-graph"),
    dcc.Graph(id="country-ipcc-pie-graph"),
    html.Div([
        html.Label("Select year:"),
        dcc.Dropdown([str(i) for i in range(2000,2024)], "2000", id="year-dropdown",),
    ], style={"display":"inline-block","width":"47%"}),
    html.Div([
        html.Label("Select Continent:"),
        dcc.Dropdown(["world" , "europe" , "asia" , "africa" , "north america","south america"], "europe", id="continent-dropdown"),
    ],style={"display":"inline-block","width":"47%", "float":"right"}),
    dcc.Graph(id="continent-map")],className="layout-1")

In [None]:
def callback_bar(app):
    """Register the callback that redraws ``country-graph`` for the selected country.

    Args:
        app: the application object whose ``callback`` decorator is used
            (presumably the Dash app that serves ``layout_01``).
    """
    @app.callback(
        Output('country-graph', 'figure'),
        Input('country-dropdown', 'value')
    )
    def update_bar(value):
        """Return a bar chart of total emission per IPCC code for country ``value``."""
        # A cleared dropdown sends None and an unknown code yields no rows;
        # both used to raise KeyError, so show an empty titled figure instead.
        if value is None:
            return go.Figure().update_layout(title="No country selected")

        emission_by_country = getRecords(emission_by_country_query,[value])
        if emission_by_country.empty:
            return go.Figure().update_layout(title=f"No emission data available for {value}")

        graph_df = emission_by_country[emission_by_country["Code"] == value]
        if graph_df.empty:
            return go.Figure().update_layout(title=f"No emission data available for {value}")

        # Positional access: the filtered frame keeps its original index labels.
        country_name = graph_df["Name"].iloc[0]
        fig = px.bar(graph_df,x="IPCC_Code",y="Total_Emission",title=f'Emission by IPCC code for {country_name}')
        return fig

In [None]:
def callback_pie(app):
    """Register the callback that redraws ``country-ipcc-pie-graph`` for the selected country.

    Args:
        app: the application object whose ``callback`` decorator is used
            (presumably the Dash app that serves ``layout_01``).
    """
    @app.callback(
        Output('country-ipcc-pie-graph', 'figure'),
        Input('country-dropdown', 'value')
    )
    def update_pie(value):
        """Return a pie chart of total emission per IPCC code for country ``value``."""
        # A cleared dropdown sends None and an unknown code yields no rows;
        # both used to raise KeyError, so show an empty titled figure instead.
        if value is None:
            return go.Figure().update_layout(title="No country selected")

        emission_by_country = getRecords(emission_by_country_query,[value])
        if emission_by_country.empty:
            return go.Figure().update_layout(title=f"No emission data available for {value}")

        graph_df = emission_by_country[emission_by_country["Code"] == value]
        if graph_df.empty:
            return go.Figure().update_layout(title=f"No emission data available for {value}")

        # Positional access: the filtered frame keeps its original index labels.
        country_name = graph_df["Name"].iloc[0]
        fig = px.pie(graph_df, values='Total_Emission', names='IPCC_Code', title=f'Emission by IPCC code for {country_name}')
        fig.update_traces(textposition='inside', textinfo='percent+label')
        return fig

In [None]:
def callback_country_emission(app):
    """Register the callback that redraws the ``continent-map`` choropleth.

    Args:
        app: the application object whose ``callback`` decorator is used
            (presumably the Dash app that serves ``layout_01``).
    """
    @app.callback(
        Output("continent-map", "figure"),
        [Input("year-dropdown", "value"),
         Input("continent-dropdown", "value")]
    )
    def update_country_emission(year_value,continent_value):
        """Return a choropleth of per-country emission for ``year_value`` within ``continent_value``."""
        # A cleared year dropdown sends None; do not push that into the SQL
        # template, return an empty map-sized figure instead.
        if year_value is None:
            return go.Figure().update_layout(title="Emission by Continent", height=600)

        emissionByYear = getEmissionByYear(emission_by_year,year_value)
        # SUM() is NULL for a country with no values in that year; int(None)
        # would raise, so such rows are skipped and stay uncoloured on the map.
        rows_with_data = [row for row in emissionByYear if row[1] is not None]
        graph_df = pd.DataFrame({
            "country":[str(row[0]) for row in rows_with_data],
            "emission":[int(row[1]) for row in rows_with_data]
        })

        # Fixed colour scale so maps of different years are directly comparable.
        # NOTE(review): the upper bound looks hand-picked; confirm against the data.
        emission_color_range = [0,13000000]

        fig = px.choropleth(graph_df,
                        locations="country",
                        locationmode="country names",
                        color="emission",
                        scope=continent_value,
                        title="Emission by Continent",
                        hover_name = "country",
                        hover_data = {"emission":True,"country":False},
                        labels={"emission":"Emission Rate"},
                        range_color=emission_color_range
                        )
        fig.update_geos(showcountries=True, showcoastlines=True)
        fig.update_layout(height=600,coloraxis_colorbar={"title":"emission","tickformat":",.0f"})
        return fig

In [None]:
# def emission_by_sectors(app):
#     @app.callback(
#         Output('ipcc-bar-graph', 'figure'),
#         Input('country-dropdown', 'value')
#     )