In [1]:
import pymongo                      # Module for MongoDB access and connection
from pymongo import MongoClient
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.reset_option('future.no_silent_downcasting', True)
import warnings
warnings.filterwarnings('ignore')
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from dotenv import load_dotenv
from pathlib import Path
import os
from dash import Dash, html, dash_table, dcc, callback, Output, Input

In [2]:
env_path = Path('..') / '.env'

In [5]:
if(load_dotenv(dotenv_path=env_path)):
    uri = os.environ.get("mongo_uri_1")
else:
    print(".env file does not have required values!")

In [7]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
#uri = "mongodb+srv://Aniket:Anirane15isro@cluster0.3ko5x.mongodb.net/?appName=Cluster0"
# Creating a client and connecting to the server
client = MongoClient(uri, server_api=ServerApi('1'))

In [9]:
db = client.energy_database
collection = db.country_energy_stats

In [11]:
# Plot-1
def world_population_plot(collection):
    query = [ {
    "$group": {
        "_id": "$year",
            "totalpop": {"$sum": "$population"}
        }
      }
    ]
    
    result = list(collection.aggregate(query))
    df = pd.DataFrame(result)
    df.rename(columns={'_id':'year'},inplace=True)
    df = df[df['year'] < 2023]
    df = df.sort_values(by='year')

    fig_1 = px.line(df, df['year'], df['totalpop'], title='World Population over the years')
    return fig_1

In [13]:
# Plot-2
def global_elec_gen_by_source(collection):
    query_1 = [ {
    "$group": {
        "_id": "$year",
        "biofuel_electricity": {"$sum": "$biofuel_electricity"},
        "oil_electricity": {"$sum": "$oil_electricity"},
        "renewables_electricity": {"$sum": "$renewables_electricity"},
        "gas_electricity": {"$sum": "$gas_electricity"},
        "low_carbon_electricity": {"$sum": "$low_carbon_electricity"},
        "coal_electricity": {"$sum": "$coal_electricity"},
        "fossil_electricity": {"$sum": "$fossil_electricity"},
        "electricity_generation": {"$sum": "$electricity_generation"}
        }
      }
    ]
    result_1 = list(collection.aggregate(query_1))
    df_1 = pd.DataFrame(result_1)
    df_1.rename(columns={'_id':'year'},inplace=True)
    df_1 = df_1.sort_values(by='year')
    dat_1 = df_1.melt(id_vars="year",value_vars=['biofuel_electricity','oil_electricity','renewables_electricity','gas_electricity','low_carbon_electricity','coal_electricity','fossil_electricity','electricity_generation'],var_name = 'Electricity Source', value_name = 'Electricity Generation')
    fig_2 = px.area(dat_1,x='year',y='Electricity Generation',color='Electricity Source', title='Global Electricity Generation by source vs Total Electricity generated over the years',labels={'year':'Year'})
    return fig_2

In [15]:
# Melting the DataFrame to long format as it is required by plotly for group plotting
query_2 = [ {
    "$group": {
    "_id": {
        "year":"$year",
        "country":"$country"
    },   
    "biofuel_electricity": {"$sum": "$biofuel_electricity"},
    "oil_electricity": {"$sum": "$oil_electricity"},
    "renewables_electricity": {"$sum": "$renewables_electricity"},
    "gas_electricity": {"$sum": "$gas_electricity"},
    "low_carbon_electricity": {"$sum": "$low_carbon_electricity"},
    "fossil_electricity": {"$sum": "$fossil_electricity"},
    "electricity_generation": {"$sum": "$electricity_generation"}
        }
    }
]

result_2 = list(collection.aggregate(query_2))
df_2 = pd.DataFrame(result_2)
# We will unpack the nested '_id' column and create separate year and country column and then drop the original column
df_2['year'] = df_2['_id'].apply(lambda x: x['year'])
df_2['country'] = df_2['_id'].apply(lambda x: x['country'])
df_2.drop(columns='_id',inplace=True)
df_2 = df_2.sort_values(by=['year','country'])

df_co = df_2.melt(
    id_vars=['year', 'country'],                                   # variables for data aggregation
    value_vars=[                                                   # features to be plotted with respect to year and country
        'oil_electricity',
        'biofuel_electricity',
        'gas_electricity',
        'renewables_electricity',
        'low_carbon_electricity',
        'fossil_electricity',
        'electricity_generation'
    ],
    var_name='Electricity Source',                                # Creating a variable that will define selection by source
    value_name='Electricity Generation'                           # values plotted
)

# Plot-3
def elec_gen_by_source_per_country(collection):
    # Creating the initial figure with one country
    initial_country = df_co['country'].unique()[0]                    # Extracting the 1st country from 'country' column for plotting before user selection
    fig_3 = px.area(                                                    # plotly function for area plot
        df_co[df_co['country'] == initial_country],
        x='year',                                                     # defining x-axis
        y='Electricity Generation',                                   # defining y-axis
        color='Electricity Source',                                   # color for area chart on the basis of source
        title=f'Electricity Generation by Source in {initial_country}'  # title of the chart
    )

    # Creating dropdown
    buttons = []

    for country in df_co['country'].unique():                        # Looping through countries for adding unique options to the dropdown
        filtered_df = df_co[df_co['country'] == country]
    
        buttons.append(                                              # updating the buttons empty list with unique countries
            dict(
                method='update',
                label=country,
                args=[
                    {
                        'x': [filtered_df['year']],
                        'y': [filtered_df[filtered_df['Electricity Source'] == source]['Electricity Generation'].values for source in filtered_df['Electricity Source'].unique()],
                        'type': 'scatter'                            # creating area to be filled
                    },
                    {
                        'title': f'Electricity Generation by Source in {country}'   # title for the chart on the basis of selected country
                    }
                ]
            )
        )

    # Add dropdown to the figure
    fig_3.update_layout(
        updatemenus=[                                                # Adding the dropdown menu to the plot
            dict(
                buttons=buttons,
                direction="down",                                    # Direction in which dropdown options will be displayed
                showactive=True,                                     # displays the country for which the graph is plotted
                x=1.05,                                              # aligning the dropdown
                xanchor="left",
                y=1.25,
                yanchor="top"
            )
        ]
    )

    return fig_3

In [16]:
# Plot-4
def top_10_countries_by_pop(collection):
    # Getting the data for year 2000
    quer_1 = {'year':2000}
    quer_2 = {'year':2023}
    result_2000 = list(collection.find(quer_1).sort('population',-1).limit(10))
    result_2023 = list(collection.find(quer_2).sort('population',-1).limit(10))
    df_2000 = pd.DataFrame(result_2000)
    df_2023 = pd.DataFrame(result_2023)
    fig_4 = make_subplots(rows=1,cols=2,specs=[[{'type': 'domain'}, {'type': 'domain'}]],subplot_titles=['Top 10 populous countries in 2000','Top 10 populous countries in 2023'])

    #for the year 2000
    fig_4.add_trace(go.Pie(labels=df_2000['country'],
              values=df_2000['population'],
              name='2000',
              textinfo='percent+label'),row=1,col=1)

    #for the year 2023
    fig_4.add_trace(go.Pie(labels=df_2023['country'],
              values=df_2023['population'],
              name='2023',
              textinfo='percent+label'),row=1,col=2)
    return fig_4

In [17]:
# Plot-5
def elec_gen_per_capita_top_10_countries(collection):
    selected_countries = ['China','India','United States','Indonesia','Brazil','Pakistan','Russia','Bangladesh','Japan','Nigeria','Mexico']
    query_3 = {"country" : {"$in":selected_countries}}
    # selecting required columns
    projection = {
        "_id":0,                          # 0 means exclude that column and 1 means include
        "year":1,
        "country":1,
        "renewables_elec_per_capita":1,
        "low_carbon_elec_per_capita":1,
        "coal_elec_per_capita":1,
        "fossil_elec_per_capita":1,
        "per_capita_electricity":1
    }
    results_3 = list(collection.find(query_3,projection))
    df_3 = pd.DataFrame(results_3)
    # Plotly requires the data to be in this melted format for group plotting
    df_4 = df_3.melt(id_vars=["year","country"],value_vars=['renewables_elec_per_capita','low_carbon_elec_per_capita','coal_elec_per_capita','fossil_elec_per_capita','per_capita_electricity'],var_name = 'Electricity Source', value_name = 'Electricity Generation')

    # Creating a facetgrid area plot
    fig_5=px.area(
        df_4,
        x='year',
        y='Electricity Generation',
        color='Electricity Source',
        facet_col = 'country',
        facet_col_wrap=3,
        title=f"Electricity Generation by Source for selected countries",
        height=800
    )

    fig_5.update_layout(
        legend_title_text = 'Electricity Source'
            )

    return fig_5

In [21]:
collection_1 = db.continent_energy
# Plot-6
def elec_gen_per_capita_by_cont(collection_1):
    # Fetching the continents data from MongoDB
    collection_1 = db.continent_energy
    query = {"year":{"$lte":2022}}
    projection = {
        "_id":0,
        "year":1,
        "continent":1,
        "renewables_elec_per_capita":1,
        "oil_elec_per_capita":1,
        "gas_elec_per_capita":1,
        "low_carbon_elec_per_capita":1,
        "coal_elec_per_capita":1,
        "fossil_elec_per_capita":1,
        "per_capita_electricity":1
    }
    data_1=pd.DataFrame(list(collection_1.find(query,projection)))
    # Plotly requires the data to be in this melted format for group plotting
    df_1 = data_1.melt(id_vars=["year","continent"],value_vars=['renewables_elec_per_capita','oil_elec_per_capita','gas_elec_per_capita','low_carbon_elec_per_capita','coal_elec_per_capita','fossil_elec_per_capita','per_capita_electricity'],var_name = 'Electricity Source', value_name = 'Electricity Generation')

    # Creating a facetgrid area plot
    fig_6=px.area(
        df_1,
        x='year',
        y='Electricity Generation',
        color='Electricity Source',
        facet_col = 'continent',
        facet_col_wrap=3,
        title=f"Electricity Generation by Source for selected countries",
        height=500
    )

    fig_6.update_layout(
        legend_title_text = 'Electricity Source'
            )

    return fig_6

In [23]:
def greenhouse_gas_emit_by_continent(collection_1):
    collection_1 = db.continent_energy
    query_4 = {"year":{"$lte":2022}}
    projection_2 = {
        "_id":0,
        "year":1,
        "continent":1,
        "greenhouse_gas_emissions":1
    }
    data_2=pd.DataFrame(list(collection_1.find(query_4,projection_2)))
    fig_7 = px.line(data_2, data_2['year'], data_2['greenhouse_gas_emissions'], color = 'continent', markers=True,title='Greenhouse Gas Emissions by continents over the years')
    return fig_7

In [25]:
# def callback_bar():
#     @callback(
#         Output('elec_source_per_country', 'figure'),
#         Input('dropdown-selection', 'value')
#     )
#     def update_graph(selected_country):
#         dff = df_co[df_co.country==value]
#         return elec_gen_by_source_per_country(collection,selected_country)

In [27]:
def callback_area():
    @callback(
        Output('graph-content', 'figure'),
        Input('dropdown-selection', 'value')
    )
    def update_graph(selected_country):
        dff = df_co[df_co.country==value]
        return elec_gen_by_source_per_country(collection,selected_country)

In [33]:
def layout_02():
    year_options = sorted(df_02['year'].unique())
    return html.Div([
    dcc.Graph(figure=greenhouse_gas_emit_by_continent(collection_1)),
    dcc.Graph(figure=elec_gen_per_capita_by_cont(collection_1)),
    dcc.Graph(figure=elec_gen_by_source_per_country(collection)),
    dcc.Graph(figure=top_10_countries_by_pop(collection)])

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (947574769.py, line 7)