# Data Analysis

In [2]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the OWID CO2 dataset into `data`.
# A forward slash is used as the separator: in a normal string literal '\o' is an
# invalid escape sequence, and a backslash path only resolves on Windows.
data = pd.read_csv('data/owid-co2-data.csv')

In [4]:
# Preview the first five rows of the raw frame
data.head(n=5)

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,Afghanistan,1850,AFG,3752993.0,,,,,,,...,,,,,,,,,,
1,Afghanistan,1851,AFG,3767956.0,,,,,,,...,,0.157,0.0,0.0,0.0,0.0,,,,
2,Afghanistan,1852,AFG,3783940.0,,,,,,,...,,0.156,0.0,0.0,0.0,0.0,,,,
3,Afghanistan,1853,AFG,3800954.0,,,,,,,...,,0.156,0.0,0.0,0.0,0.0,,,,
4,Afghanistan,1854,AFG,3818038.0,,,,,,,...,,0.155,0.0,0.0,0.0,0.0,,,,


In [5]:
# Display the dtype pandas inferred for each column of `data`
data.dtypes

country                         object
year                             int64
iso_code                        object
population                     float64
gdp                            float64
                                ...   
temperature_change_from_n2o    float64
total_ghg                      float64
total_ghg_excluding_lucf       float64
trade_co2                      float64
trade_co2_share                float64
Length: 79, dtype: object

### Data cleaning

In [6]:
# Count missing values per column (isna is the same check as isnull)
data.isna().sum()

country                            0
year                               0
iso_code                        7867
population                      8001
gdp                            32204
                               ...  
temperature_change_from_n2o     9575
total_ghg                      41061
total_ghg_excluding_lucf       41061
trade_co2                      43017
trade_co2_share                43018
Length: 79, dtype: int64

In [19]:
# Load dataset used by the Dash app and the correlation cells below.
# Forward slash instead of a backslash: '\o' is an invalid escape sequence in a
# normal string literal, and a backslash separator only works on Windows.
df = pd.read_csv('data/owid-co2-data.csv')


# Start Dash app
app = Dash(__name__)

# Layout of the app: page title, a country selector with its trend chart,
# and a year selector with a map placeholder.
app.layout = html.Div([
    html.H1("Greenhouse Gas Emissions Analysis Platform", style={'textAlign': 'center', 'marginBottom': 20}),

    # Country selector; its value is the input of the 'emission-trend' callback.
    html.Div([
        html.Label("Select Country:"),
        dcc.Dropdown(
            id='country-dropdown',
            options=[{'label': country, 'value': country} for country in df['country'].unique()],
            value='World',
            style={'width': '50%', 'marginBottom': 20}
        )
    ], style={'textAlign': 'center'}),

    html.Div([
        dcc.Graph(id='emission-trend'),
    ], style={'marginBottom': 50}),


    html.Div([
        html.Label("Select Year:"),
        dcc.Dropdown(
            id='year-dropdown',
            # unique() returns years in order of first appearance in the frame, not
            # chronologically, so sort them; cast numpy integers to plain ints so the
            # option values are ordinary Python numbers.
            options=[{'label': int(year), 'value': int(year)} for year in sorted(df['year'].unique())],
            value=int(df['year'].max()),
            style={'width': '50%', 'marginBottom': 20}
        ),
        # NOTE(review): no callback in this cell reads 'year-dropdown' or writes
        # 'emission-map'; confirm whether the map is wired up elsewhere.
        dcc.Graph(id='emission-map')
    ], style={'textAlign': 'center', 'marginBottom': 50})
])

@app.callback(
    Output('emission-trend', 'figure'),
    Input('country-dropdown', 'value')
)
def update_emission_trend(selected_country):
    """Build the line chart shown in the 'emission-trend' graph.

    Args:
        selected_country: Current value of the 'country-dropdown' component.

    Returns:
        A line figure of the emissions column against 'year' for the selected
        country, or an empty figure carrying an explanatory title when no
        usable column or no non-null rows exist.
    """
    rows = df.loc[df['country'] == selected_country]

    # Use 'co2' when present, otherwise fall back to 'cumulative_co2'.
    emissions_column = next(
        (name for name in ('co2', 'cumulative_co2') if name in rows.columns),
        None,
    )
    if emissions_column is None:
        return go.Figure().update_layout(title=f'No emissions data available for {selected_country}')

    # Keep only rows that have a value to plot.
    rows = rows.dropna(subset=[emissions_column])
    if rows.empty:
        return go.Figure().update_layout(
            title=f'No Data Available for {selected_country}',
            xaxis_title='Year',
            yaxis_title='CO2 emissions',
        )

    return px.line(
        rows,
        x='year',
        y=emissions_column,
        title=f'{selected_country} Greenhouse Gas Emissions Over Time',
    )

# Launch the development server when this cell/script is executed directly.
if __name__ == '__main__':
    # app.run is the current entry point; app.run_server is the legacy name
    # and is rejected by newer Dash releases.
    app.run(debug=True)

In [25]:
# Keep only the numeric columns, then compute their pairwise correlations
numeric_df = df.select_dtypes(include=['number'])
correlation = numeric_df.corr()

# Preview the numeric subset
numeric_df.head(n=5)

Unnamed: 0,year,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,co2_including_luc,co2_including_luc_growth_abs,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,1850,3752993.0,,,,,,,,,...,,,,,,,,,,
1,1851,3767956.0,,,,,,,,,...,,0.157,0.0,0.0,0.0,0.0,,,,
2,1852,3783940.0,,,,,,,,,...,,0.156,0.0,0.0,0.0,0.0,,,,
3,1853,3800954.0,,,,,,,,,...,,0.156,0.0,0.0,0.0,0.0,,,,
4,1854,3818038.0,,,,,,,,,...,,0.155,0.0,0.0,0.0,0.0,,,,


In [34]:
# Correlation across every numeric column
correlation_matrix = numeric_df.corr()

# Correlation restricted to the columns shown in the heatmap
columns_of_interest = ['co2', 'gdp', 'population', 'cumulative_co2']
correlation_matrix_filtered = numeric_df.loc[:, columns_of_interest].corr()

# Annotated heatmap of the filtered matrix
fig = (
    px.imshow(correlation_matrix_filtered, text_auto=True, labels={'color': 'Correlation'})
    .update_layout(title='Filtered Correlation Matrix Heatmap', width=800, height=600)
    .update_xaxes(tickangle=45)
)
fig.show()

In [38]:
# Pick the columns to correlate and keep only rows where all of them are present
correlation_data = data.loc[:, ['gdp', 'population', 'co2', 'cumulative_co2', 'total_ghg']].dropna()

# Pairwise correlation of the selected columns
correlation_matrix = correlation_data.corr()

# Show the result
correlation_matrix

Unnamed: 0,gdp,population,co2,cumulative_co2,total_ghg
gdp,1.0,0.938576,0.977784,0.977087,0.981821
population,0.938576,1.0,0.942418,0.887566,0.959142
co2,0.977784,0.942418,1.0,0.964965,0.994183
cumulative_co2,0.977087,0.887566,0.964965,1.0,0.961094
total_ghg,0.981821,0.959142,0.994183,0.961094,1.0


In [41]:
# Restrict the raw frame to its numeric columns
numerical_data = data.select_dtypes(include='number')

# Pairwise correlation across all numeric columns
correlation_matrix_all_numeric = numerical_data.corr()

# Show the result
correlation_matrix_all_numeric

Unnamed: 0,year,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,co2_including_luc,co2_including_luc_growth_abs,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
year,1.000000,0.069914,0.079754,0.102981,0.312224,0.142780,0.046398,-0.009721,0.076471,0.010471,...,-0.726284,-0.003506,0.165236,0.154614,0.157873,0.167180,0.029770,0.032313,0.006118,0.064661
population,0.069914,1.000000,0.906843,0.802239,0.037464,0.842332,0.502810,-0.004865,0.908075,0.348481,...,0.590498,0.738238,0.938839,0.849670,0.883132,0.887780,0.945268,0.934041,-0.314998,-0.124726
gdp,0.079754,0.906843,1.000000,0.912824,0.077386,0.965254,0.311429,-0.002278,0.948413,0.171332,...,0.395960,0.672933,0.958469,0.959959,0.966761,0.976314,0.981821,0.980726,-0.070153,-0.056733
cement_co2,0.102981,0.802239,0.912824,1.000000,0.150730,0.890979,0.482679,-0.003283,0.853730,0.313276,...,0.405180,0.461275,0.853453,0.792499,0.818248,0.830563,0.917280,0.921456,-0.394426,-0.107764
cement_co2_per_capita,0.312224,0.037464,0.077386,0.150730,1.000000,0.141605,0.064110,-0.009446,0.096567,0.033119,...,-0.296477,-0.010834,0.089212,0.101050,0.099593,0.097290,0.091824,0.100188,-0.030735,-0.167459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
temperature_change_from_n2o,0.167180,0.887780,0.976314,0.830563,0.097290,0.962422,0.426666,-0.005000,0.953415,0.271134,...,0.502179,0.648758,0.966588,0.980009,0.987345,1.000000,0.977843,0.976758,-0.033170,-0.125654
total_ghg,0.029770,0.945268,0.981821,0.917280,0.091824,0.990723,0.465489,-0.012550,0.996540,0.295555,...,0.937712,0.972068,0.988882,0.967913,0.983110,0.977843,1.000000,0.997862,-0.182024,-0.117277
total_ghg_excluding_lucf,0.032313,0.934041,0.980726,0.921456,0.100188,0.996811,0.461929,-0.013050,0.996665,0.292195,...,0.943873,0.970251,0.981427,0.970731,0.983016,0.976758,0.997862,1.000000,-0.171555,-0.113502
trade_co2,0.006118,-0.314998,-0.070153,-0.394426,-0.030735,-0.130611,-0.357979,-0.038052,-0.180992,-0.227191,...,-0.259807,-0.053840,-0.207706,0.000514,-0.056494,-0.033170,-0.182024,-0.171555,1.000000,0.143002
