## Imports

In [74]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pycountry
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [75]:
world = pd.read_csv('data/World Energy Consumption.csv')

## Data Overview

### All Data

In [76]:
world.head()

Unnamed: 0,country,year,iso_code,population,gdp,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita,biofuel_consumption,biofuel_elec_per_capita,...,solar_share_elec,solar_share_energy,wind_cons_change_pct,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_electricity,wind_energy_per_capita,wind_share_elec,wind_share_energy
0,ASEAN (Ember),2000,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
1,ASEAN (Ember),2001,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
2,ASEAN (Ember),2002,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
3,ASEAN (Ember),2003,,,,,,,,,...,0.0,,,,,,0.0,,0.0,
4,ASEAN (Ember),2004,,,,,,,,,...,0.0,,,,,,0.0,,0.0,


### Number of Rows

In [77]:
len(world)

22012

### Null values

In [78]:
world.isnull().sum()

country                       0
year                          0
iso_code                   5500
population                 3889
gdp                       10899
                          ...  
wind_elec_per_capita      14947
wind_electricity          14016
wind_energy_per_capita    17947
wind_share_elec           15126
wind_share_energy         17911
Length: 129, dtype: int64

### Columns

In [79]:
world.columns

Index(['country', 'year', 'iso_code', 'population', 'gdp',
       'biofuel_cons_change_pct', 'biofuel_cons_change_twh',
       'biofuel_cons_per_capita', 'biofuel_consumption',
       'biofuel_elec_per_capita',
       ...
       'solar_share_elec', 'solar_share_energy', 'wind_cons_change_pct',
       'wind_cons_change_twh', 'wind_consumption', 'wind_elec_per_capita',
       'wind_electricity', 'wind_energy_per_capita', 'wind_share_elec',
       'wind_share_energy'],
      dtype='object', length=129)

### Data Types

In [80]:
world.dtypes

country                    object
year                        int64
iso_code                   object
population                float64
gdp                       float64
                           ...   
wind_elec_per_capita      float64
wind_electricity          float64
wind_energy_per_capita    float64
wind_share_elec           float64
wind_share_energy         float64
Length: 129, dtype: object

## Pre-Processing Data

### Remove columns with more than 50% of null values

In [81]:
def remove_null_sup_to_50(data):
    for col in data.columns:
        if data[col].isnull().sum()>(len(data)/2):
            data.drop(columns=[col],inplace=True)
    return data

In [82]:
world = remove_null_sup_to_50(world)

In [83]:
world.head()

Unnamed: 0,country,year,iso_code,population,gdp,coal_prod_change_twh,coal_prod_per_capita,coal_production,energy_cons_change_pct,energy_cons_change_twh,gas_prod_change_twh,gas_prod_per_capita,gas_production,oil_prod_change_twh,oil_prod_per_capita,oil_production,primary_energy_consumption
0,ASEAN (Ember),2000,,,,,,,,,,,,,,,
1,ASEAN (Ember),2001,,,,,,,,,,,,,,,
2,ASEAN (Ember),2002,,,,,,,,,,,,,,,
3,ASEAN (Ember),2003,,,,,,,,,,,,,,,
4,ASEAN (Ember),2004,,,,,,,,,,,,,,,


In [84]:
world.isnull().sum()

country                           0
year                              0
iso_code                       5500
population                     3889
gdp                           10899
coal_prod_change_twh           5773
coal_prod_per_capita           7294
coal_production                5518
energy_cons_change_pct         9895
energy_cons_change_twh         9702
gas_prod_change_twh            5496
gas_prod_per_capita            7116
gas_production                 5241
oil_prod_change_twh            4864
oil_prod_per_capita            6643
oil_production                 4607
primary_energy_consumption     9424
dtype: int64

## Coal Production

### Get the list of countries

In [85]:
countries = [country.name for country in pycountry.countries]

### Take only the non-null values

In [86]:
coal = world.query("coal_production.isna()==False")

### Keep only the countries present in the list

We want to avoid to get names of continents or regions, and keep only the countries.

In [87]:
coal = coal.query("country.isin(@countries)")

### Sort by year and production

In [88]:
coal_sorted = coal.sort_values(['year', 'coal_production'], ascending=[True, False])


### For each year, keep only the 10 first countries in terms of production

In [89]:
top_10_coal = coal_sorted.groupby('year').head(10)

In [90]:
top_10_coal.dtypes

country                        object
year                            int64
iso_code                       object
population                    float64
gdp                           float64
coal_prod_change_twh          float64
coal_prod_per_capita          float64
coal_production               float64
energy_cons_change_pct        float64
energy_cons_change_twh        float64
gas_prod_change_twh           float64
gas_prod_per_capita           float64
gas_production                float64
oil_prod_change_twh           float64
oil_prod_per_capita           float64
oil_production                float64
primary_energy_consumption    float64
dtype: object

### Keep only the essential columns : country, year, and production

In [91]:
top_10_coal = top_10_coal[['country','year','coal_production']]

In [92]:
top_10_coal

Unnamed: 0,country,year,coal_production
20588,United States,1900,1829.395
20465,United Kingdom,1900,1566.457
7681,Germany,1900,647.411
7232,France,1900,269.726
16332,Poland,1900,190.975
...,...,...,...
17933,South Africa,2022,1485.460
10271,Kazakhstan,2022,502.123
16454,Poland,2022,473.361
4528,Colombia,2022,459.383


## Gas Production

### Take only the non-null values

In [93]:
gas = world.query("gas_production.isna()==False")

### Keep only the countries present in the list

In [94]:
gas = gas.query("country.isin(@countries)")

### Sort by year and production

In [95]:
gas_sorted = gas.sort_values(['year', 'gas_production'], ascending=[True, False])

### For each year, keep only the 10 first countries in terms of production

In [96]:
top_10_gas = gas_sorted.groupby('year').head(10)

### Keep only the essential columns : country, year, and production

In [97]:
top_10_gas = top_10_gas[['country','year','gas_production']]

## Oil Production

### Take only the non-null values

In [98]:
oil = world.query("oil_production.isna()==False")

### Keep only the countries present in the list

In [99]:
oil = oil.query("country.isin(@countries)")

### Sort by year and production

In [100]:
oil_sorted = oil.sort_values(['year', 'oil_production'], ascending=[True, False])

### For each year, keep only the 10 first countries in terms of production

In [101]:
top_10_oil = oil_sorted.groupby('year').head(10)

### Keep only the essential columns : country, year, and production

In [102]:
top_10_oil = top_10_oil[['country','year','oil_production']]

## Figures

### Coal Production

In [103]:
# Créer une nouvelle DataFrame pour stocker les pays dans le top 10 de chaque année
top_10_per_year_coal = top_10_coal.groupby('year').apply(lambda x: x.nlargest(10, 'coal_production')).reset_index(drop=True)

# Créer les graphiques en secteurs pour chaque année
fig = go.Figure()

for year in top_10_per_year_coal['year'].unique():
    year_data = top_10_per_year_coal[top_10_per_year_coal['year'] == year]
    
    fig.add_trace(go.Pie(
        labels=year_data['country'],
        values=year_data['coal_production'],
        name=str(year),
        title=f'The 10 countries producing the most coal in {year}',
        hoverinfo='label+percent',
        textinfo='value+label',
        showlegend=True
    ))

# Définir les paramètres de mise en page
fig.update_layout(
    title_text='Top 10 of coal producers (1900-today)',
    updatemenus=[{
        'buttons': [
            {
                'method': 'update',
                'label': str(year),
                'args': [{'visible': [year == y for y in top_10_per_year_coal['year'].unique()]}]
            } for year in top_10_per_year_coal['year'].unique()
        ],
        'direction': 'down',
    }]
)

# Afficher l'animation
fig.show()





### Gas Production

In [104]:
# Créer une nouvelle DataFrame pour stocker les pays dans le top 10 de chaque année
top_10_per_year_gas = top_10_gas.groupby('year').apply(lambda x: x.nlargest(10, 'gas_production')).reset_index(drop=True)

# Créer les graphiques en secteurs pour chaque année
fig = go.Figure()

for year in top_10_per_year_gas['year'].unique():
    year_data = top_10_per_year_gas[top_10_per_year_gas['year'] == year]
    
    fig.add_trace(go.Pie(
        labels=year_data['country'],
        values=year_data['gas_production'],
        name=str(year),
        title=f'The 10 countries producing the most gas in {year}',
        hoverinfo='label+percent',
        textinfo='value+label',
        showlegend=True
    ))

# Définir les paramètres de mise en page
fig.update_layout(
    title_text='Top 10 of gas producers (1900-today)',
    updatemenus=[{
        'buttons': [
            {
                'method': 'update',
                'label': str(year),
                'args': [{'visible': [year == y for y in top_10_per_year_gas['year'].unique()]}]
            } for year in top_10_per_year_gas['year'].unique()
        ],
        'direction': 'down',
    }]
)

# Afficher l'animation
fig.show()





### Oil Production

In [105]:
# Créer une nouvelle DataFrame pour stocker les pays dans le top 10 de chaque année
top_10_per_year_oil = top_10_oil.groupby('year').apply(lambda x: x.nlargest(10, 'oil_production')).reset_index(drop=True)

# Créer les graphiques en secteurs pour chaque année
fig = go.Figure()

for year in top_10_per_year_oil['year'].unique():
    year_data = top_10_per_year_oil[top_10_per_year_oil['year'] == year]
    
    fig.add_trace(go.Pie(
        labels=year_data['country'],
        values=year_data['oil_production'],
        name=str(year),
        title=f'The 10 countries producing the most oil in {year}',
        hoverinfo='label+percent',
        textinfo='value+label',
        showlegend=True
    ))

# Définir les paramètres de mise en page
fig.update_layout(
    title_text='Top 10 of oil producers (1900-today)',
    updatemenus=[{
        'buttons': [
            {
                'method': 'update',
                'label': str(year),
                'args': [{'visible': [year == y for y in top_10_per_year_oil['year'].unique()]}]
            } for year in top_10_per_year_oil['year'].unique()
        ],
        'direction': 'down',
    }]
)

# Afficher l'animation
fig.show()





## Export

In [106]:
coal = coal[['country','year','coal_production','coal_prod_per_capita','coal_prod_change_twh']]

In [107]:
coal.to_csv('outputs/coal.csv',index=False)

In [108]:
oil = oil[['country','year','oil_production','oil_prod_per_capita','oil_prod_change_twh']]

In [109]:
oil.to_csv('outputs/oil.csv')

In [110]:
gas = gas[['country','year','gas_production','gas_prod_per_capita','gas_prod_change_twh']]

In [111]:
gas.to_csv('outputs/gas.csv')