In [174]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

In [None]:
# Mount Google Drive at /content/drive; the CSV paths used below live under that mount point
from google.colab import drive

drive.mount('/content/drive')

In [None]:
%load_ext google.colab.data_table

In [177]:
# 1. What is the biggest predictor of a large CO2 output per capita of a country?

In [None]:
# Load dataframe1: CO2 emissions per capita (the file is decoded as latin-1)
co2_per_capita_csv = "/content/drive/MyDrive/RawData/co-emissions-per-capita.csv"
co2_emission = pd.read_csv(co2_per_capita_csv, encoding='latin-1')
co2_emission

In [None]:
# Load dataframe2: GDP per capita (the file is decoded as latin-1)
gdp_per_capita_csv = "/content/drive/MyDrive/RawData/gdp-per-capita-worldbank.csv"
GDP = pd.read_csv(gdp_per_capita_csv, encoding='latin-1')
GDP

In [None]:
# Load dataframe3: renewable share of final energy consumption (the file is decoded as latin-1)
renewable_share_csv = "/content/drive/MyDrive/RawData/share-of-final-energy-consumption-from-renewable-sources.csv"
green_consumption = pd.read_csv(renewable_share_csv, encoding='latin-1')
green_consumption

In [None]:
# Load dataframe4: per-capita CO2 emissions from transport (the file is decoded as latin-1)
transport_csv = "/content/drive/MyDrive/RawData/per-capita-co2-transport.csv"
transport = pd.read_csv(transport_csv, encoding='latin-1')
transport

In [None]:
# Load dataframe5: renewable share of electricity production (the file is decoded as latin-1)
renewable_electricity_csv = "/content/drive/MyDrive/RawData/share-of-electricity-production-from-renewable-sources.csv"
green_sources = pd.read_csv(renewable_electricity_csv, encoding='latin-1')
green_sources

In [None]:
# Merge dataframes: start from the CO2 table and join every other table onto it, one after another.
# Each join uses pandas' default (inner) merge on the same three key columns, so only
# Entity/Year/Code combinations present in all five tables remain.
merged_df = co2_emission
for other_table in (GDP, green_consumption, transport, green_sources):
    merged_df = merged_df.merge(other_table, on=['Entity', 'Year', 'Code'])
merged_df

In [None]:
# Rename the verbose source column names to the short labels used by the rest of the notebook.
#
# The CO2 column name contains a non-ASCII character that comes out garbled because the CSVs
# are read with encoding='latin-1'. A literal containing those garbled bytes is fragile (they
# are partly invisible and easily lost on copy/paste), so the column is matched by its readable
# prefix and suffix instead. The comprehension yields an empty mapping when nothing matches,
# which keeps the cell safe to re-run after the rename has already happened.
co2_per_capita_columns = {
    column: 'CO2 emissions per capita'
    for column in merged_df.columns
    if column.startswith('Annual CO') and column.endswith('emissions (per capita)')
}

# rename() ignores keys that are not present, and rebinding (instead of inplace=True)
# avoids mutating a frame that an earlier cell displayed.
merged_df = merged_df.rename(columns={
    **co2_per_capita_columns,
    'GDP per capita, PPP (constant 2017 international $)': 'GDP per capita',
    '7.2.1 - Renewable energy share in the total final energy consumption (%) - EG_FEC_RNEW': 'Renewable energy share',
    'Per capita carbon dioxide emissions from transport': 'CO2 emission from transport',
    'Renewable electricity (% electricity production) (World Bank (2015))': 'Renewable production',
})
merged_df

In [None]:
# Select the largest countries in the world by gross domestic product (GDP)
# NOTE(review): intended as a top-25 list, but 26 names are present — confirm which entry is surplus
selected_countries = [
    'United States', 'China', 'Japan', 'Germany', 'India', 'France',
    'United Kingdom', 'Brazil', 'Italy', 'Canada', 'South Korea', 'Australia',
    'Spain', 'Mexico', 'Indonesia', 'Netherlands', 'Russia', 'Switzerland',
    'Saudi Arabia', 'Turkey', 'Argentina', 'Taiwan', 'Sweden', 'Poland',
    'Belgium', 'Thailand',
]

# Keep only the rows whose Entity is one of the selected countries
is_selected_country = merged_df['Entity'].isin(selected_countries)
filtered_df = merged_df.loc[is_selected_country]
filtered_df

In [None]:
# To answer the question, I selected the largest countries in the world based on GDP. I then looked at several indicators, each sorted in descending order.
# The most important step is to see which of the 25 countries have the highest CO2 emissions per capita.
# After that, I looked at various factors: CO2 emissions from transport, GDP per capita, the generation of electricity from green sources, and the use of energy from renewable sources.
# For each indicator I also made a top 5 to see what conclusions can be drawn from the data.
# The countries with the highest CO2 emissions per capita are the United States, Australia, Canada, Saudi Arabia and South Korea; for each of the following top 5 lists I can check which of these countries appear in it.
# From this I conclude that transport is a very important factor in total CO2 emissions: four of the five countries are in the top 5 for both total CO2 emissions per capita and CO2 emissions from transport.
# It is also striking that a high GDP per capita does not necessarily mean that a country's CO2 emissions are high.
# Only one country in the GDP per capita top 5 is also in the top 5 of countries with the highest CO2 emissions.
# Finally, the data show that a country that produces a lot of renewable electricity itself, or imports and consumes renewable energy as part of its total use, does not appear in the top 5 of countries with the highest CO2 emissions, with one exception.
# So a country tends to emit less CO2 if it generates a lot of green energy itself or consumes a lot of green energy in its total energy mix.

# Only draw the figures when the country filter left some rows to plot
if not filtered_df.empty:
    # Use seaborn's white-grid look for the figures below
    sns.set(style="whitegrid")

    # Indicators to plot, in the order the panels appear (top to bottom)
    desired_order = [
        'CO2 emissions per capita',
        'CO2 emission from transport',
        'GDP per capita',
        'Renewable production',
        'Renewable energy share',
    ]
    panel_count = len(desired_order)

    # ---- Figure 1: one tall line-plot panel per indicator, one line per country ----
    fig, axes = plt.subplots(nrows=panel_count, ncols=1, figsize=(20, 12 * panel_count))

    for ax, indicator in zip(axes, desired_order):
        # Rows are ordered by the indicator (highest first) before plotting, as in the
        # original; row order may influence the order in which countries get their hue.
        ranked_rows = filtered_df.sort_values(by=indicator, ascending=False)
        sns.lineplot(x='Year', y=indicator, data=ranked_rows, ax=ax, hue='Entity', palette='deep', linewidth=2)
        ax.set(ylabel=indicator, xlabel='Year', title=f'{indicator} over time')

        # Tilt the year labels and place a major tick every 5 years
        ax.tick_params(axis='x', rotation=45)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(base=5))

        # Start at the first year and leave two years of room on the right-hand side
        ax.set_xlim(ranked_rows['Year'].min(), ranked_rows['Year'].max() + 2)

    # Vertical breathing room between the panels, then render the figure
    plt.subplots_adjust(hspace=0.5)
    plt.show()

    # ---- Figure 2: one bar-chart panel per indicator with the five highest country averages ----
    fig_top5, axes_top5 = plt.subplots(nrows=panel_count, ncols=1, figsize=(20, 12 * panel_count))

    for ax, indicator in zip(axes_top5, desired_order):
        # Average the indicator over all available years per country, then keep the five largest
        country_means = filtered_df.groupby('Entity')[indicator].mean().reset_index()
        top5_means = country_means.nlargest(5, columns=indicator)

        sns.barplot(x='Entity', y=indicator, data=top5_means, ax=ax, palette='deep')
        ax.set(ylabel=indicator, xlabel='Country', title=f'Top 5 {indicator}')

    # Vertical breathing room between the panels, then render the figure
    plt.subplots_adjust(hspace=0.5)
    plt.show()

In [187]:
# 2. Which countries are making the biggest strides in decreasing CO2 output?

In [None]:
# Load dataframe1: annual CO2 emissions per country (the file is decoded as latin-1)
annual_co2_csv = "/content/drive/MyDrive/RawData/annual-co2-emissions-per-country.csv"
co2_emission = pd.read_csv(annual_co2_csv, encoding='latin-1')
co2_emission

In [None]:
# Rename columns: give the emissions column a plain-ASCII name and call the entity column 'Country'.
#
# The emissions column name contains a non-ASCII character that comes out garbled because the
# CSV is read with encoding='latin-1'. Matching on the readable prefix/suffix avoids depending
# on those (partly invisible) garbled bytes. The mapping is empty when nothing matches, so the
# cell can be re-run safely after the rename has already happened.
annual_co2_columns = {
    column: 'CO2 emissions'
    for column in co2_emission.columns
    if column.startswith('Annual CO') and column.endswith(' emissions')
}

# Rebind rather than mutate in place so the frame displayed by the load cell is not altered
co2_emission = co2_emission.rename(columns={**annual_co2_columns, 'Entity': 'Country'})
co2_emission

In [190]:
# Create list of countries.
# Used to separate real countries from aggregate rows: only rows whose 'Country' value appears
# here are kept by the filter that follows.
# NOTE(review): names must match the dataset's spelling exactly. The earlier country selection
# in this notebook uses 'South Korea', while this list has "Korea, South" (and other
# parenthesised/alternative spellings) — confirm these against the 'Country' column.
country_list = [
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina",
    "Armenia", "Australia", "Austria", "Azerbaijan",
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin",
    "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
    "Burkina Faso", "Burundi",
    "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad", "Chile",
    "China", "Colombia", "Comoros", "Congo", "Costa Rica", "Croatia", "Cuba", "Cyprus",
    "Czech Republic",
    "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    "East Timor (Timor-Leste)", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea",
    "Estonia", "Eswatini", "Ethiopia",
    "Fiji", "Finland", "France",
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea",
    "Guinea-Bissau", "Guyana",
    "Haiti", "Honduras", "Hungary",
    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Ivory Coast",
    "Jamaica", "Japan", "Jordan",
    "Kazakhstan", "Kenya", "Kiribati", "Korea, North", "Korea, South", "Kosovo", "Kuwait",
    "Kyrgyzstan",
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania",
    "Luxembourg",
    "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
    "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
    "Montenegro", "Morocco", "Mozambique", "Myanmar (Burma)",
    "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", "Nigeria",
    "North Macedonia", "Norway",
    "Oman",
    "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland",
    "Portugal",
    "Qatar",
    "Romania", "Russia", "Rwanda",
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa",
    "San Marino", "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia", "Seychelles",
    "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia",
    "South Africa", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden",
    "Switzerland", "Syria",
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad and Tobago",
    "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
    "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay",
    "Uzbekistan",
    "Vanuatu", "Vatican City", "Venezuela", "Vietnam",
    "Yemen",
    "Zambia", "Zimbabwe",
]

In [None]:
# Keep only rows for entries of country_list, restricted to the years 1940 through 2021 (both inclusive)
is_listed_country = co2_emission['Country'].isin(country_list)
is_in_period = co2_emission['Year'].between(1940, 2021)
co2_emission_filtered = co2_emission[is_listed_country & is_in_period]
co2_emission_filtered

In [None]:
# Load dataframe2: population and demography (the file is decoded as latin-1)
population_csv = "/content/drive/MyDrive/RawData/population-and-demography.csv"
population = pd.read_csv(population_csv, encoding='latin-1')
population

In [None]:
# Select columns: only the country name, the year and the total population are needed
selected_columns = ['Country name', 'Year', 'Population']

# .copy() makes the subset an independent frame, so modifying it later (for example an
# in-place rename) does not act on a slice of `population` and raise SettingWithCopyWarning.
population_filtered = population[selected_columns].copy()
population_filtered

In [None]:
# Rename columnname so the population table shares the 'Country' key with the CO2 table.
# population_filtered is a column subset of `population`; renaming it with inplace=True
# operates on that slice and triggers SettingWithCopyWarning. Rebinding the result of
# rename() avoids the warning and leaves the cell safe to re-run.
population_filtered = population_filtered.rename(columns={'Country name': 'Country'})
population_filtered

In [None]:
# Merge dataframes: attach the population figures to the emissions rows.
# Default (inner) merge, so only Country/Year pairs present in both tables are kept.
merge_keys = ['Country', 'Year']
merged_data = co2_emission_filtered.merge(population_filtered, on=merge_keys)
merged_data

In [None]:
# Add a new column for CO2 emissions per capita: total emissions divided by population, row by row
co2_per_person = merged_data['CO2 emissions'].div(merged_data['Population'])
merged_data = merged_data.assign(**{'CO2 per capita': co2_per_person})
merged_data

In [None]:
# Restrict the dataset to the years 2000 through 2021 (both bounds inclusive)
is_recent_year = merged_data['Year'].between(2000, 2021)
filtered_data = merged_data[is_recent_year]
filtered_data

In [None]:
# Calculate the difference in CO2 per capita between the start and the end of the 2000-2021 window per country.
# first()/last() follow row order rather than the Year column, so sort chronologically first
# instead of relying on the order in which the merged rows happen to arrive.
# NOTE(review): first()/last() return the first/last non-missing value per group, so a country
# lacking a value for 2000 or 2021 is compared over a shorter span — confirm this is acceptable.
per_capita_by_country = filtered_data.sort_values('Year').groupby('Country')['CO2 per capita']
difference_by_country = per_capita_by_country.last() - per_capita_by_country.first()
difference_by_country

In [None]:
# Express the change relative to each country's starting value, as a percentage.
# The factor is -100, so a drop in CO2 per capita comes out as a positive number.
baseline_by_country = filtered_data.groupby('Country')['CO2 per capita'].first()
relative_change_by_country = difference_by_country.div(baseline_by_country).mul(-100)
relative_change_by_country

In [None]:
# Turn the per-country series into a two-column table: the index becomes 'Country',
# the values become 'Relative Change (%)'
relative_change_df = (
    relative_change_by_country
    .rename('Relative Change (%)')
    .rename_axis('Country')
    .reset_index()
)
relative_change_df

In [None]:
# Rank countries from the largest to the smallest relative change, then keep the first 25 rows
ranked_changes = relative_change_df.sort_values(by='Relative Change (%)', ascending=False)
top_25_changes_by_country = ranked_changes.iloc[:25]
top_25_changes_by_country

In [None]:
# To answer the question of which countries have made the biggest steps in limiting CO2 emissions, I looked at the available data on CO2 emissions per capita.
# I then looked at the period from 2000 to 2021 and made a selection of the top 25 countries.
# Looking at the relative change since 2000 gives a clear overview of the countries that have made the biggest steps in reducing CO2 emissions per capita.
# The plotted percentage is positive when emissions per capita fell (the relative change is multiplied by -100 when it is computed).

# Plot results
plt.figure(figsize=(14, 10))
plt.barh(top_25_changes_by_country['Country'], top_25_changes_by_country['Relative Change (%)'], color='green')
plt.xlabel('Relative Change in CO2 per Capita (%)')
# The data window is 2000-2021 (see the year filter above); the title previously said 2020-2021
plt.title('Top 25 Countries with Greatest Relative Progress in CO2 per Capita (2000-2021)')
plt.yticks(rotation=0)
plt.show()

In [203]:
# 3. Which non-fossil fuel energy technology will have the best price in the future?

# Because I couldn't find data on the prices of non-fossil energy technologies, I instead show which green sources are currently available and which source is growing fastest as a share of total renewable generation, in order to provide some insight.

In [None]:
# Load dataframe1: share of electricity by source (the file is decoded as latin-1)
elec_share_csv = "/content/drive/MyDrive/RawData/share-elec-by-source.csv"
elec_sources = pd.read_csv(elec_share_csv, encoding='latin-1')
elec_sources

In [None]:
# Columns to keep: the Entity/Year keys followed by the share of each renewable source
selected_columns = [
    'Entity',
    'Year',
    'Hydro (% electricity)',
    'Solar (% electricity)',
    'Wind (% electricity)',
    'Other renewables excluding bioenergy (% electricity)',
    'Bioenergy (% electricity)',
]

# Take the 'World' rows for those columns and drop every row that has a missing value
is_world_row = elec_sources['Entity'] == 'World'
filtered_sources_df = elec_sources.loc[is_world_row, selected_columns].dropna()
filtered_sources_df

In [None]:
# Everything after the Entity/Year keys is a share column; cast all of them to float in one go
share_columns = selected_columns[2:]
filtered_sources_df = filtered_sources_df.astype({name: float for name in share_columns})

# Plot data: one line per renewable source, with the year on the x-axis
plt.figure(figsize=(10, 6))

years = filtered_sources_df['Year']
for source_name in share_columns:
    plt.plot(years, filtered_sources_df[source_name], label=source_name)

plt.title('Share of source in total electricity consumption')
plt.xlabel('Year')
plt.ylabel('Percentage of Electricity')
# Anchor the legend's upper-left corner at the axes' upper-right corner, i.e. outside the plot area
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))

plt.show()

In [None]:
# Load dataframe2: production volume of modern renewables (the file is decoded as latin-1)
renewable_production_csv = "/content/drive/MyDrive/RawData/modern-renewable-prod.csv"
production_volume = pd.read_csv(renewable_production_csv, encoding='latin-1')
production_volume

In [None]:
# Keep the rows whose Entity is 'World', then drop every row containing a missing value in any column
world_rows = production_volume['Entity'].eq('World')
filtered_volume_df = production_volume.loc[world_rows].dropna()
filtered_volume_df

In [None]:
# Restrict the world-level production data to the years 2000 through 2021 (both bounds inclusive)
is_in_window = filtered_volume_df['Year'].between(2000, 2021)
filtered_volume_year_df = filtered_volume_df[is_in_window]
filtered_volume_year_df

In [None]:
# Work on an independent, chronologically ordered copy of the filtered DataFrame.
# pct_change() compares each row with the row before it, so the rows must be in Year order.
filtered_volume_year_df = filtered_volume_year_df.sort_values('Year').copy()

# Production columns to compute growth for. This list was previously not defined anywhere in
# the notebook, so the cell failed with a NameError on a fresh kernel. The names are the ones
# implied by the '<column>_growth' columns that the growth plot below reads.
renewable_columns = [
    'Electricity from wind (TWh)',
    'Electricity from hydro (TWh)',
    'Electricity from solar (TWh)',
    'Other renewables including bioenergy (TWh)',
]

# Add new columns for annual growth by renewable source
for column in renewable_columns:
    # Year-over-year change in percent, stored as '<column>_growth'
    growth_column_name = f'{column}_growth'
    filtered_volume_year_df[growth_column_name] = filtered_volume_year_df[column].pct_change() * 100

# Delete the first row (where annual growth is not defined)
# NOTE: re-running this cell without re-running the year filter drops one more leading year each time
filtered_volume_year_df = filtered_volume_year_df.dropna()

filtered_volume_year_df

In [None]:
# This graph shows the annual growth per renewable source. It is clear that solar and wind in particular are growing relatively steadily, while hydro is declining,
# even though hydro currently generates the most volume, as was visible in the previous graph.
# It is difficult to say which non-fossil energy technology will have the best price in the future.
# Based on percentage growth, wind and solar are likely to account for an increasing share of total energy consumption in the future.

# Growth columns to draw, one line each
growth_columns = [
    'Electricity from wind (TWh)_growth',
    'Electricity from hydro (TWh)_growth',
    'Electricity from solar (TWh)_growth',
    'Other renewables including bioenergy (TWh)_growth',
]

# Plot the annual growth for each renewable source against the year
plt.figure(figsize=(12, 6))

growth_years = filtered_volume_year_df['Year']
for growth_name in growth_columns:
    plt.plot(growth_years, filtered_volume_year_df[growth_name], label=growth_name)

plt.xlabel('Year')
plt.ylabel('Annual growth (%)')
plt.title('Annual growth of renewable sources (2000-2021)')
plt.grid(True)
plt.legend()
plt.show()