# **Basic visualization**

## Objectives

* To identify and analyze appropriate graph types for effectively visualizing and validating hypotheses in the project, ensuring clear interpretation and accurate decision-making.


* Hypothesis explored: 

Hypothesis 1 : Average temperature has increased over the years

Hypothesis 2 : Countries with a higher percentage of renewable energy have lower CO2 emissions

Hypothesis 3 : A decrease in forest area percentage leads to an increase in extreme weather events

Hypothesis 4 : Higher population growth contributes to increased CO2 emissions and rising sea levels

## Inputs

* temperature.csv file

## Outputs

* Range of graphs

## Additional Comments





---

# Change working directory

* We are assuming you will store the notebooks in a subfolder, therefore when running the notebook in the editor, you will need to change the working directory

We need to change the working directory from its current folder to its parent folder
* We access the current directory with os.getcwd()

In [1]:
import os
current_dir = os.getcwd()
current_dir

'c:\\Users\\Ewa\\Documents\\vscode-projects\\GlobalEcoInsights2000-2024\\jupyter_notebooks'

We want to make the parent of the current directory the new current directory
* os.path.dirname() gets the parent directory
* os.chir() defines the new current directory

In [2]:
os.chdir(os.path.dirname(current_dir))
print("Globalecoinsights")

Globalecoinsights


Confirm the new current directory

In [3]:
current_dir = os.getcwd()
current_dir

'c:\\Users\\Ewa\\Documents\\vscode-projects\\GlobalEcoInsights2000-2024'

# Basic visualization

Loading Python Libraries

In [19]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.widgets import CheckButtons

---

In [25]:
# Load the dataset into a pandas dataframe using a relative path
df = pd.read_csv("temperature.csv")
df.head(5)

Unnamed: 0,Year,Country,Avg_Temperature_degC,CO2_Emissions_tons_per_capita,Sea_Level_Rise_mm,Rainfall_mm,Population,Renewable_Energy_pct,Extreme_Weather_Events,Forest_Area_pct
0,2006,UK,8.9,9.3,3.1,1441,530911230,20.4,14,59.8
1,2019,USA,31.0,4.8,4.2,2407,107364344,49.2,8,31.0
2,2014,France,33.9,2.8,2.2,1241,441101758,33.3,9,35.5
3,2010,Argentina,5.9,1.8,3.2,1892,1069669579,23.7,7,17.7
4,2007,Germany,26.9,5.6,2.4,1743,124079175,12.5,4,17.4


# Hypothesis: "Average temperature has increased over the years." 

* Visualization: Line chart showing temperature trends per country

In [30]:
import pandas as pd
import plotly.graph_objects as go

# Assuming df is your original DataFrame containing the data
# Group by 'Country' and 'Year' and calculate the average temperature for each combination
df_avg_temp = df.groupby(['Country', 'Year'], as_index=False)['Avg_Temperature_degC'].mean()

# Create the base figure
fig = go.Figure()

# Add a line trace for each country
countries = df_avg_temp['Country'].unique()
for country in countries:
    country_data = df_avg_temp[df_avg_temp['Country'] == country]
    fig.add_trace(go.Scatter(x=country_data['Year'], y=country_data['Avg_Temperature_degC'],
                             mode='lines+markers', name=country))

# Update layout for better interactivity
fig.update_layout(
    title="Temperature Trends Over Time by Country",
    xaxis=dict(title="Year", showgrid=True),
    yaxis=dict(title="Average Temperature (°C)", showgrid=True),
    hovermode="x unified",  # Show all values for a given year on hover
    legend_title="Country",
    template="plotly_white",  # Clean background for better visibility
)

# Add a dropdown menu to filter by country
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=[{"visible": [True] * len(countries)}],
                    label="Show All",
                    method="update"
                ),
                *[
                    dict(
                        args=[{"visible": [country == selected_country for country in countries]}],
                        label=selected_country,
                        method="update"
                    )
                    for selected_country in countries
                ]
            ],
            direction="down",
            showactive=True,
            active=0,
            x=0.1,
            xanchor="left",
            y=1.15,
            yanchor="top"
        )
    ]
)

# Show the interactive plot
fig.show()


Visualization: Line chart showing average temperature trends over time

In [27]:
# Calculate the average temperature per year
df_avg = df.groupby("Year").agg({"Avg_Temperature_degC": "mean"}).reset_index()

# Create the line chart for average temperature trends over time
fig = px.line(df_avg, x="Year", y="Avg_Temperature_degC",
              title="Average Temperature Trends Over Time",
              labels={"Avg_Temperature_degC": "Average Temperature (°C)"},
              color_discrete_sequence=px.colors.sequential.Cividis)  # Cividis is color-blind friendly

# Improve interactivity
fig.update_layout(
    hovermode="x unified",  # Show all values for a given year on hover
    template="plotly_white",  # Clean background for better visibility
    xaxis=dict(title="Year", showgrid=True),
    yaxis=dict(title="Average Temperature (°C)", showgrid=True),
    dragmode="zoom",  # Enable zooming and panning
    hoverlabel=dict(namelength=-1),  # Show full country names on hover
)

# Show the interactive plot
fig.show()


# Hypothesis: "Countries with a higher percentage of renewable energy have lower CO2 emissions." 

* Visualization: Bar chart comparing countries.

In [5]:
# Create a bar chart for Renewable Energy vs. CO2 Emissions
fig = px.bar(df, x="Country", y="CO2_Emissions_tons_per_capita",
             color="Renewable_Energy_pct",
             title="Impact of Renewable Energy on CO2 Emissions",
             labels={"CO2_Emissions_tons_per_capita": "CO2 Emissions (tons per capita)",
                     "Renewable_Energy_pct": "Renewable Energy (%)"},
             color_continuous_scale=px.colors.sequential.Cividis)  # Color-blind friendly

# Improve interactivity
fig.update_layout(
    xaxis=dict(title="Country", tickangle=-45),  # Rotate country names for readability
    yaxis=dict(title="CO2 Emissions (tons per capita)"),
    template="plotly_white"
)

# Show the interactive bar chart
fig.show()

# Hypothesis: "A decrease in forest area percentage leads to an increase in extreme weather events." 

* Visualization: Dual-axis line chart showing trends.

In [12]:
# Aggregate data by year for trend analysis
df_grouped = df.groupby("Year").agg({
    'Forest_Area_pct': 'mean',          # Example: mean of Forest_Area_pct
    'Extreme_Weather_Events': 'mean',   # Example: mean of Extreme_Weather_Events
    # Add other numeric columns here with their aggregation method
}).reset_index()

# Create figure
fig = go.Figure()

# Add line for Forest Area Percentage
fig.add_trace(go.Scatter(x=df_grouped["Year"], y=df_grouped["Forest_Area_pct"],
                         mode='lines+markers', name='Forest Area (%)',
                         line=dict(color='green')))

# Add line for Extreme Weather Events
fig.add_trace(go.Scatter(x=df_grouped["Year"], y=df_grouped["Extreme_Weather_Events"],
                         mode='lines+markers', name='Extreme Weather Events',
                         line=dict(color='red'), yaxis="y2"))

# Update layout for dual-axis
fig.update_layout(
    title="Extreme Weather Events vs. Deforestation",
    xaxis=dict(title="Year"),
    yaxis=dict(title="Forest Area (%)", side="left", showgrid=False),
    yaxis2=dict(title="Extreme Weather Events", side="right", overlaying="y", showgrid=False),
    template="plotly_white"
)

# Show the plot
fig.show()

# Hypothesis: "Higher population growth contributes to increased CO2 emissions and rising sea levels." 

* Multi-line chart with population, CO2, and sea level rise.

The code groups the data by the "Year" column, then sums the values of "Population", "CO2_Emissions_tons_per_capita", and "Sea_Level_Rise_mm" for each year. Finally, it checks and prints the columns of the resulting df_grouped DataFrame to ensure that the necessary columns are present.

In [18]:
# Check the grouped data
df_grouped = df.groupby("Year")[['Population', 'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm']].sum()
print(df_grouped.columns)  # Ensure 'Population' is present

Index(['Population', 'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm'], dtype='object')


In [20]:
# 1. Check the original column names
print(df.columns)

# 2. Perform the grouping operation
df_grouped = df.groupby("Year")[['Population', 'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm']].sum()

# 3. Check the columns after grouping
print(df_grouped.columns)


Index(['Year', 'Country', 'Avg_Temperature_degC',
       'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm', 'Rainfall_mm',
       'Population', 'Renewable_Energy_pct', 'Extreme_Weather_Events',
       'Forest_Area_pct'],
      dtype='object')
Index(['Population', 'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm'], dtype='object')


The code below plots three separate interactive charts: Population over time, CO2 emission per capita over time and Sea level rise over time

In [27]:
# Check the original DataFrame before grouping
print(df.columns)

# Group by 'Year' and aggregate the necessary columns
df_grouped = df.groupby("Year", as_index=False).agg({
    'Population': 'sum',  # Make sure to aggregate the 'Population' column
    'CO2_Emissions_tons_per_capita': 'mean',
    'Sea_Level_Rise_mm': 'mean'
})

# Reset index to ensure 'Year' is a column (if it becomes an index during grouping)
df_grouped = df_grouped.reset_index()

# Check if 'Year' and 'Population' are now available as columns
print(df_grouped.columns)

# Plot for Population
fig_population = go.Figure()
fig_population.add_trace(go.Scatter(x=df_grouped["Year"], y=df_grouped["Population"],
                                    mode='lines+markers', name='Population',
                                    line=dict(color='blue')))

fig_population.update_layout(
    title="Population Over Time",
    xaxis_title="Year",
    yaxis_title="Population",
    template="plotly_dark"
)

# Show the Population plot
fig_population.show()

# Plot for CO2 Emissions per capita
fig_co2 = go.Figure()
fig_co2.add_trace(go.Scatter(x=df_grouped["Year"], y=df_grouped["CO2_Emissions_tons_per_capita"],
                             mode='lines+markers', name='CO2 Emissions per capita',
                             line=dict(color='green')))

fig_co2.update_layout(
    title="CO2 Emissions per capita Over Time",
    xaxis_title="Year",
    yaxis_title="CO2 Emissions (tons per capita)",
    template="plotly_dark"
)

# Show the CO2 Emissions plot
fig_co2.show()

# Plot for Sea Level Rise
fig_sea_level = go.Figure()
fig_sea_level.add_trace(go.Scatter(x=df_grouped["Year"], y=df_grouped["Sea_Level_Rise_mm"],
                                  mode='lines+markers', name='Sea Level Rise (mm)',
                                  line=dict(color='red')))

fig_sea_level.update_layout(
    title="Sea Level Rise Over Time",
    xaxis_title="Year",
    yaxis_title="Sea Level Rise (mm)",
    template="plotly_dark"
)

# Show the Sea Level Rise plot
fig_sea_level.show()

Index(['Year', 'Country', 'Avg_Temperature_degC',
       'CO2_Emissions_tons_per_capita', 'Sea_Level_Rise_mm', 'Rainfall_mm',
       'Population', 'Renewable_Energy_pct', 'Extreme_Weather_Events',
       'Forest_Area_pct'],
      dtype='object')
Index(['index', 'Year', 'Population', 'CO2_Emissions_tons_per_capita',
       'Sea_Level_Rise_mm'],
      dtype='object')
