Analysis 4:  
Is life expectancy affected by the type of energy consumed (renewable vs. fossil fuels) in each continent?

In [None]:
%matplotlib widget
# above line is for vscode use in windows. if on mac, replace widget with notebook.
# Import Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from scipy.stats import linregress
from sklearn import datasets

In [None]:
# Build "energyLEclean_NA_continents.csv" from "energyLEclean_NA.csv" for analysis 4.
# No need to run this cell again if the continents file already exists.

# Make a reference to the csv file path and import it as a DataFrame
csv_path = Path("energyLEclean_NA.csv")
data_df = pd.read_csv(csv_path)

# Description of the energy columns:
# - fossil_energy_per_capita: Per capita fossil fuel consumption, measured in kilowatt-hours. This is the sum of primary energy from coal, oil and gas.
# - renewables_energy_per_capita: Per capita primary energy consumption from renewables, measured in kilowatt-hours
# - energy_per_capita: Primary energy consumption per capita, measured in kilowatt-hours

# Reduce df to only columns needed for this analysis
analysis_columns = [
    "Year",
    "Location",
    "energy_per_capita",
    "fossil_energy_per_capita",
    "renewables_energy_per_capita",
    "LEx",
    "LE15",
    "LE65",
    "LE80",
]
reduced_df = data_df[analysis_columns]

# Only keep the continent-level rows and export them to a csv.
# isin() replaces the chain of == comparisons joined with |; it selects the same rows.
continent_names = ["Africa", "North America", "South America", "Europe", "Oceania", "Asia"]
conts_df = reduced_df.loc[reduced_df["Location"].isin(continent_names), :]
conts_df.to_csv("energyLEclean_NA_continents.csv", index=False)

In [None]:
# Read the continents-only csv back in; from this cell onward `data_df`
# refers to this continents frame.
csv_path_2 = Path("energyLEclean_NA_continents.csv")
data_df = pd.read_csv(filepath_or_buffer=csv_path_2)

# Preview the first rows of the loaded frame
data_df.head()

In [None]:
# Start analysis

# x-axis variable: one entry per calendar year, 1965 through 2022 inclusive
# (np.arange excludes the stop value, hence the + 1)
years = np.arange(1965, 2022 + 1)

#create a function to output graphs based on region input
def graph_it(region):
    """Save a dual-axis time-series figure of energy use and life expectancy for one region.

    Fossil-fuel and renewable per-capita consumption are drawn against the left
    y-axis, life expectancy at birth against a twin right y-axis. The figure is
    written to ``analysis_4_figs/analysis_4_fig_{region}`` and then closed.

    Parameters
    ----------
    region : str
        Value of the ``Location`` column in ``data_df`` to plot.

    Raises
    ------
    ValueError
        If ``data_df`` contains no rows for ``region``.
    """
    # Take the x values from the region's own Year column (sorted so the lines
    # run left to right) instead of a hard-coded range that would have to match
    # the row count and row order of the data exactly.
    region_df = data_df.loc[data_df['Location'] == region].sort_values('Year')
    if region_df.empty:
        raise ValueError(f"No rows found in data_df for region {region!r}")
    region_years = region_df['Year']
    fossil_fuel = region_df['fossil_energy_per_capita']
    renewable = region_df['renewables_energy_per_capita']
    LE_birth = region_df['LEx']

    #setting variables for formatting for easy editing
    marker_size = 3
    line_width = 1

    #plotting the left axis (energy consumption kWh)
    fig, ax1 = plt.subplots()
    energy_color = 'tab:red'
    ax1.set_xlabel('Years')
    ax1.set_ylabel('Energy Consumption (kWh)', color=energy_color)
    ax1.plot(region_years, fossil_fuel, color=energy_color, marker='x',
             markersize=marker_size, linewidth=line_width, label='Fossil Fuel')
    ax1.plot(region_years, renewable, color=energy_color, marker='d',
             markersize=marker_size, linewidth=line_width, label='Renewable Energy')
    ax1.tick_params(axis='y', labelcolor=energy_color)
    ax1.set_ylim(-2000, 75000)

    #plotting the right axis (life expectancy years)
    ax2 = ax1.twinx()  # second axes sharing the same x-axis
    le_color = 'tab:blue'
    ax2.set_ylabel('Life Expectancy at Birth (years)', color=le_color)
    ax2.plot(region_years, LE_birth, color=le_color, marker='s',
             markersize=marker_size, linewidth=line_width, label='Life Expectancy at Birth')
    ax2.tick_params(axis='y', labelcolor=le_color)
    ax2.set_ylim(-2, 85)

    #putting together: margins leave room for the two-line title and the figure legend
    fig.tight_layout()
    fig.subplots_adjust(top=0.85, bottom=0.22)
    ax2.set_title(f"Life Expectancy at Birth vs Energy Consumption Type:\n{region}")
    fig.legend(loc='lower right')
    #optional: plt.show()

    #saving each graph into another folder, creating the folder if it is missing
    Path("analysis_4_figs").mkdir(parents=True, exist_ok=True)
    fig.savefig(f"analysis_4_figs/analysis_4_fig_{region}")
    #close (not just clear) the figure so figures do not pile up across loop iterations
    plt.close(fig)
    return
#end of function

# Regions to chart; each entry is passed to graph_it as-is
regions = [
    "Africa",
    "Asia",
    "Europe",
    "North America",
    "Oceania",
    "South America",
]
# Run graph_it once per region
for region_name in regions:
    graph_it(region_name)


In [None]:
# scatter plots of LE vs Fossil Fuels with linear regression line

#create a function to output graphs based on region input
def graph_it_2(region):
    """Save a scatter plot of energy use vs life expectancy, with linear fits, for one region.

    Fossil-fuel and renewable per-capita consumption (y) are plotted against
    life expectancy at birth (x) for every row of ``region`` in ``data_df``.
    A least-squares line is fitted to each series and annotated with its
    equation and r-squared. The figure is written to
    ``analysis_4_figs/analysis_4-2_fig_{region}`` and then closed.

    Parameters
    ----------
    region : str
        Value of the ``Location`` column in ``data_df`` to plot.

    Raises
    ------
    ValueError
        If ``data_df`` contains no rows for ``region``.
    """
    region_df = data_df.loc[data_df['Location'] == region]
    if region_df.empty:
        raise ValueError(f"No rows found in data_df for region {region!r}")
    fossil_fuel = region_df['fossil_energy_per_capita']
    renewable = region_df['renewables_energy_per_capita']
    LE_birth = region_df['LEx']
    # Every year available for the region is plotted, so the title reports the
    # actual year span of the data instead of a single hard-coded year.
    first_year = int(region_df['Year'].min())
    last_year = int(region_df['Year'].max())

    # Use a dedicated figure so the layout does not depend on whatever figure
    # an earlier cell left as pyplot's current one.
    fig, ax = plt.subplots()

    #scatter plot
    ax.scatter(LE_birth, fossil_fuel, marker="x", s=3, label="Fossil Fuel", color='red')
    ax.scatter(LE_birth, renewable, marker="d", s=3, label="Renewable Energy", color='green')
    ax.set_title(
        f"Life Expectancy vs. Per Capita Energy Consumption ({first_year}-{last_year}):\n{region}"
    )
    ax.set_xlabel("Life Expectancy at Birth (years)")
    ax.set_ylabel("Energy Consumption Per Capita (kWh)")
    ax.set_xlim(40, 85)
    ax.set_ylim(-2000, 70000)
    ax.legend(loc="upper left")

    #add regression line and r-squared value for each energy type vs LEx
    _add_regression(ax, LE_birth, fossil_fuel, 'Fossil Fuel', 'red', (42, 45000))
    _add_regression(ax, LE_birth, renewable, 'Renewable', 'green', (42, 30000))

    #save the plot, creating the output folder if it is missing
    fig.tight_layout()
    Path("analysis_4_figs").mkdir(parents=True, exist_ok=True)
    fig.savefig(f"analysis_4_figs/analysis_4-2_fig_{region}")
    #close (not just clear) the figure so figures do not pile up across loop iterations
    plt.close(fig)
    return


def _add_regression(ax, x_values, y_values, heading, color, text_xy):
    """Draw the least-squares line of y on x on ``ax`` and annotate its equation and r-squared."""
    slope, intercept, rvalue, _pvalue, _stderr = linregress(x_values, y_values)
    line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
    ax.plot(x_values, x_values * slope + intercept, color=color)
    ax.annotate(
        f'{heading}:\n{line_eq}\nr-squared = {round(rvalue**2,3)}',
        text_xy, fontsize=10, color=color,
    )
#end of function

# Run graph_it_2 once per entry in `regions`
for region_name in regions:
    graph_it_2(region_name)