In [None]:
# Dependencies and setup
import matplotlib.pyplot as plt
import pandas as pd
import pandas as pd
import numpy as np
import os
from scipy.stats import linregress
import scipy.stats as st
from scipy.stats import ttest_ind

In [None]:
# Build every path with os.path.join so the notebook runs on any operating system
cleaned_data_dir = os.path.join("..", "Cleaned Data")
coal_path = os.path.join(cleaned_data_dir, "coal_consumption.csv")
natural_gas_path = os.path.join(cleaned_data_dir, "natural_gas_consumption.csv")
nuclear_path = os.path.join(cleaned_data_dir, "nuclear_consumption.csv")
petroleum_path = os.path.join(cleaned_data_dir, "petroleum_consumption.csv")
world_pop_path = os.path.join(cleaned_data_dir, "world_population_clean.csv")
total_consumption = os.path.join(cleaned_data_dir, "total_consumption.csv")
pop_density = os.path.join(cleaned_data_dir, "population_density_clean.csv")
temp_change = os.path.join(cleaned_data_dir, "Temperature_Delta_1986 to 2016.csv")

# Load the four per-fuel consumption tables (read in the order listed)
coal, natural_gas, nuclear, petroleum = (
    pd.read_csv(csv_path)
    for csv_path in (coal_path, natural_gas_path, nuclear_path, petroleum_path)
)

# Load the population, total-consumption, population-density and temperature tables
world, total, pop_density_df, temperature = (
    pd.read_csv(csv_path)
    for csv_path in (world_pop_path, total_consumption, pop_density, temp_change)
)

# Preview the first rows of one of the data frames
coal.head()

In [None]:
# Helper that extracts the label list and value list needed to plot one row of a data frame
def plot(df, country):
    """Return the column labels and rounded 1986-2016 values for one row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first column identifies each row and which contains
        columns labelled "1986" through "2016".
    country : str
        Value to look up in the first column (for example "World").

    Returns
    -------
    tuple of (list, list)
        First, every column label after the first one (not only the year
        columns). Second, the values of the matching row(s) in the
        "1986".."2016" columns, converted to float and rounded to two
        decimals. The second list is empty when no row matches.
    """
    headers = df.columns

    # The first column holds the row identifier that `country` is matched against
    id_column = headers[0]

    # Everything after the identifier column becomes the label list
    years = list(headers[1:])

    # Select the matching row(s), restricted to the 1986-2016 columns
    matched = df.loc[df[id_column] == country, "1986":"2016"]

    # Flatten to plain Python numbers and round each to two decimals
    energy_consumption = []
    for raw in matched.values.flatten().tolist():
        energy_consumption.append(round(float(raw), 2))

    return years, energy_consumption

In [None]:
# Extract the label and value lists for the "World" row of every data set
coal_x, coal_y = plot(coal, "World")
natural_x, natural_y = plot(natural_gas, "World")
nuclear_x, nuclear_y = plot(nuclear, "World")
petroleum_x, petroleum_y = plot(petroleum, "World")
world_x, world_y = plot(world, "World")

# The world population file is laid out slightly differently, so discard the
# first label that plot() returned for it
world_x.pop(0)

# Express world population in billions (two decimals) so it is readable on a plot
world_y = [round(people / 1e9, 2) for people in world_y]

In [None]:
# Line chart of world energy consumption per fuel type over time.
# Create one large figure with a single axes
fig, ax1 = plt.subplots(figsize=(20,10))
# One line per fuel; the *_x / *_y lists were built by plot() in an earlier cell
ax1.plot(coal_x, coal_y)
ax1.plot(natural_x, natural_y)
ax1.plot(nuclear_x, nuclear_y)
ax1.plot(petroleum_x, petroleum_y)
ax1.set_ylabel("Total Energy Consumption (quad Btu)")
ax1.set_xlabel("Time (years)")
# The labels are listed in the same order the lines were plotted above; loc=2 is the upper-left corner
ax1.legend(labels=["Coal", "Natural Gas", "Nuclear", "Petroleum"], loc=2)

# Disabled: overlay of world population on a secondary y-axis
# ax2 = ax1.twinx()
# ax2.plot(world_x, world_y, color="magenta")
# ax2.set_ylabel("World Population (billions)")
# ax2.legend(labels=["World Population"], loc=4)
# Trailing semicolon suppresses the text repr in the notebook output
plt.title("World Energy Consumption by Energy Type from 1986 to 2016");

# Save the image as a png (path is relative to the notebook's working directory)
plt.savefig("../Images/World Energy Consumption by Energy Type.png")

In [None]:
# Line plot with one line per country: labels on the x-axis, population density on the y-axis
plt.figure(figsize=(20,10))
# Iterate over every value in the "Country Name" column and plot that country's row
for country in pop_density_df["Country Name"]:
    pop_x, pop_y = plot(pop_density_df, country)
    # Drop the first label returned by plot(); presumably a non-year column that
    # follows the name column in this file -- verify against the CSV layout
    del pop_x[0]
    plt.plot(pop_x, pop_y)
# Fixed y ticks from 0 up to (but excluding) 25000 in steps of 2500
plt.yticks(np.arange(0, 25000, 2500))
# Disabled: vertical grid lines
#plt.grid(axis="x")
plt.ylabel("Population density (people per sq. km of land area)")
plt.xlabel("Time (years)")
# Trailing semicolon suppresses the text repr in the notebook output
plt.title("Population Density over Time");
# Save the image as a png
plt.savefig("../Images/Population Density over Time.png")

In [None]:
# Most countries' population density barely changes over time, so narrow the
# analysis to the extremes: the five countries with the highest and the five
# with the lowest population density in the first year of the data (1986).

# nlargest / nsmallest pick whole rows directly, keep the original row index,
# and return exactly five rows each even when values are tied (the previous
# max()/== lookup appended every tied row). DataFrame.append is also avoided
# because it was removed in pandas 2.0.
highest_5 = pop_density_df.nlargest(5, "1986")
lowest_5 = pop_density_df.nsmallest(5, "1986")

# Stack both selections: highest five first (descending), then lowest five
# (ascending), with the same columns and column order as pop_density_df
top_5 = pd.concat([highest_5, lowest_5])

# Display the data frame
top_5

In [None]:
# Grouped bar chart: Hong Kong total energy consumption next to its population density.
# plot() returns (column labels after the first, rounded 1986-2016 values);
# note the two tables use different names for Hong Kong
total_x, total_y = plot(total, "Hong Kong")
pop_density_x, pop_density_y = plot(pop_density_df, "Hong Kong SAR, China")

# Use the consumption labels as tick labels, and divide population density by
# 10,000 so both bar series are readable on one shared y-axis
labels = total_x
pop_density_y = [round(value/10000,2) for value in pop_density_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by Hong Kong (quad Btu)')
rects2 = ax.bar(x + width/2, pop_density_y, width, label="Population density (10,000 people per sq. km of land area)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('Hong Kong Total Energy Consumption vs Population Density')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend();

# Saving is currently disabled for this figure
#plt.savefig("../Images/Hong Kong Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of Hong Kong total energy consumption (y) against population density (x).
# Both series are fetched again with plot(), so pop_density_y holds the unscaled
# values here (the bar-chart cell above divided it by 10,000)
total_x, total_y = plot(total, "Hong Kong")
pop_density_x, pop_density_y = plot(pop_density_df, "Hong Kong SAR, China")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Hong Kong Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Hong Kong (quad Btu)")
plt.grid()

In [None]:
# Linear regression of Hong Kong total energy consumption (y) on population density (x).
# pop_density_y and total_y are the lists produced by plot() in an earlier cell
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Hong Kong Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Hong Kong (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (6200, 0.4) is given in data coordinates
plt.annotate(r,(6200,0.4),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")
# Saving is currently disabled for this figure
#plt.savefig("../Images/Hong Kong Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Grouped bar chart: Singapore total energy consumption next to its population density.
# plot() returns (column labels after the first, rounded 1986-2016 values)
total_x, total_y = plot(total, "Singapore")
top_5_x, top_5_y = plot(pop_density_df, "Singapore")

# Use the consumption labels as tick labels, and divide population density by
# 10,000 so both bar series are readable on one shared y-axis
labels = total_x
top_5_y = [round(value/10000,2) for value in top_5_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by Singapore (quad Btu)')
rects2 = ax.bar(x + width/2, top_5_y, width, label="Population density (10,000 people per sq. km of land area)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('Singapore Total Energy Consumption vs Population Density')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend();

# Save the image as a png
plt.savefig("../Images/Singapore Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of Singapore total energy consumption (y) against population density (x).
# Fetch both series with plot(); the density values are used unscaled here
total_x, total_y = plot(total, "Singapore")
pop_density_x, pop_density_y = plot(pop_density_df, "Singapore")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Singapore Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Singapore (quad Btu)")
plt.grid()

In [None]:
# Linear regression of Singapore total energy consumption (y) on population density (x).
# pop_density_y and total_y are the lists produced by plot() in an earlier cell
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Singapore Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Singapore (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (5500, 0.5) is given in data coordinates
plt.annotate(r,(5500,0.5),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")
# Save the image as a png
plt.savefig("../Images/Singapore Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Grouped bar chart: Mongolia total energy consumption next to its population density.
# plot() returns (column labels after the first, rounded 1986-2016 values)
total_x, total_y = plot(total, "Mongolia")
top_5_x, top_5_y = plot(pop_density_df, "Mongolia")

# Use the consumption labels as tick labels, and divide population density by
# 10 (not 10,000 as for the dense city states) so both series are readable on one y-axis
labels = total_x
top_5_y = [round(value/10,2) for value in top_5_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by Mongolia (quad Btu)')
rects2 = ax.bar(x + width/2, top_5_y, width, label="Population density (10 people per sq. km of land area)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('Mongolia Total Energy Consumption vs Population Density')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend();

# Saving is currently disabled for this figure
#plt.savefig("../Images/Mongolia Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of Mongolia total energy consumption (y) against population density (x).
# Fetch both series with plot(); the density values are used unscaled here
total_x, total_y = plot(total, "Mongolia")
pop_density_x, pop_density_y = plot(pop_density_df, "Mongolia")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Mongolia Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Mongolia (quad Btu)")
plt.grid()

In [None]:
# Linear regression of Mongolia total energy consumption (y) on population density (x).
# pop_density_y and total_y are the lists produced by plot() in an earlier cell
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Mongolia Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by Mongolia (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (1.6, 0.13) is given in data coordinates
plt.annotate(r,(1.6,0.13),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")
# Saving is currently disabled for this figure
#plt.savefig("../Images/Mongolia Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Grouped bar chart: United Kingdom total energy consumption next to its population density.
# plot() returns (column labels after the first, rounded 1986-2016 values)
total_x, total_y = plot(total, "United Kingdom")
top_5_x, top_5_y = plot(pop_density_df, "United Kingdom")

# Use the consumption labels as tick labels, and divide population density by
# 100 so both bar series are readable on one shared y-axis
labels = total_x
top_5_y = [round(value/100,2) for value in top_5_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by United Kingdom (quad Btu)')
rects2 = ax.bar(x + width/2, top_5_y, width, label="Population density (100 people per sq. km of land area)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('United Kingdom Total Energy Consumption vs Population Density')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend();

# Save the image as a png
plt.savefig("../Images/United Kingdom Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of United Kingdom total energy consumption (y) against population density (x).
# Fetch both series with plot(); the density values are used unscaled here
total_x, total_y = plot(total, "United Kingdom")
pop_density_x, pop_density_y = plot(pop_density_df, "United Kingdom")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("United Kingdom Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by United Kingdom (quad Btu)")
plt.grid()

In [None]:
# Linear regression of United Kingdom total energy consumption (y) on population density (x).
# pop_density_y and total_y are the lists produced by plot() in an earlier cell
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("United Kingdom Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by United Kingdom (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (235, 8.25) is given in data coordinates
plt.annotate(r,(235,8.25),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/United Kingdom Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Grouped bar chart: United States total energy consumption next to its population density.
# plot() returns (column labels after the first, rounded 1986-2016 values)
total_x, total_y = plot(total, "United States")
top_5_x, top_5_y = plot(pop_density_df, "United States")

# Use the consumption labels as tick labels; population density is plotted
# unscaled for this country (the rescaling line below is disabled)
labels = total_x
#top_5_y = [round(value/10,2) for value in top_5_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by United States (quad Btu)')
rects2 = ax.bar(x + width/2, top_5_y, width, label="Population density (people per sq. km of land area)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('United States Total Energy Consumption vs Population Density')
ax.set_xticks(x)
ax.set_xticklabels(labels)
# loc=2 places the legend in the upper-left corner
ax.legend(loc=2);

# Save the image as a png
plt.savefig("../Images/United States Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of United States total energy consumption (y) against population density (x).
# Fetch both series with plot(); the density values are used unscaled here
total_x, total_y = plot(total, "United States")
pop_density_x, pop_density_y = plot(pop_density_df, "United States")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("United States Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by United States (quad Btu)")
plt.grid()

In [None]:
# Linear regression of United States total energy consumption (y) on population density (x).
# pop_density_y and total_y are the lists produced by plot() in an earlier cell
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("United States Total Energy Consumption vs Population Density")
plt.xlabel("Population density (people per sq. km of land area)")
plt.ylabel("Total Energy Consumption by United States (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (30, 80) is given in data coordinates
plt.annotate(r,(30,80),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/United States Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Grouped bar chart: world total energy consumption next to world population.
# The second series comes from the world population table (not the population
# density table), so it is labelled as population rather than density.
# plot() returns (column labels after the first, rounded 1986-2016 values)
total_x, total_y = plot(total, "World")
top_5_x, top_5_y = plot(world, "World")

# Use the consumption labels as tick labels, and express population in units
# of 10,000,000 people so both bar series are readable on one shared y-axis
labels = total_x
top_5_y = [round(value/10000000,2) for value in top_5_y]

# One integer slot per label; each bar is `width` wide
x = np.arange(len(labels))
width = 0.35

# Set the plot size and create the plotting area
fig, ax = plt.subplots(figsize=(20,5))

# Shift the two series half a bar width to either side of each slot so they sit side by side
rects1 = ax.bar(x - width/2, total_y, width, label='Total Energy Consumption by World (quad Btu)')
rects2 = ax.bar(x + width/2, top_5_y, width, label="World population (10,000,000 people)")

# Axis label, title, and one tick per slot labelled with the entries of `labels`
ax.set_xlabel("Time (years)")
ax.set_title('World Total Energy Consumption vs Population')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend();

# Thicken the major tick marks on the x-axis
ax.tick_params(axis='x', which='major', width=5)
# Save the image as a png; the file name is left unchanged so anything that
# already refers to this image keeps working
plt.savefig("../Images/World Total Energy Consumption vs Population Density.png")

In [None]:
# Scatter plot of world total energy consumption (y) against world population (x).
# The x series comes from the world population table (not the population
# density table), so the title and axis label describe population.
# The pop_density_* variable names are kept because the next cell reads them.
total_x, total_y = plot(total, "World")
pop_density_x, pop_density_y = plot(world, "World")
# Semi-transparent blue markers with a black outline
plt.scatter(pop_density_y , total_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("World Total Energy Consumption vs Population")
plt.xlabel("World population (people)")
plt.ylabel("Total Energy Consumption by the World (quad Btu)")
plt.grid()

In [None]:
# Linear regression of world total energy consumption (y) on world population (x).
# pop_density_y is the list an earlier cell built from the world population
# table, so despite its name it holds population counts, not densities; the
# title and axis label below describe it accordingly.
x_values = np.array(pop_density_y)
y_values = np.array(total_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("World Total Energy Consumption vs Population")
plt.xlabel("World population (people)")
plt.ylabel("Total Energy Consumption by the World (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (6 billion people, 300 quad Btu) is given in data coordinates
plt.annotate(r,(6000000000,300),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png; the file name is left unchanged so anything that
# already refers to this image keeps working
plt.savefig("../Images/World Total Energy Consumption vs Population Density Regression.png", dpi=100)

In [None]:
# Keep only the rows whose Source is GCAG, restricted to the columns "Year" through "Mean"
gcag = temperature[temperature["Source"] == "GCAG"].loc[:, "Year":"Mean"]
# Rows after 1985, ordered by year, reduced to a plain list of the Mean values
gcag_clean = (
    gcag[gcag["Year"] > 1985]
    .sort_values(by="Year")["Mean"]
    .values.flatten()
    .tolist()
)

In [None]:
# Keep only the rows whose Source is GISTEMP, restricted to the columns "Year" through "Mean"
gistemp = temperature[temperature["Source"] == "GISTEMP"].loc[:, "Year":"Mean"]
# Rows after 1985, ordered by year, reduced to a plain list of the Mean values
gistemp_clean = (
    gistemp[gistemp["Year"] > 1985]
    .sort_values(by="Year")["Mean"]
    .values.flatten()
    .tolist()
)

In [None]:
# Overlay world petroleum consumption (left axis) with the GCAG mean
# temperature series (right axis) over time.
fig, ax1 = plt.subplots(figsize=(20,10))
# Only petroleum is drawn here, so the legend, y label and title name petroleum
# alone; the previous four-entry legend labelled this single line "Coal"
ax1.plot(petroleum_x, petroleum_y)
ax1.set_ylabel("Petroleum Consumption (quad Btu)")
ax1.set_xlabel("Time (years)")
# loc=2 places the legend in the upper-left corner
ax1.legend(labels=["Petroleum"], loc=2)

# Second y-axis sharing the same x-axis; the temperature series uses the same
# year labels as the petroleum line so both are drawn against identical x values
ax2 = ax1.twinx()
ax2.plot(petroleum_x, gcag_clean, color="magenta")
ax2.set_ylabel("Change in Average Temperature (GCAG)")
# loc=4 places this legend in the lower-right corner
ax2.legend(labels=["Change in Average Temperature"], loc=4)
# Trailing semicolon suppresses the text repr in the notebook output
plt.title("World Petroleum Consumption vs Change in Average Temperature 1986 to 2016");

# Saving is currently disabled for this figure. Note that the path below is
# the one already used by the per-fuel line chart, so enabling it as-is would
# overwrite that image.
#plt.savefig("../Images/World Energy Consumption by Energy Type.png")

In [None]:
# Scatter plot of world coal consumption (y) against the GCAG temperature values (x).
# gcag_clean and coal_y are lists built in earlier cells
plt.scatter(gcag_clean , coal_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Coal Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Coal Consumption by the World (quad Btu)")
plt.grid()

In [None]:
# Linear regression of world coal consumption (y) on the GCAG temperature values (x).
# gcag_clean and coal_y are lists built in earlier cells
x_values = np.array(gcag_clean)
y_values = np.array(coal_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Coal Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Coal Consumption by the World (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (0.5, 80) is given in data coordinates
plt.annotate(r,(0.5,80),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/Coal Consumption vs Change in Average Temperature Regression.png", dpi=100)

In [None]:
# Scatter plot of world natural gas consumption (y) against the GCAG temperature values (x).
# gcag_clean and natural_y are lists built in earlier cells
plt.scatter(gcag_clean , natural_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Natural Gas Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Natural Gas Consumption by the World (quad Btu)")
plt.grid()

In [None]:
# Linear regression of world natural gas consumption (y) on the GCAG temperature values (x).
# gcag_clean and natural_y are lists built in earlier cells
x_values = np.array(gcag_clean)
y_values = np.array(natural_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Natural Gas Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Natural Gas Consumption by the World (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (0.5, 70) is given in data coordinates
plt.annotate(r,(0.5,70),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/Natural Gas Consumption vs Change in Average Temperature Regression.png", dpi=100)

In [None]:
# Scatter plot of world nuclear consumption (y) against the GCAG temperature values (x).
# gcag_clean and nuclear_y are lists built in earlier cells
plt.scatter(gcag_clean , nuclear_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Nuclear Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Nuclear Consumption by the World (quad Btu)")
plt.grid()

In [None]:
# Linear regression of world nuclear consumption (y) on the GCAG temperature values (x).
# gcag_clean and nuclear_y are lists built in earlier cells
x_values = np.array(gcag_clean)
y_values = np.array(nuclear_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Nuclear Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Nuclear Consumption by the World (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (0.5, 40) is given in data coordinates
plt.annotate(r,(0.5,40),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/Nuclear Consumption vs Change in Average Temperature Regression.png", dpi=100)

In [None]:
# Scatter plot of world petroleum consumption (y) against the GCAG temperature values (x).
# gcag_clean and petroleum_y are lists built in earlier cells
plt.scatter(gcag_clean , petroleum_y, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
plt.title("Petroleum Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Petroleum Consumption by the World (quad Btu)")
plt.grid()

In [None]:
# Linear regression of world petroleum consumption (y) on the GCAG temperature values (x).
# gcag_clean and petroleum_y are lists built in earlier cells
x_values = np.array(gcag_clean)
y_values = np.array(petroleum_y)
# Least-squares fit; linregress returns slope, intercept, r, p-value and standard error
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y for every observed x, used to draw the regression line
regress_values = x_values * slope + intercept
# Text form of the fitted line (built here but not displayed in this cell)
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, alpha=0.5, marker="o", color="#195e83", edgecolor="black")
# "r-" draws the fitted line as a solid red line
plt.plot(x_values,regress_values,"r-")
plt.title("Petroleum Consumption vs Change in Average Temperature")
plt.xlabel("Change in Average Temperature (GCAG)")
plt.ylabel("Petroleum Consumption by the World (quad Btu)")
# Pearson correlation; element [0] of the result is the coefficient
correlation = st.pearsonr(x_values, y_values)
# Annotation text: r squared and the regression p-value, formatted to 2 and 3 significant digits
r = f"r-sq = {rvalue**2:.2}, p = {pvalue:.3}"
# The annotation position (0.5, 130) is given in data coordinates
plt.annotate(r,(0.5,130),fontsize=14,color="red");
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Save the image as a png
plt.savefig("../Images/Petroleum Consumption vs Change in Average Temperature Regression.png", dpi=100)

In [None]:
# Conduct an independent two-sample t-test at a significance level of 0.01
# Null Hypothesis (H0): Both petroleum consumption and natural gas consumption affect the change in temperature by the same amount
# Alternative Hypothesis (HA): Petroleum and natural gas consumption have different effects on the change in temperature
#st.ttest_ind(petroleum_y, natural_y, equal_var=False)

# We can reject the null hypothesis and say that petroleum and natural gas have different effects on the change in temperature.