# Climate Change

In [None]:
# Dependencies and Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Import Data from dataset
# Open the workbook once and parse the "Data" sheet from that handle instead
# of handing the same path to pandas a second time.
file_basic_path = "data_sets/"
newFile = pd.ExcelFile(file_basic_path + "climate_change_data.xls")
climchang_df = pd.read_excel(newFile, sheet_name="Data")
climchang_df.columns

In [None]:
# Column labels 1990 through 2011 (22 consecutive years).
year = list(range(1990, 2012))

# Convert each of those columns to numeric; entries that cannot be parsed
# become NaN because of errors='coerce'.
for x in year:
    numeric_column = pd.to_numeric(climchang_df[x], errors='coerce')
    climchang_df[x] = numeric_column

In [None]:
# List the distinct values found in the "Series name" column.
series_names = climchang_df['Series name']
series_names.unique()

In [None]:
# Keep only the rows whose series is the per-capita CO2 emissions series.
is_co2_per_capita = climchang_df['Series name'] == 'CO2 emissions per capita (metric tons)'
co2_df = climchang_df.loc[is_co2_per_capita]
co2_df

### Is there a correlation between yearly global average temperature and yearly average global CO2 level?
* Null hypothesis: There is no correlation between yearly global average temp and yearly average global CO2 levels
* Alternative hypothesis: There is a positive correlation between yearly global average temp and yearly average global CO2 levels

### If there is a correlation between an increase in global temperature and CO2 levels, then is there a relationship between a country’s average temperature and their CO2 level? Can we see a local effect on a country’s temperature from their CO2 levels?
* Null Hypothesis: A country’s increase in CO2 level does not increase the country’s average temperature (by year)
* Alternative Hypothesis: A country’s increase in CO2 level does increase the country’s average temperature.

In [None]:
# Load the land-temperature-by-country CSV and preview its first rows.
temp_csv_path = "data_sets/GlobalLandTemperaturesByCountry.csv"
df_temp = pd.read_csv(temp_csv_path)
df_temp.head()

In [None]:
# Inspect the dtype of the raw "dt" column.
# Read it from the Series itself: np.dtype() expects a dtype specification,
# and passing a Series only works when its dtype happens to be NumPy-backed.
df_temp["dt"].dtype

In [None]:
# Remove every row that has at least one missing value.
df_temp.dropna(how="any", inplace=True)
# Preview the cleaned temperature frame. `df` is only created in a later cell,
# so referencing it here raises NameError on a fresh kernel.
df_temp.head()

In [None]:
# Split each "dt" string on "-" and keep the leading field.
date_fields = df_temp['dt'].str.split('-')
splited_series = date_fields.str.get(0)
splited_series

In [None]:
# Cast the extracted field to 64-bit integers, store it as the "Year" column,
# and preview the frame to confirm the column was added.
year_values = splited_series.astype(np.int64)
df_temp["Year"] = year_values
df_temp.head()

In [None]:
# Check the data type of the new "Year" column.
# Use the Series' own .dtype attribute rather than wrapping the Series in
# np.dtype(), which expects a dtype specification, not a Series.
print(df_temp["Year"].dtype)

In [None]:
# Remove the original "dt" column from df_temp (in place).
df_temp.drop(columns=["dt"], inplace=True)

In [None]:
# Load the "Country" sheet of the climate-change workbook and preview it.
climate_workbook_path = "data_sets/climate_change_data.xls"
region_df = pd.read_excel(climate_workbook_path, sheet_name="Country")
region_df.head()

In [None]:
# Show the column labels of region_df as a plain list.
region_df.columns.tolist()

In [None]:
# Build the region lookup: select the "Region" and "Country name" columns,
# then expose the country column under the label "Country".
region_country = pd.DataFrame(region_df, columns=['Region', "Country name"])
df = region_country.rename(columns={"Country name": "Country"})
df

In [None]:
# Give df_temp a "Region" column filled with empty strings.
df_temp = df_temp.assign(Region="")
df_temp

In [None]:
# Populate the Region column through a merge on "Country".
# Both inputs carry a "Region" column, so the merge yields Region_x (from
# df_temp) and Region_y (from df); drop the former and rename the latter.
df_final = (
    df_temp
    .merge(df, on='Country')
    .drop(columns=["Region_x", "AverageTemperatureUncertainty"])
    .rename(columns={"Region_y": "Region"})
)
df_final[["Year", "Region", "Country", "AverageTemperature"]]

In [None]:
# Group by region and year, then take the mean of AverageTemperature.
temp_by_region_year = df_final.groupby(["Region", "Year"])["AverageTemperature"]
avg_temp_reg = temp_by_region_year.mean()
avg_temp_reg

In [None]:
# Bar chart of the mean temperature per region.
# Aggregate before plotting: handing every row of df_final to bar() draws one
# bar per row, all overlapping at the same region position, so the chart would
# not show an average at all.
region_mean_temp = df_final.groupby("Region")["AverageTemperature"].mean()
x_axis = region_mean_temp.index
y_axis = region_mean_temp.values

fig, ax = plt.subplots()
ax.bar(x_axis, y_axis, color='b', align="center")

# Set a Title and labels
ax.set_title("Average temperature by region")
ax.set_xlabel("Region")
ax.set_ylabel("Average temperature")
# Tilt the region names so neighbouring tick labels do not overlap.
ax.tick_params(axis="x", labelrotation=45)

# Adjust the layout before show(); once the figure has been shown, a later
# tight_layout() call no longer affects what was rendered.
fig.tight_layout()
plt.show()

In [None]:
# CO2 df: load the spreadsheet and relabel its "year" column as "Year".
mean_co2_emission = pd.read_excel(
    "data_sets/global_mean_CO2_emissions_year.xlsx"
).rename(columns={"year": "Year"})
mean_co2_emission.head()

In [None]:
# Left-join the CO2 frame onto the temperature frame using the shared "Year"
# column, so every row of df_final is kept.
merged_df = df_final.merge(mean_co2_emission, how="left", on="Year")
merged_df

In [None]:
# Ignore everything below this line

In [None]:
#climchang_df.head()

In [None]:
# Create DF with data regions
#country_df = pd.read_excel("data_sets/climate_change_data.xls", sheet_name="Country")
#country_df.head()

In [None]:
# Join two datasets on country code
#join = pd.merge(climchang_df,country_df, on='Country code', how='left')
#join.head()

In [None]:
# Drop duplicate/irrelevant columns and rename
#join.drop(["SCALE", "Series code", "Decimals", "Country name_y", "Capital city", "Income group", "Lending category"], axis=1, inplace=True)
#join.head()

In [4]:
# Rename columns and df
#join.rename(columns = {"Country name_x": "Country name"}, inplace=True)
#cc_country = join
#cc_country.head()

In [3]:
# show data for series CO2 and temperature only
#series = ['CO2 emissions per capita (metric tons)','Average daily min/max temperature (1961-1990, Celsius)']       
#df = cc_country[cc_country['Series name'].isin(series)]
#df

In [2]:
# transpose the CO2 rows to columns

In [1]:
#sort by region
#df.sort_values(by="Region", axis=0)
#plt.plot(x_values, y_values)

### Is there a correlation between a country’s wealth (GDP per Capita) and its yearly average CO2 emissions? 
* Null hypothesis: Wealth of a country does not affect CO2 emissions. 
* Alternative hypothesis: The wealthier a country, the more CO2 they emit.

### Is there a correlation between global average temperature and global population? (population value to be taken at the end of each year)
* Null hypothesis: There is no correlation between global yearly average temp and global population.
* Alternative hypothesis: There is a positive correlation between global yearly average temp and global population.

Does this correlation (population vs. temperature) exist in individual countries, or does it vary depending on location? This sets us up for the next two questions.

### Is there a correlation between a specific country’s CO2 levels and Temperature vs happiness Index? 
* Null hypothesis: There is no correlation between CO2 levels/Temperature and happiness index
* Alternative hypothesis: As CO2 levels/Temperature increase, citizens of a country are less happy

### Is there a correlation between a country’s yearly average CO2 levels and life expectancy? Does a country’s CO2 level affect the life expectancy of a population?
* Null hypothesis: There is no correlation between a country’s yearly average CO2 level and its population’s life expectancy.
* Alternative hypothesis: As CO2 levels increase, a population’s life expectancy decreases.