In [43]:
#Importing Dependencies
import pandas as pd
import numpy as np
from scipy.stats import linregress
import scipy.stats as st

In [2]:
#Creating variables for the data csv
NitrousOxide = 'Source_Data/ghg-concentrations_fig-3 Nitrous Oxide.csv'
CarbonDioxide = 'Source_Data/ghg-concentrations_fig-CO2.csv'
Methane = 'Source_Data/ghg-concentrations_fig-Methane.csv'
Glaciers = 'Source_Data/glaciers_fig-1.csv'
Precipitation = 'Source_Data/precipitation_fig-2.csv'
SeaLevel = 'Source_Data/sea-level_fig-1.csv'
SeaTemperature = 'Source_Data/sea-surface-temp_fig-1.csv'
EarthTemperature = 'Source_Data/temperature_fig-2.csv'

In [3]:
#Creating dataframes from csv
NitrousOxide_df = pd.read_csv(NitrousOxide)
CarbonDioxide_df = pd.read_csv(CarbonDioxide)
Methane_df = pd.read_csv(Methane)
Glaciers_df = pd.read_csv(Glaciers)
Precipitation_df = pd.read_csv(Precipitation)
SeaLevel_df = pd.read_csv(SeaLevel)
SeaTemperature_df = pd.read_csv(SeaTemperature)
EarthTemperature_df = pd.read_csv(EarthTemperature)

In [4]:
#Checking Nitrous Oxide concentration DataFrame
NitrousOxide_df.head()

Unnamed: 0,Year (negative values = BC),"EPICA Dome C, Antarctica",Antarctica (Battle et al.),"Cape Grim, Australia","Mauna Loa, Hawaii","South Pole, Antarctica","Barrow, Alaska"
0,1903,,278.7,,,,
1,1920,,281.3,,,,
2,1937,284.5,,,,,
3,1954,,286.2,,,,
4,1958,,288.1,,,,


In [5]:
#Renaming the Year column
NitrousOxide_df = NitrousOxide_df.rename(columns={"Year (negative values = BC)": "Year"})

In [6]:
#Rechecking the Nitrous Oxide dataframe
NitrousOxide_df.head()

Unnamed: 0,Year,"EPICA Dome C, Antarctica",Antarctica (Battle et al.),"Cape Grim, Australia","Mauna Loa, Hawaii","South Pole, Antarctica","Barrow, Alaska"
0,1903,,278.7,,,,
1,1920,,281.3,,,,
2,1937,284.5,,,,,
3,1954,,286.2,,,,
4,1958,,288.1,,,,


In [7]:
#Taking the average of the different columns to create a Mean concentration column for each year
NitrousOxide_df['mean'] = NitrousOxide_df.iloc[:, 1:6].mean(axis=1)
NitrousOxide_df.head()

Unnamed: 0,Year,"EPICA Dome C, Antarctica",Antarctica (Battle et al.),"Cape Grim, Australia","Mauna Loa, Hawaii","South Pole, Antarctica","Barrow, Alaska",mean
0,1903,,278.7,,,,,278.7
1,1920,,281.3,,,,,281.3
2,1937,284.5,,,,,,284.5
3,1954,,286.2,,,,,286.2
4,1958,,288.1,,,,,288.1


In [8]:
#Dropping the individual data columns, leaving only the year and the mean column
Cleaned_NitrousOxide_df = NitrousOxide_df[['Year', 'mean']]
#Renaming the Mean column to be more descriptive
Cleaned_NitrousOxide_df = Cleaned_NitrousOxide_df.rename(columns={'mean':'Mean Nitrous Oxide Concentration'})
Cleaned_NitrousOxide_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration
0,1903,278.7
1,1920,281.3
2,1937,284.5
3,1954,286.2
4,1958,288.1


In [9]:
#Checking the Carbon Dioxide dataframe
CarbonDioxide_df.head()

Unnamed: 0,Year,Antarctic Ice Cores,Mauna Loa,Barrow,Cape Matatula,South Pole,Cape Grim,Lampedusa,Shetland
0,1880,287.770729,,,,,,,
1,1883,292.463935,,,,,,,
2,1884,289.762413,,,,,,,
3,1884,289.226599,,,,,,,
4,1886,288.115764,,,,,,,


In [10]:
#Taking the mean of the readings for each year and creating a Mean Carbon Dioxide Concentration column
CarbonDioxide_df['Mean Carbon Dioxide Concentration'] = CarbonDioxide_df.iloc[:, 1:8].mean(axis=1)
CarbonDioxide_df.head()

Unnamed: 0,Year,Antarctic Ice Cores,Mauna Loa,Barrow,Cape Matatula,South Pole,Cape Grim,Lampedusa,Shetland,Mean Carbon Dioxide Concentration
0,1880,287.770729,,,,,,,,287.770729
1,1883,292.463935,,,,,,,,292.463935
2,1884,289.762413,,,,,,,,289.762413
3,1884,289.226599,,,,,,,,289.226599
4,1886,288.115764,,,,,,,,288.115764


In [11]:
#Creating a dataframe of the year and Mean Carbon Dioxide Concentration columns
Averaged_CarbonDioxide_df = CarbonDioxide_df[['Year', 'Mean Carbon Dioxide Concentration']]
Averaged_CarbonDioxide_df.head()

Unnamed: 0,Year,Mean Carbon Dioxide Concentration
0,1880,287.770729
1,1883,292.463935
2,1884,289.762413
3,1884,289.226599
4,1886,288.115764


In [12]:
#Taking the floor of the year column to return integer years
Averaged_CarbonDioxide_df['Year'] = Averaged_CarbonDioxide_df['Year'].apply(np.floor).astype(int)
#Checking the datatypes
Averaged_CarbonDioxide_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199 entries, 0 to 198
Data columns (total 2 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Year                               199 non-null    int32  
 1   Mean Carbon Dioxide Concentration  199 non-null    float64
dtypes: float64(1), int32(1)
memory usage: 2.5 KB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Averaged_CarbonDioxide_df['Year'] = Averaged_CarbonDioxide_df['Year'].apply(np.floor).astype(int)


In [13]:
#Doing a group by year of the dataframe, with each value being the mean of the concentraions
Cleaned_CarbonDioxide_df = Averaged_CarbonDioxide_df.groupby(['Year']).mean()
Cleaned_CarbonDioxide_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 117 entries, 1880 to 2021
Data columns (total 1 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Mean Carbon Dioxide Concentration  117 non-null    float64
dtypes: float64(1)
memory usage: 1.8 KB


In [14]:
#Checking the Methane dataframe
Methane_df.head()

Unnamed: 0,Year,"EPICA Dome C, Antarctica",Law Dome,"Cape Grim, Australia","Mauna Loa, Hawaii","Shetland Islands, Scotland"
0,1879,,822.3,,,
1,1884,,828.5,,,
2,1888,,833.6,,,
3,1893,,836.8,,,
4,1894,,852.3,,,


In [15]:
#Removing commas from the measurements
Methane_df = Methane_df.replace(',','', regex=True)
#Changing the datatypes for the measurement columns to floats
Methane_df[['Cape Grim, Australia','Mauna Loa, Hawaii', 'Shetland Islands, Scotland']] = Methane_df[['Cape Grim, Australia','Mauna Loa, Hawaii', 'Shetland Islands, Scotland']].astype(float)
Methane_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 6 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Year                        75 non-null     int64  
 1   EPICA Dome C, Antarctica    1 non-null      float64
 2   Law Dome                    37 non-null     object 
 3   Cape Grim, Australia        37 non-null     float64
 4   Mauna Loa, Hawaii           37 non-null     float64
 5   Shetland Islands, Scotland  9 non-null      float64
dtypes: float64(4), int64(1), object(1)
memory usage: 3.6+ KB


In [16]:
#Changing the Law Dome datatype to float
Methane_df['Law Dome'] = Methane_df['Law Dome'].astype(float)
Methane_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 6 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Year                        75 non-null     int64  
 1   EPICA Dome C, Antarctica    1 non-null      float64
 2   Law Dome                    37 non-null     float64
 3   Cape Grim, Australia        37 non-null     float64
 4   Mauna Loa, Hawaii           37 non-null     float64
 5   Shetland Islands, Scotland  9 non-null      float64
dtypes: float64(5), int64(1)
memory usage: 3.6 KB


In [17]:
#Creating a column for the mean concentration of Methane by year
Methane_df['Mean Methane Concentration'] = Methane_df.iloc[:, 1:5].mean(axis=1)
Methane_df.head()

Unnamed: 0,Year,"EPICA Dome C, Antarctica",Law Dome,"Cape Grim, Australia","Mauna Loa, Hawaii","Shetland Islands, Scotland",Mean Methane Concentration
0,1879,,822.3,,,,822.3
1,1884,,828.5,,,,828.5
2,1888,,833.6,,,,833.6
3,1893,,836.8,,,,836.8
4,1894,,852.3,,,,852.3


In [18]:
#Creating a new dataframe of the year and mean concentration columns
Cleaned_Methane_df = Methane_df[['Year', 'Mean Methane Concentration']]
Cleaned_Methane_df.head()

Unnamed: 0,Year,Mean Methane Concentration
0,1879,822.3
1,1884,828.5
2,1888,833.6
3,1893,836.8
4,1894,852.3


In [19]:
#Creating Cleaned Glaciers dataframe of the year and Mean cumulative mass balance
Cleaned_Glaciers_df = Glaciers_df[['Year','Mean cumulative mass balance']]
#Renaming the Mean cumulative mass balance as Mean cumulative mass balance of glaciers
Cleaned_Glaciers_df = Cleaned_Glaciers_df.rename(columns={"Mean cumulative mass balance":'Mean cumulative mass balance of glaciers'})
Cleaned_Glaciers_df.head()

Unnamed: 0,Year,Mean cumulative mass balance of glaciers
0,1956,0.0
1,1957,-0.12
2,1958,-0.976
3,1959,-1.414
4,1960,-1.968


In [20]:
#Creating the cleaned precipitation dataframe, renaming the Anomaly column to Precipitation Anomaly
Cleaned_Precipitation_df = Precipitation_df.rename(columns={'Anomaly ':'Precipitation Anomaly'})
Cleaned_Precipitation_df.head()

Unnamed: 0,Year,Precipitation Anomaly
0,1901,-0.375634
1,1902,-0.58587
2,1903,0.134996
3,1904,-0.087445
4,1905,-0.193744


In [21]:
#Dropping the error bounds in the Sea Level dataframe
SeaLevel_df = SeaLevel_df.drop(['CSIRO - Lower error bound (inches)', 'CSIRO - Upper error bound (inches)'], axis = 1)
SeaLevel_df.head()

Unnamed: 0,Year,CSIRO - Adjusted sea level (inches),NOAA - Adjusted sea level (inches)
0,1880,0.0,
1,1881,0.220472,
2,1882,-0.440945,
3,1883,-0.232283,
4,1884,0.590551,


In [22]:
#Checking the datatypes
SeaLevel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142 entries, 0 to 141
Data columns (total 3 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Year                                 142 non-null    int64  
 1   CSIRO - Adjusted sea level (inches)  134 non-null    float64
 2   NOAA - Adjusted sea level (inches)   29 non-null     float64
dtypes: float64(2), int64(1)
memory usage: 3.5 KB


In [23]:
#Getting the average of the sea level measurements for the mean column
SeaLevel_df['Mean Adjusted Sea Level (inches)'] = SeaLevel_df.iloc[:, 1:2].mean(axis=1)
SeaLevel_df.tail()

Unnamed: 0,Year,CSIRO - Adjusted sea level (inches),NOAA - Adjusted sea level (inches),Mean Adjusted Sea Level (inches)
137,2017,,9.110986,
138,2018,,9.234521,
139,2019,,9.480223,
140,2020,,9.592477,
141,2021,,9.79722,


In [24]:
#Fixing the gaps in the Mean Adjusted Sea level column
SeaLevel_df['Mean Adjusted Sea Level (inches)'] = SeaLevel_df['Mean Adjusted Sea Level (inches)'].fillna(SeaLevel_df['NOAA - Adjusted sea level (inches)'])
SeaLevel_df.tail()

Unnamed: 0,Year,CSIRO - Adjusted sea level (inches),NOAA - Adjusted sea level (inches),Mean Adjusted Sea Level (inches)
137,2017,,9.110986,9.110986
138,2018,,9.234521,9.234521
139,2019,,9.480223,9.480223
140,2020,,9.592477,9.592477
141,2021,,9.79722,9.79722


In [25]:
#Creating a cleaned dataframe of the year and mean columns
Cleaned_SeaLevel_df = SeaLevel_df[['Year', 'Mean Adjusted Sea Level (inches)']]
Cleaned_SeaLevel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142 entries, 0 to 141
Data columns (total 2 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Year                              142 non-null    int64  
 1   Mean Adjusted Sea Level (inches)  142 non-null    float64
dtypes: float64(1), int64(1)
memory usage: 2.3 KB


In [26]:
#Checking the Sea Temperature Dataframe
SeaTemperature_df.head()

Unnamed: 0,Year,Annual anomaly,Lower 95% confidence interval,Upper 95% confidence interval
0,1880,-0.42,-0.628152,-0.211848
1,1881,-0.33,-0.531096,-0.128904
2,1882,-0.348,-0.545568,-0.150432
3,1883,-0.456,-0.65004,-0.26196
4,1884,-0.582,-0.790152,-0.373848


In [27]:
#Creating a cleaned dataframe of the year and annual anomaly columns
#then renaming the annual anomaly column
Cleaned_SeaTemperature_df = SeaTemperature_df[['Year','Annual anomaly']]
Cleaned_SeaTemperature_df = Cleaned_SeaTemperature_df.rename(columns = {'Annual anomaly':'Sea Temperature Anomaly'})
Cleaned_SeaTemperature_df.head()

Unnamed: 0,Year,Sea Temperature Anomaly
0,1880,-0.42
1,1881,-0.33
2,1882,-0.348
3,1883,-0.456
4,1884,-0.582


In [28]:
#Checking the earth temperature dataframe

EarthTemperature_df.head()

Unnamed: 0,Year,Earth's surface (land and ocean),Lower troposphere (measured by satellite) (UAH),Lower troposphere (measured by satellite) (RSS)
0,1901,-0.27,,
1,1902,-0.468,,
2,1903,-0.666,,
3,1904,-0.828,,
4,1905,-0.504,,


In [29]:
#Creating a cleaned earth temperature dataframe and renaming the Earth's surface column to Earth's surface Temperature Anomaly (land and ocean)
Cleaned_EarthTemperature_df = EarthTemperature_df[['Year',"Earth's surface (land and ocean)"]]
Cleaned_EarthTemperature_df = Cleaned_EarthTemperature_df.rename(columns = {"Earth's surface (land and ocean)":'Earth Surface Temperature Anomaly (land and ocean)'})
Cleaned_EarthTemperature_df.head()

Unnamed: 0,Year,Earth Surface Temperature Anomaly (land and ocean)
0,1901,-0.27
1,1902,-0.468
2,1903,-0.666
3,1904,-0.828
4,1905,-0.504


In [30]:
#Checking the datatypes of the Cleaned Earth temperature dataframe
Cleaned_EarthTemperature_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 2 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   Year                                                121 non-null    int64  
 1   Earth Surface Temperature Anomaly (land and ocean)  121 non-null    float64
dtypes: float64(1), int64(1)
memory usage: 2.0 KB


In [31]:
#Creating a Merged Dataframe with the Nitrous Oxide and Carbon Dioxide dataframes
Merged_df = Cleaned_NitrousOxide_df.merge(Cleaned_CarbonDioxide_df, on='Year')
Merged_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration
0,1920,281.3,301.87586
1,1937,284.5,307.407693
2,1954,286.2,312.731387
3,1958,288.1,315.336785
4,1964,291.2,319.371736


In [32]:
#Adding the Cleaned Methane dataframe
Merged_df = Merged_df.merge(Cleaned_Methane_df, on='Year')
Merged_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration
0,1964,291.2,319.371736,1260.3
1,1967,291.5,321.615712,1284.03
2,1970,293.8,325.118531,1351.7
3,1971,294.0,326.046129,1357.2
4,1972,295.6,328.74211,1380.1


In [33]:
#Adding the other dataframes
Merged_df = Merged_df.merge(Cleaned_Glaciers_df, on='Year')
Merged_df = Merged_df.merge(Cleaned_Precipitation_df, on='Year')
Merged_df = Merged_df.merge(Cleaned_SeaLevel_df, on='Year')
Merged_df = Merged_df.merge(Cleaned_SeaTemperature_df, on='Year')
Merged_df = Merged_df.merge(Cleaned_EarthTemperature_df, on='Year')
Merged_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Mean cumulative mass balance of glaciers,Precipitation Anomaly,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,1964,291.2,319.371736,1260.3,-2.545,-0.041776,4.169291,-0.546,-0.252
1,1967,291.5,321.615712,1284.03,-2.662,-0.096894,4.452756,-0.42,0.0
2,1970,293.8,325.118531,1351.7,-3.519,-0.070516,4.677165,-0.294,0.108
3,1971,294.0,326.046129,1357.2,-3.758,0.03224,4.88189,-0.51,-0.126
4,1972,295.6,328.74211,1380.1,-4.016,-0.772485,5.240157,-0.186,0.072


In [34]:
#Checking the datatypesand the non-null count
Merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46 entries, 0 to 45
Data columns (total 9 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   Year                                                46 non-null     int64  
 1   Mean Nitrous Oxide Concentration                    46 non-null     float64
 2   Mean Carbon Dioxide Concentration                   46 non-null     float64
 3   Mean Methane Concentration                          46 non-null     float64
 4   Mean cumulative mass balance of glaciers            46 non-null     float64
 5   Precipitation Anomaly                               46 non-null     float64
 6   Mean Adjusted Sea Level (inches)                    46 non-null     float64
 7   Sea Temperature Anomaly                             46 non-null     float64
 8   Earth Surface Temperature Anomaly (land and ocean)  46 non-null     float64
dtypes:

In [35]:
#Creating a new Merged_Target_Glaciers Dataframe from the Merged Dataframe
Merged_Target_Glaciers_df = Merged_df
Merged_Target_Glaciers_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Mean cumulative mass balance of glaciers,Precipitation Anomaly,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,1964,291.2,319.371736,1260.3,-2.545,-0.041776,4.169291,-0.546,-0.252
1,1967,291.5,321.615712,1284.03,-2.662,-0.096894,4.452756,-0.42,0.0
2,1970,293.8,325.118531,1351.7,-3.519,-0.070516,4.677165,-0.294,0.108
3,1971,294.0,326.046129,1357.2,-3.758,0.03224,4.88189,-0.51,-0.126
4,1972,295.6,328.74211,1380.1,-4.016,-0.772485,5.240157,-0.186,0.072


In [36]:
#Creating a separate Merged Target Sea Level Dataframe
Merged_Target_SeaLevel_df = Cleaned_NitrousOxide_df.merge(Cleaned_CarbonDioxide_df, on='Year')
Merged_Target_SeaLevel_df = Merged_Target_SeaLevel_df.merge(Cleaned_Methane_df, on = 'Year')
Merged_Target_SeaLevel_df = Merged_Target_SeaLevel_df.merge(Cleaned_Precipitation_df, on='Year')
Merged_Target_SeaLevel_df = Merged_Target_SeaLevel_df.merge(Cleaned_SeaLevel_df, on='Year')
Merged_Target_SeaLevel_df = Merged_Target_SeaLevel_df.merge(Cleaned_SeaTemperature_df, on='Year')
Merged_Target_SeaLevel_df = Merged_Target_SeaLevel_df.merge(Cleaned_EarthTemperature_df, on='Year')
Merged_Target_SeaLevel_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Precipitation Anomaly,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,1964,291.2,319.371736,1260.3,-0.041776,4.169291,-0.546,-0.252
1,1967,291.5,321.615712,1284.03,-0.096894,4.452756,-0.42,0.0
2,1970,293.8,325.118531,1351.7,-0.070516,4.677165,-0.294,0.108
3,1971,294.0,326.046129,1357.2,0.03224,4.88189,-0.51,-0.126
4,1972,295.6,328.74211,1380.1,-0.772485,5.240157,-0.186,0.072


In [37]:
#Checking the Datatypes of the Merged_Target_SeaLevel Dataframe
Merged_Target_SeaLevel_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47 entries, 0 to 46
Data columns (total 8 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   Year                                                47 non-null     int64  
 1   Mean Nitrous Oxide Concentration                    47 non-null     float64
 2   Mean Carbon Dioxide Concentration                   47 non-null     float64
 3   Mean Methane Concentration                          47 non-null     float64
 4   Precipitation Anomaly                               47 non-null     float64
 5   Mean Adjusted Sea Level (inches)                    47 non-null     float64
 6   Sea Temperature Anomaly                             47 non-null     float64
 7   Earth Surface Temperature Anomaly (land and ocean)  47 non-null     float64
dtypes: float64(7), int64(1)
memory usage: 3.3 KB


In [38]:
#Exporting the dataframes to csv
Merged_Target_Glaciers_df.to_csv('Datasets/Merged_Target_Glaciers.csv')
Merged_Target_SeaLevel_df.to_csv('Datasets/Merged_Target_SeaLevel.csv')

In [45]:
#Statistical analysis of the Nitrous Oxide concentration versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Mean Nitrous Oxide Concentration']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Nitrous Oxide Concentration"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the atmospheric Nitrous Oxide concentration and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -0.6000318164x + 174.7276927976
The r-squared is: 0.9591631690861425
The p-value is: 3.3848834933372847e-32
The correlation between the atmospheric Nitrous Oxide concentration and the Mean cumulative mass balance of global glaciers: -0.98


In [52]:
#Statistical analysis of the Nitrous Oxide concentration versus the Sea Level
x_values = Merged_Target_Glaciers_df['Mean Nitrous Oxide Concentration']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Nitrous Oxide Concentration"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the atmospheric Nitrous Oxide concentration and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 0.1277522522x + -32.9037954159
The r-squared is: 0.9728222055684881
The p-value is: 4.326919769842663e-36
The correlation between the atmospheric Nitrous Oxide concentration and the Mean Adjusted Sea Level (inches): 0.99


In [46]:
#Statistical analysis of the Carbon Dioxide concentration versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Mean Carbon Dioxide Concentration']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Carbon Dioxide Concentration"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the atmospheric Nitrous Oxide concentration and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -0.270364001x + 85.8144274753
The r-squared is: 0.9790917573564591
The p-value is: 1.3462290478951084e-38
The correlation between the atmospheric Nitrous Oxide concentration and the Mean cumulative mass balance of global glaciers: -0.99


In [53]:
#Statistical analysis of the Carbon Dioxide concentration versus the Sea Level
x_values = Merged_Target_Glaciers_df['Mean Carbon Dioxide Concentration']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Carbon Dioxide Concentration"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the atmospheric Carbon Dioxide concentration and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 0.0571667021x + -13.8293387005
The r-squared is: 0.9794095885612599
The p-value is: 9.60940402065278e-39
The correlation between the atmospheric Carbon Dioxide concentration and the Mean Adjusted Sea Level (inches): 0.99


In [48]:
#Statistical analysis of the Methane concentration versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Mean Methane Concentration']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Methane Concentration"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the atmospheric Methane concentration and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -0.0351445697x + 46.2220421609
The r-squared is: 0.6755073757003469
The p-value is: 2.541030215497435e-12
The correlation between the atmospheric Methane concentration and the Mean cumulative mass balance of global glaciers: -0.82


In [54]:
#Statistical analysis of the Methane concentration versus the Sea Level
x_values = Merged_Target_Glaciers_df['Mean Methane Concentration']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean Methane Concentration"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the atmospheric Methane concentration and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 0.0078407919x + -6.1416707309
The r-squared is: 0.7522910497671008
The p-value is: 6.356430102562891e-15
The correlation between the atmospheric Methane concentration and the Mean Adjusted Sea Level (inches): 0.87


In [49]:
#Statistical analysis of the Precipitation anomaly versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Precipitation Anomaly']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Precipitation Anomaly"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the atmospheric Precipitation Anomaly and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -1.607376057x + -12.1154903499
The r-squared is: 0.02158130237542404
The p-value is: 0.3299364080115639
The correlation between the atmospheric Precipitation Anomaly and the Mean cumulative mass balance of global glaciers: -0.15


In [55]:
#Statistical analysis of the Precipitation Anomaly versus the Sea Level
x_values = Merged_Target_Glaciers_df['Precipitation Anomaly']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Precipitation Anomaly"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the Precipitation Anomaly and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 0.3910154487x + 6.8669133308
The r-squared is: 0.028574780408382894
The p-value is: 0.2614191075913501
The correlation between the Precipitation Anomaly and the Mean Adjusted Sea Level (inches): 0.17


In [50]:
#Statistical analysis of the Sea Temperature Anomaly versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Sea Temperature Anomaly']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Sea Temperature Anomaly"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the Sea Temperature Anomaly and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -17.4218472958x + -8.9197569598
The r-squared is: 0.8436200877081806
The p-value is: 2.4261659004728995e-19
The correlation between the Sea Temperature Anomaly and the Mean cumulative mass balance of global glaciers: -0.92


In [56]:
#Statistical analysis of the Sea Temperature Anomaly versus the Sea Level
x_values = Merged_Target_Glaciers_df['Sea Temperature Anomaly']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Sea Temperature Anomaly"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the Sea Temperature Anomaly and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 3.6457858383x + 6.2092583252
The r-squared is: 0.8265958547383347
The p-value is: 2.379296894429871e-18
The correlation between the Sea Temperature Anomaly and the Mean Adjusted Sea Level (inches): 0.91


In [51]:
#Statistical analysis of the Earth Surface Temperature Anomaly versus the Mean cumulative mass balance of glaciers
x_values = Merged_Target_Glaciers_df['Earth Surface Temperature Anomaly (land and ocean)']
y_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Earth Surface Temperature Anomaly (land and ocean)"],Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"])
print(f"The correlation between the Earth Surface Temperature Anomaly (land and ocean) and the Mean cumulative mass balance of global glaciers: {round(correlation[0],2)}")

y = -12.6434827569x + -2.4728887045
The r-squared is: 0.872566843854064
The p-value is: 2.6435777661994836e-21
The correlation between the Earth Surface Temperature Anomaly (land and ocean) and the Mean cumulative mass balance of global glaciers: -0.93


In [57]:
#Statistical analysis of the Earth Temperature Anomaly versus the Sea Level
x_values = Merged_Target_Glaciers_df['Earth Surface Temperature Anomaly (land and ocean)']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Earth Surface Temperature Anomaly (land and ocean)"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the Earth Surface Temperature Anomaly (land and ocean) and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = 2.6457370533x + 4.8602349638
The r-squared is: 0.8548915679373122
The p-value is: 4.649865638539607e-20
The correlation between the Earth Surface Temperature Anomaly (land and ocean) and the Mean Adjusted Sea Level (inches): 0.92


In [58]:
#Statistical analysis of the Mean cumulative mass balance of glaciers versus the Sea Level (Our two targets)
x_values = Merged_Target_Glaciers_df['Mean cumulative mass balance of glaciers']
y_values = Merged_Target_Glaciers_df['Mean Adjusted Sea Level (inches)']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values,y_values)
regress_values = x_values * slope + intercept
lin_eq = "y = " + str(round(slope,10)) + "x + " + str(round(intercept,10))
print(lin_eq)
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
correlation = st.pearsonr(Merged_Target_Glaciers_df["Mean cumulative mass balance of glaciers"],Merged_Target_Glaciers_df["Mean Adjusted Sea Level (inches)"])
print(f"The correlation between the Mean cumulative mass balance of glaciers and the Mean Adjusted Sea Level (inches): {round(correlation[0],2)}")

y = -0.2070698361x + 4.3699782133
The r-squared is: 0.9593699002502128
The p-value is: 3.026968391280557e-32
The correlation between the Mean cumulative mass balance of glaciers and the Mean Adjusted Sea Level (inches): -0.98


In [40]:
#Creating SeaTemp dataframes to do Sea Temperature projection models
Merged_Target_SeaTemp_df = Cleaned_NitrousOxide_df.merge(Cleaned_CarbonDioxide_df, on='Year')
Merged_Target_SeaTemp_df = Merged_Target_SeaTemp_df.merge(Cleaned_Methane_df, on = 'Year')
Merged_Target_SeaTemp_df = Merged_Target_SeaTemp_df.merge(Cleaned_SeaTemperature_df, on = 'Year')

In [41]:
#Creating EarthTemp dataframes to do Earth Temperature projection models
Merged_Target_EarthTemp_df = Cleaned_NitrousOxide_df.merge(Cleaned_CarbonDioxide_df, on='Year')
Merged_Target_EarthTemp_df = Merged_Target_EarthTemp_df.merge(Cleaned_Methane_df, on = 'Year')
Merged_Target_EarthTemp_df = Merged_Target_EarthTemp_df.merge(Cleaned_EarthTemperature_df, on = 'Year')

In [42]:
#Exporting the EarthTemp and SeaTemp to Csv
Merged_Target_EarthTemp_df.to_csv('Datasets/Merged_Target_EarthTemp.csv')
Merged_Target_SeaTemp_df.to_csv('Datasets/Merged_Target_SeaTemp.csv')