In [15]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
temperature_df = pd.read_csv("Merged_Target_EarthTemp.csv")
temperature_df.head()

Unnamed: 0.1,Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Earth Surface Temperature Anomaly (land and ocean)
0,0,1964,291.2,319.62,1260.3,-0.252
1,1,1967,291.5,322.18,1284.03,0.0
2,2,1970,293.8,325.620315,1351.7,0.108
3,3,1971,294.0,326.32,1357.2,-0.126
4,4,1972,295.6,328.74211,1380.1,0.072


In [5]:
nitrous_oxide_df = temperature_df[['Year', 'Mean Nitrous Oxide Concentration']]
carbon_oxide_df = temperature_df[['Year', 'Mean Carbon Dioxide Concentration']]
methane_df = temperature_df[['Year', 'Mean Methane Concentration']]

In [6]:
n2o_model = LinearRegression()
co2_model = LinearRegression()
ch4_model = LinearRegression()

In [21]:
year_list = []
for i in range(31):
    year = 2020 + i
    year_list.append(year)
year_list[:5]

[2020, 2021, 2022, 2023, 2024]

In [20]:
X_future = np.array(year_list)
X_future = X_future.reshape(-1,1)
X_future[:5]

array([[2020],
       [2021],
       [2022],
       [2023],
       [2024]])

In [7]:
#Nitrous Oxide (N2O) model
X = nitrous_oxide_df["Year"].values.reshape(-1, 1)

# Display sample data
X[:5]

array([[1964],
       [1967],
       [1970],
       [1971],
       [1972]], dtype=int64)

In [8]:
y = nitrous_oxide_df['Mean Nitrous Oxide Concentration']

In [9]:
n2o_model.fit(X, y)

LinearRegression()

In [12]:
# Display the nitrous oxide model's best fit line formula
print(f"Nitrous Oxide Model's formula: y = {n2o_model.intercept_} + {n2o_model.coef_[0]}*X")

Nitrous Oxide Model's formula: y = -1187.978698284533 + 0.751828183784963*X


In [13]:
# Make predictions using the X set
predicted_y_values = n2o_model.predict(X)

In [16]:
# Compute the metrics for the linear regression model
score = n2o_model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
std = np.std(y)

# Print relevant metrics.
print("Nitrous Oxide Model")
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The score is 0.993167548002619.
The r2 is 0.993167548002619.
The mean squared error is 0.9710046046618114.
The root mean squared error is 0.9853956589420371.
The standard deviation is 11.921265220335567.


In [19]:
predicted_future_n2o = n2o_model.predict(X_future)
predicted_future_n2o

array([330.71423296, 331.46606114, 332.21788933, 332.96971751,
       333.7215457 , 334.47337388, 335.22520206, 335.97703025,
       336.72885843, 337.48068662, 338.2325148 , 338.98434298,
       339.73617117, 340.48799935, 341.23982753, 341.99165572,
       342.7434839 , 343.49531209, 344.24714027, 344.99896845,
       345.75079664, 346.50262482, 347.254453  , 348.00628119,
       348.75810937, 349.50993756, 350.26176574, 351.01359392,
       351.76542211, 352.51725029, 353.26907847])

In [22]:
y = carbon_oxide_df['Mean Carbon Dioxide Concentration']

In [23]:
co2_model.fit(X, y)

LinearRegression()

In [25]:
# Display carbon dioxide model's best fit line formula
print(f"Carbon Dioxide Model's formula: y = {co2_model.intercept_} + {co2_model.coef_[0]}*X")

Carbon Dioxide Model's formula: y = -2990.5078644359583 + 1.6812300158583764*X


In [26]:
# Make predictions using the X set
predicted_y_values = co2_model.predict(X)

In [30]:
# Compute the metrics for the linear regression model
score = co2_model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
std = np.std(y)

# Print releveant metrics.
print("Carbon Dioxide Model")
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

Carbon Dioxide Model
The score is 0.9829016082807511.
The r2 is 0.9829016082807511.
The mean squared error is 12.278052990865797.
The root mean squared error is 3.5040052783729934.
The standard deviation is 26.7970582622817.


In [29]:
predicted_future_co2 = co2_model.predict(X_future)
predicted_future_co2

array([405.5767676 , 407.25799761, 408.93922763, 410.62045765,
       412.30168766, 413.98291768, 415.66414769, 417.34537771,
       419.02660772, 420.70783774, 422.38906776, 424.07029777,
       425.75152779, 427.4327578 , 429.11398782, 430.79521784,
       432.47644785, 434.15767787, 435.83890788, 437.5201379 ,
       439.20136792, 440.88259793, 442.56382795, 444.24505796,
       445.92628798, 447.60751799, 449.28874801, 450.96997803,
       452.65120804, 454.33243806, 456.01366807])

In [31]:
y = methane_df['Mean Methane Concentration']

In [32]:
ch4_model.fit(X,y)

LinearRegression()

In [33]:
# Display the model's best fit line formula
print(f"Model's formula: y = {ch4_model.intercept_} + {ch4_model.coef_[0]}*X")

Model's formula: y = -17616.499200086542 + 9.66494528624219*X


In [35]:
predicted_y_values = ch4_model.predict(X)

In [36]:
# Compute the metrics for the linear regression model
score = ch4_model.score(X, y, sample_weight=None)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
std = np.std(y)

# Print releveant metrics.

print(f"The score is {score}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The score is 0.8735541908081198.
The mean squared error is 3376.3184705964127.
The root mean squared error is 58.10609667320988.
The standard deviation is 163.40655807256834.


In [37]:
predicted_future_ch4 = ch4_model.predict(X_future)
predicted_future_ch4

array([1906.69027812, 1916.35522341, 1926.0201687 , 1935.68511398,
       1945.35005927, 1955.01500455, 1964.67994984, 1974.34489513,
       1984.00984041, 1993.6747857 , 2003.33973099, 2013.00467627,
       2022.66962156, 2032.33456684, 2041.99951213, 2051.66445742,
       2061.3294027 , 2070.99434799, 2080.65929328, 2090.32423856,
       2099.98918385, 2109.65412913, 2119.31907442, 2128.98401971,
       2138.64896499, 2148.31391028, 2157.97885556, 2167.64380085,
       2177.30874614, 2186.97369142, 2196.63863671])

In [48]:
seaLevel_df = pd.read_csv("Merged_Target_Glaciers.csv")
seaLevel_df.head()

Unnamed: 0.1,Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Mean cumulative mass balance of glaciers,Precipitation Anomaly,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,0,1964,291.2,319.62,1260.3,-2.545,-0.041776,4.169291,-0.546,-0.252
1,1,1967,291.5,322.18,1284.03,-2.662,-0.096894,4.452756,-0.42,0.0
2,2,1970,293.8,325.620315,1351.7,-3.519,-0.070516,4.677165,-0.294,0.108
3,3,1971,294.0,326.32,1357.2,-3.758,0.03224,4.88189,-0.51,-0.126
4,4,1972,295.6,328.74211,1380.1,-4.016,-0.772485,5.240157,-0.186,0.072


In [50]:
precipitation_df = seaLevel_df[['Year','Precipitation Anomaly']]
precipitation_df.head()

Unnamed: 0,Year,Precipitation Anomaly
0,1964,-0.041776
1,1967,-0.096894
2,1970,-0.070516
3,1971,0.03224
4,1972,-0.772485


In [54]:
prec_model = LinearRegression()

In [58]:
X = precipitation_df['Year'].values.reshape(-1,1)

In [52]:
y = precipitation_df['Precipitation Anomaly']

In [70]:
prec_model.fit(X,y)

LinearRegression()

In [60]:
# Display the model's best fit line formula
print(f"Model's formula: y = {prec_model.intercept_} + {prec_model.coef_[0]}*X")

Model's formula: y = -9.827822958578155 + 0.00502766414255854*X


In [68]:
predicted_y_values = ch4_model.predict(X)

In [71]:
# Compute the metrics for the linear regression model
score = prec_model.score(X, y, sample_weight=None)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
std = np.std(y)

# Print releveant metrics.
print("Precipitation Model")
print(f"The score is {score}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

Precipitation Model
The score is 0.014600012833945297.
The mean squared error is 2798485.3463339545.
The root mean squared error is 1672.867402496072.
The standard deviation is 0.636201110530651.


In [64]:
predicted_future_prec = prec_model.predict(X_future)
predicted_future_prec

array([0.32805861, 0.33308627, 0.33811394, 0.3431416 , 0.34816927,
       0.35319693, 0.35822459, 0.36325226, 0.36827992, 0.37330759,
       0.37833525, 0.38336291, 0.38839058, 0.39341824, 0.39844591,
       0.40347357, 0.40850124, 0.4135289 , 0.41855656, 0.42358423,
       0.42861189, 0.43363956, 0.43866722, 0.44369488, 0.44872255,
       0.45375021, 0.45877788, 0.46380554, 0.46883321, 0.47386087,
       0.47888853])

In [73]:
predicted_future_prec_df = pd.DataFrame(
    {
        "Year":year_list,
        'Precipitation Anomaly':predicted_future_prec
    }
)
predicted_future_prec_df.head()

Unnamed: 0,Year,Precipitation Anomaly
0,2020,0.328059
1,2021,0.333086
2,2022,0.338114
3,2023,0.343142
4,2024,0.348169


In [74]:
future_ghg_df = pd.DataFrame(
    {
        "Year": X_future.reshape(1, -1)[0],
        'Mean Nitrous Oxide Concentration':predicted_future_n2o,
        'Mean Carbon Dioxide Concentration':predicted_future_co2,
        'Mean Methane Concentration':predicted_future_ch4, 
    }
)
future_ghg_df.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration
0,2020,330.714233,405.576768,1906.690278
1,2021,331.466061,407.257998,1916.355223
2,2022,332.217889,408.939228,1926.020169
3,2023,332.969718,410.620458,1935.685114
4,2024,333.721546,412.301688,1945.350059


In [75]:
future_ghg_df.to_csv("Projected_Future_GHG_2020_2050.csv")
predicted_future_prec_df.to_csv("predicted_future_prec.csv")