In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_absolute_error, mean_squared_error

In [2]:
# Load the data
url = "https://raw.githubusercontent.com/Ekenc/Project4/main/Data/Merged_Target_SeaLevel.csv"
df = pd.read_csv(url)

# Show DataFrame
df.head()

Unnamed: 0.1,Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Precipitation Anomaly,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,0,1964,291.2,319.62,1260.3,-0.041776,4.169291,-0.546,-0.252
1,1,1967,291.5,322.18,1284.03,-0.096894,4.452756,-0.42,0.0
2,2,1970,293.8,325.620315,1351.7,-0.070516,4.677165,-0.294,0.108
3,3,1971,294.0,326.32,1357.2,0.03224,4.88189,-0.51,-0.126
4,4,1972,295.6,328.74211,1380.1,-0.772485,5.240157,-0.186,0.072


In [3]:
# Load the data
url1 = "https://raw.githubusercontent.com/Ekenc/Project4/main/Data/Projected_Future_GHG_and_Precipitation_2020_2050.csv"
df1 = pd.read_csv(url1)

# Show DataFrame
df1.head()

Unnamed: 0.1,Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Precipitation Anomaly,Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,0,2020,330.714233,405.576768,1906.690278,0.328059,0.770041,1.612599
1,1,2021,331.466061,407.257998,1916.355223,0.333086,0.792619,1.645512
2,2,2022,332.217889,408.939228,1926.020169,0.338114,0.815197,1.678425
3,3,2023,332.969718,410.620458,1935.685114,0.343142,0.837775,1.711339
4,4,2024,333.721546,412.301688,1945.350059,0.348169,0.860353,1.744252


In [4]:
df1=df1.drop("Unnamed: 0", axis = 1)
df1= df1.drop("Precipitation Anomaly", axis = 1)
df1.head()

Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,2020,330.714233,405.576768,1906.690278,0.770041,1.612599
1,2021,331.466061,407.257998,1916.355223,0.792619,1.645512
2,2022,332.217889,408.939228,1926.020169,0.815197,1.678425
3,2023,332.969718,410.620458,1935.685114,0.837775,1.711339
4,2024,333.721546,412.301688,1945.350059,0.860353,1.744252


In [5]:
# Define features set
X = df.drop("Mean Adjusted Sea Level (inches)", axis = 1)
X = X.drop("Unnamed: 0", axis = 1)
X= X.drop("Precipitation Anomaly", axis = 1)
X.head()


Unnamed: 0,Year,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,1964,291.2,319.62,1260.3,-0.546,-0.252
1,1967,291.5,322.18,1284.03,-0.42,0.0
2,1970,293.8,325.620315,1351.7,-0.294,0.108
3,1971,294.0,326.32,1357.2,-0.51,-0.126
4,1972,295.6,328.74211,1380.1,-0.186,0.072


In [6]:
# Define target vector
y = df["Mean Adjusted Sea Level (inches)"]
y[:48]

0     4.169291
1     4.452756
2     4.677165
3     4.881890
4     5.240157
5     5.003937
6     5.472441
7     5.409449
8     5.303150
9     5.598425
10    6.153543
11    5.748031
12    5.771654
13    5.795276
14    5.980315
15    6.157480
16    6.248031
17    6.346457
18    6.374016
19    6.303150
20    6.507874
21    6.622047
22    6.783465
23    7.059055
24    6.669291
25    7.003937
26    7.055118
27    7.271654
28    7.366142
29    7.728346
30    7.712598
31    7.716535
32    7.885827
33    7.960630
34    8.303150
35    8.531496
36    8.834646
37    8.897638
38    9.244094
39    8.913386
40    8.579036
41    8.924984
42    9.048399
43    9.110986
44    9.234521
45    9.480223
46    9.592477
Name: Mean Adjusted Sea Level (inches), dtype: float64

In [7]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [8]:
# Create the StandardScaler instance
scaler = StandardScaler()

# Fit the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)

In [9]:
# Scale the training data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# Create the random forest classifier instance
random_model = RandomForestRegressor(n_estimators=50, random_state=78)

In [11]:
# Fit the model 
random_model = random_model.fit(X_train_scaled, y_train)

In [12]:
# Making predictions using the testing data
predictions = random_model.predict(X_test_scaled)

In [13]:
# Calculate the mean squared error, model evaluation
print(
  'mean_squared_error : ', mean_squared_error(y_test, predictions))
print(
  'mean_absolute_error : ', mean_absolute_error(y_test, predictions))

mean_squared_error :  0.057610066259857935
mean_absolute_error :  0.1757480997666674


In [14]:
# Compute the model score with training data
random_model.score(X_train_scaled, y_train)

0.9975872948682385

In [15]:
# Compute the model score with testing data
random_model.score(X_test_scaled, y_test)

0.9655062690953223

In [16]:
#Check Model with Predictions data
Sea_Level_Predictions = random_model.predict(df1)
Sea_Level_Predictions



array([9.439804  , 9.44943381, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.4719477 ,
       9.4719477 , 9.4719477 , 9.4719477 , 9.4719477 , 9.48997143,
       9.48997143, 9.48997143, 9.48997143, 9.48997143, 9.48997143,
       9.50428968])

In [18]:
#Check Model with Predictions data
Sea_Level_Predictions = random_model.predict(df1)
Sea_Level_Predictions



array([9.439804  , 9.44943381, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.45392397,
       9.45392397, 9.45392397, 9.45392397, 9.45392397, 9.4719477 ,
       9.4719477 , 9.4719477 , 9.4719477 , 9.4719477 , 9.48997143,
       9.48997143, 9.48997143, 9.48997143, 9.48997143, 9.48997143,
       9.50428968])

In [19]:
Linear_Prediction_df=df1.copy()
Linear_Prediction_df["Mean Adjusted Sea Level (inches)"] = Sea_Level_Predictions
Linear_Prediction_df = Linear_Prediction_df[['Mean Nitrous Oxide Concentration', 'Mean Carbon Dioxide Concentration', 'Mean Methane Concentration', 'Mean Adjusted Sea Level (inches)', 'Sea Temperature Anomaly', 'Earth Surface Temperature Anomaly (land and ocean)']]
Linear_Prediction_df

Unnamed: 0,Mean Nitrous Oxide Concentration,Mean Carbon Dioxide Concentration,Mean Methane Concentration,Mean Adjusted Sea Level (inches),Sea Temperature Anomaly,Earth Surface Temperature Anomaly (land and ocean)
0,330.714233,405.576768,1906.690278,9.439804,0.770041,1.612599
1,331.466061,407.257998,1916.355223,9.449434,0.792619,1.645512
2,332.217889,408.939228,1926.020169,9.453924,0.815197,1.678425
3,332.969718,410.620458,1935.685114,9.453924,0.837775,1.711339
4,333.721546,412.301688,1945.350059,9.453924,0.860353,1.744252
5,334.473374,413.982918,1955.015005,9.453924,0.882931,1.777165
6,335.225202,415.664148,1964.67995,9.453924,0.905509,1.810079
7,335.97703,417.345378,1974.344895,9.453924,0.928088,1.842992
8,336.728858,419.026608,1984.00984,9.453924,0.950666,1.875905
9,337.480687,420.707838,1993.674786,9.453924,0.973244,1.908819


In [20]:
from google.colab import files
Linear_Prediction_df.to_csv("RandomForestRegression_Projected_SeaLevel_Dataframe.csv",encoding = 'utf-8-sig')
files.download('RandomForestRegression_Projected_SeaLevel_Dataframe.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>