In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense

# Load the CSV file
data = pd.read_csv('climate_change_data.csv')  

# Drop the Date column
data = data.drop(columns=['Date'])

# Preprocessing: Encode the categorical data (Location and Country) and scale numerical features
location_encoder = LabelEncoder()
country_encoder = LabelEncoder()

data['Location'] = location_encoder.fit_transform(data['Location'])
data['Country'] = country_encoder.fit_transform(data['Country'])

# Inputs: Temperature, Location, Country
X = data[['Temperature', 'Location', 'Country']].values
y = data['CO2 Emissions'].values

# Standardize the input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape input data to fit GRU model (samples, timesteps, features)
X_scaled = np.reshape(X_scaled, (X_scaled.shape[0], 1, X_scaled.shape[1]))

# Build the GRU model
model = Sequential()
model.add(GRU(64, input_shape=(1, X_scaled.shape[2]), return_sequences=False))
model.add(Dense(1))  # Output layer for CO2 Emissions

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_scaled, y, epochs=50, batch_size=16, validation_split=0.2)

# Plotting the training and validation loss using Plotly
fig = go.Figure()

# Add Training Loss trace
fig.add_trace(go.Scatter(x=list(range(1, len(history.history['loss']) + 1)),
                         y=history.history['loss'],
                         mode='lines',
                         name='Training Loss'))

# Add Validation Loss trace
fig.add_trace(go.Scatter(x=list(range(1, len(history.history['val_loss']) + 1)),
                         y=history.history['val_loss'],
                         mode='lines',
                         name='Validation Loss'))

# Update the layout
fig.update_layout(title='Model Loss Over Epochs',
                  xaxis_title='Epochs',
                  yaxis_title='Loss',
                  legend=dict(x=0.5, y=1.1, orientation="h"))

# Display the figure
fig.show()


Epoch 1/50


  super().__init__(**kwargs)


[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 158973.7500 - val_loss: 131273.7969
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 122122.1562 - val_loss: 97159.1406
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 91507.1953 - val_loss: 74826.3359
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 70803.6719 - val_loss: 58069.2266
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 54835.9414 - val_loss: 44790.1367
Epoch 6/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 42273.2578 - val_loss: 34093.8398
Epoch 7/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 32276.9805 - val_loss: 25476.9883
Epoch 8/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 23624.4102 - val_l

In [3]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the model with the training set
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2)

# Evaluate the model with the test set
test_loss = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)


Epoch 1/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2471.7339 - val_loss: 2609.8823
Epoch 2/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 882us/step - loss: 2477.2495 - val_loss: 2609.9949
Epoch 3/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 929us/step - loss: 2519.4895 - val_loss: 2613.9287
Epoch 4/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 875us/step - loss: 2483.0334 - val_loss: 2610.9055
Epoch 5/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 900us/step - loss: 2457.6096 - val_loss: 2611.3054
Epoch 6/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 926us/step - loss: 2518.9890 - val_loss: 2611.1426
Epoch 7/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2415.5723 - val_loss: 2610.9697
Epoch 8/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 925us/step - loss: 2517.2339 - 

In [9]:
# Display the first few predictions and their corresponding actual values
# Generate predictions
predictions = model.predict(X_test)

for i in range(5):
    print(f"Predicted CO2 Emissions: {predictions[i]}, Actual CO2 Emissions: {y_test[i]}")


Predicted CO2 Emissions: 1.5174035168878914, Actual CO2 Emissions: 2.584274222988132
Predicted CO2 Emissions: 17.452035586156523, Actual CO2 Emissions: 16.889595700320577
Predicted CO2 Emissions: 15.102244690440658, Actual CO2 Emissions: 12.970993964176984
Predicted CO2 Emissions: 12.998969544862605, Actual CO2 Emissions: 11.431670777490162
Predicted CO2 Emissions: 5.259615564276767, Actual CO2 Emissions: 6.757663526086432


In [11]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go
import plotly.io as pio

# Set the renderer for Linux environments (for Plotly to work correctly)
pio.renderers.default = 'iframe'  # 'iframe' works well in most environments, including LinuxONE

# Assume X and y are your features and target (CO2 emissions)
# Generate dummy data for demonstration (replace this with actual data)
np.random.seed(42)
X = np.random.rand(100, 1) * 10  # 100 samples, 1 feature
y = 2 * X.flatten() + np.random.randn(100) * 2  # Linear relationship with noise

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model with L2 regularization (Ridge Regression)
model = Ridge(alpha=1.0)
model.fit(X_train, y_train)

# Get predictions from the model for the test set
predictions = model.predict(X_test)

# Calculate Mean Squared Error to evaluate model performance
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')

# Ensure y_test and predictions are 1D arrays
predictions = predictions.flatten()
y_test = y_test.flatten()

# Create the figure with enhanced visualization
fig = go.Figure()

# Add Actual CO2 trace
fig.add_trace(go.Scatter(
    x=list(range(len(y_test))),
    y=y_test,
    mode='lines+markers',
    name='Actual CO2 Emissions',
    line=dict(color='blue', width=2),
    marker=dict(size=6, symbol='circle', color='blue', opacity=0.7)
))

# Add Predicted CO2 trace
fig.add_trace(go.Scatter(
    x=list(range(len(predictions))),
    y=predictions,
    mode='lines+markers',
    name='Predicted CO2 Emissions',
    line=dict(color='orange', width=2, dash='dash'),  # Dashed line for predicted values
    marker=dict(size=6, symbol='cross', color='orange', opacity=0.7)
))

# Update the layout for better aesthetics
fig.update_layout(
    title='Predicted vs Actual CO2 Emissions',
    xaxis_title='Sample Index',
    yaxis_title='CO2 Emissions',
    legend=dict(x=0.5, y=1.1, orientation="h"),
    template='plotly_white',  # Clean white background
    hovermode='closest'
)

# Display the figure
fig.show()


Mean Squared Error: 2.6120375137100647


In [12]:
model = Sequential()
model.add(GRU(128, input_shape=(1, X_scaled.shape[2]), return_sequences=False))  # Changed to 128 units
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [13]:
# Save the model
model.save('gru_co2_model.h5')

# To load the model later
from tensorflow.keras.models import load_model
loaded_model = load_model('gru_co2_model.h5')


