In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense
from keras.models import Sequential
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler

from dataset_paths import Paths


In [None]:
# Read the combined dataset from the directory exposed as Paths.OHE.
combined_csv_path = Paths.OHE / "combined-data.csv"
df = pd.read_csv(combined_csv_path)

# Preview the first rows as the cell output.
df.head()


In [None]:
# Collapse the frame to one row per Year by averaging every column.
# groupby sorts its keys by default, so rows come out in ascending Year order.
avg_by_year = df.groupby('Year').mean()

# Single feature (Co2) as a 2-D column array; target (Temperature) as a 1-D array.
X = avg_by_year[['Co2']].values
y = avg_by_year['Temperature'].values

# Positional 80/20 split: the first 80% of years train, the remainder test.
split_index = int(len(X) * 0.8)
y_train, y_test = y[:split_index], y[split_index:]

# Fit the scaler on the training rows only. Fitting on the full array would
# let the min/max of the test period leak into the training features.
# Test values may therefore fall outside [0, 1]; that is expected.
scaler = MinMaxScaler()
scaler.fit(X[:split_index])
X_scaled = scaler.transform(X)
X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]

# Add a trailing axis so each sample has shape (columns, 1), matching the
# input_shape=(X_train.shape[1], 1) that the LSTM cell is built with.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [None]:
# Two stacked 50-unit LSTM layers feeding a single-unit dense output.
# The first LSTM returns the full sequence so the second can consume it.
n_timesteps = X_train.shape[1]

model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(n_timesteps, 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mse')

# The test split doubles as validation data here, so the per-epoch val_loss
# is measured on the same rows as the final test loss below.
model.fit(
    X_train,
    y_train,
    batch_size=5,
    epochs=50,
    validation_data=(X_test, y_test),
)

loss = model.evaluate(X_test, y_test)
print("Test Loss:", loss)

# Predictions on the held-out rows, used by the metric and plot cells.
predictions = model.predict(X_test)


In [None]:
# Regression metrics for the held-out predictions.
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed above

# Print each metric with its label padded to a common width of 20 characters.
for label, value in (("MSE:", mse), ("RMSE:", rmse), ("MAE:", mae), ("R^2:", r2)):
    print(label.ljust(20), value)


In [None]:
# Scatter of predicted vs. actual Temperature against the scaled Co2 feature,
# with the training points shown for context.
fig, ax = plt.subplots(figsize=(10, 6))

# X_train / X_test carry extra singleton axes for the LSTM and predictions is
# a column vector; flatten explicitly so x and y are plain 1-D sequences.
ax.scatter(np.ravel(X_test), np.ravel(predictions), color='blue', label='Predicted', marker='o')
ax.scatter(np.ravel(X_test), np.ravel(y_test), color='red', label='Actual', marker='o')
ax.scatter(np.ravel(X_train), np.ravel(y_train), color='purple', label='trained', marker='o')

ax.set_title("Overall - Actual vs Predicted")
# Axis labels so the figure can be read without the surrounding code.
ax.set_xlabel("Co2 (min-max scaled)")
ax.set_ylabel("Temperature")
ax.legend()

plt.show()


In [None]:
# Value a country's indicator column must sum to for the country to be
# modelled; presumably one row per year of a complete record — TODO confirm.
REQUIRED_ROWS = 52

# NOTE(review): assumes every column from index 3 onward is a 0/1 country
# indicator (one-hot encoded) — confirm against the CSV schema.
for country in df.columns[3:]:

    # Skip countries whose indicator column does not sum to the required count.
    if df[country].sum() != REQUIRED_ROWS:
        continue

    # Rows belonging to this country. Sorting by Year makes the positional
    # split below chronological regardless of row order in the file, matching
    # the overall model where groupby('Year') already yields sorted years.
    country_data = (
        df.loc[df[country] == 1, ['Year', 'Temperature', 'Co2']]
        .sort_values('Year', kind='stable')
    )

    X = country_data[['Co2']].values
    y = country_data['Temperature'].values

    # Split the data into training 80% and testing sets 20%
    split_index = int(len(X) * 0.8)
    y_train, y_test = y[:split_index], y[split_index:]

    # Scale the data. The scaler is fitted on the training rows only so the
    # min/max of the test period do not leak into the training features;
    # test values may therefore fall outside [0, 1].
    scaler = MinMaxScaler()
    scaler.fit(X[:split_index])
    X_scaled = scaler.transform(X)
    X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]

    # Reshape to (samples, columns, 1) to match the LSTM input_shape below.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    # Fresh model per country: two stacked 50-unit LSTMs and a dense output.
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        LSTM(units=50, return_sequences=False),
        Dense(units=1)
    ])

    model.compile(optimizer='adam', loss='mse')

    # The test split is also passed as validation data, so val_loss is
    # measured on the same rows as the test loss reported below.
    model.fit(X_train, y_train, batch_size=5, epochs=50, validation_data=(X_test, y_test))

    loss = model.evaluate(X_test, y_test)
    print(f"{country} Test Loss:".ljust(20), loss)

    predictions = model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    print(f"{country} MSE:".ljust(20), mse)

    r2 = r2_score(y_test, predictions)
    print("r^2: ".ljust(20), r2)

    # Predicted vs. actual Temperature against the scaled Co2 feature.
    # Inputs are flattened explicitly because they carry singleton axes.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(np.ravel(X_test), np.ravel(predictions), color='blue', label='Predicted', marker='o')
    ax.scatter(np.ravel(X_test), np.ravel(y_test), color='red', label='Actual', marker='o')
    ax.scatter(np.ravel(X_train), np.ravel(y_train), color='purple', label='trained', marker='o')
    ax.set_title(f"{country} - Actual vs Predicted")
    ax.set_xlabel("Co2 (min-max scaled)")
    ax.set_ylabel("Temperature")
    ax.legend()
    plt.show()
