# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler  # Import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers


# Read the measurements CSV from the mounted Drive folder
dataset = pd.read_csv('/content/drive/MyDrive/Datasets/so2_20180101_20231031 (1).csv')

# Name of the column whose values the model will learn to forecast
selected_column = '33204020'

# Ordered 80/20 split: with shuffle=False the first 80% of the rows become
# the training set and the remaining 20% the test set, so row order is kept
train_data, test_data = train_test_split(
    dataset[selected_column],
    shuffle=False,
    test_size=0.2,
)

# Scale to the [0, 1] range. The scaler learns its min/max from the training
# portion only and is then applied, unchanged, to the test portion.
scaler = MinMaxScaler(feature_range=(0, 1))
train_data_normalized = scaler.fit_transform(np.reshape(train_data.values, (-1, 1)))
test_data_normalized = scaler.transform(np.reshape(test_data.values, (-1, 1)))

# Define the function to create the dataset
def create_dataset(dataset, time_steps=1):
    """Slice a series into sliding windows and their next-step targets.

    Window ``k`` is ``dataset[k:k + time_steps]`` and its target is the
    element right after it, ``dataset[k + time_steps]``. Both are cast to
    float.

    Note: ``len(dataset) - time_steps - 1`` pairs are produced, so the last
    element of ``dataset`` is never used as a target. When the series is too
    short to yield any pair, two empty arrays are returned.

    Args:
        dataset: NumPy array indexed along its first axis (elements must
            support ``.astype``).
        time_steps: Number of consecutive elements in each window.

    Returns:
        A ``(windows, targets)`` tuple of NumPy arrays with one entry per
        window along the first axis.
    """
    n_pairs = len(dataset) - time_steps - 1
    windows = [
        dataset[start:start + time_steps].astype(float)
        for start in range(n_pairs)
    ]
    targets = [
        dataset[start + time_steps].astype(float)
        for start in range(n_pairs)
    ]
    return np.array(windows), np.array(targets)

# Window length: every sample holds this many consecutive observations
time_steps = 10

# Cut the scaled train/test series into (window, next value) pairs
X_train, y_train = create_dataset(train_data_normalized, time_steps=time_steps)
X_test, y_test = create_dataset(test_data_normalized, time_steps=time_steps)

# Make the layout explicit as (samples, time_steps, 1) so it matches the
# (time_steps, 1) per-sample shape declared by the model's input layer
X_train = np.reshape(X_train, (X_train.shape[0], time_steps, 1))
X_test = np.reshape(X_test, (X_test.shape[0], time_steps, 1))

# Build a small 1D convolutional network: two stacked Conv1D layers slide
# over the time axis, a global max-pool collapses that axis to one value per
# filter, and a single Dense unit outputs the one-step-ahead forecast.
model = tf.keras.Sequential([
    layers.Input(shape=(time_steps, 1)),
    layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(units=1),
])

# Mean squared error on the scaled values, optimised with Adam
model.compile(loss='mse', optimizer='adam')

# Fit on the training windows; verbose=2 prints one line per epoch
history = model.fit(x=X_train, y=y_train, batch_size=32, epochs=10, verbose=2)

# Make predictions on the test data
predictions = model.predict(X_test)

# Inverse transform the predictions to get them back to original scale
predictions = scaler.inverse_transform(predictions).flatten()

# Ground truth aligned with the predictions. create_dataset pairs the window
# starting at position k with the target at position k + time_steps, so
# prediction k must be compared with test value k + time_steps (not
# k + time_steps + 1). Bounding the slice by len(predictions) keeps both
# arrays the same length.
test_values = test_data.values.squeeze()
actual = test_values[time_steps:time_steps + len(predictions)]

# Evaluate the model
mse = mean_squared_error(actual, predictions)
rmse = sqrt(mse)
print(f"Root mean squared error: {rmse}")

# Position of the first positive value in the test series; fall back to 0
# (plot everything) when no value is positive instead of raising StopIteration
first_positive = next((i for i, val in enumerate(test_values) if val > 0), 0)

# `actual` and `predictions` start time_steps positions into the test series,
# so shift the index into their coordinates (clamped at the first sample)
start_index = max(first_positive - time_steps, 0)

# Plot the results starting from the first positive number
plt.plot(actual[start_index:], label="Actual")
plt.plot(predictions[start_index:], label="Predicted")
plt.legend()
plt.title(f'Actual vs Predicted for {selected_column}')
plt.xlabel('Time Steps')
plt.ylabel('Values')
plt.show()
