### Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding, Input, Concatenate, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

### **Load the data**

In [None]:
# Load the sensor readings from the CSV file.
# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
data = pd.read_csv('new_df.csv')

  data = pd.read_csv('new_df.csv')


### Create lagged features from the target variable (pm10)

In [None]:
# Number of previous pm10 readings exposed as lag features (pm10-1 ... pm10-<lag>).
lag = 3

# Shift within each sensor: a plain data['pm10'].shift(i) would give the first
# rows of every sensor the last readings of whichever sensor precedes it in the
# frame. With the grouped shift those rows get NaN instead.
# NOTE(review): assumes rows are already in chronological order within each
# SensorId — confirm, or sort by 'Stamp' before this cell.
pm10_by_sensor = data.groupby('SensorId', sort=False)['pm10']
for i in range(1, lag + 1):
  data[f'pm10-{i}'] = pm10_by_sensor.shift(i)

In [None]:
# Discard every row that has a missing value in any column; this includes the
# rows whose lag columns were left as NaN by shift().
data = data.dropna(axis=0, how='any')

In [None]:
# Display the frame after the lag columns were added and incomplete rows were dropped.
data

Unnamed: 0,SensorId,Stamp,humidity,pm10,pm25,temperature,Hour,Month_Day_Calculation,pm10-1,pm10-2,pm10-3
3,007f2b03-94e6-47b3-9e3e-44273354acd5,2023-01-01 00:45:00+00:00,85.0,68.0,36.0,4.0,0.75,1.032258,64.0,59.5,55.0
4,007f2b03-94e6-47b3-9e3e-44273354acd5,2023-01-01 01:00:00+00:00,85.0,65.0,32.0,4.0,1.00,1.032258,68.0,64.0,59.5
5,007f2b03-94e6-47b3-9e3e-44273354acd5,2023-01-01 01:15:00+00:00,85.0,54.0,30.0,4.0,1.25,1.032258,65.0,68.0,64.0
6,007f2b03-94e6-47b3-9e3e-44273354acd5,2023-01-01 01:30:00+00:00,86.0,55.0,29.0,3.0,1.50,1.032258,54.0,65.0,68.0
7,007f2b03-94e6-47b3-9e3e-44273354acd5,2023-01-01 01:45:00+00:00,86.0,56.0,30.0,3.0,1.75,1.032258,55.0,54.0,65.0
...,...,...,...,...,...,...,...,...,...,...,...
5151135,sensor_dev_84941_208,2024-11-21 22:00:00+00:00,81.0,79.0,34.0,1.0,22.00,11.700000,61.0,76.0,73.0
5151136,sensor_dev_84941_208,2024-11-21 22:15:00+00:00,81.0,47.0,23.0,1.0,22.25,11.700000,79.0,61.0,76.0
5151137,sensor_dev_84941_208,2024-11-21 22:30:00+00:00,81.0,64.0,29.0,1.0,22.50,11.700000,47.0,79.0,61.0
5151138,sensor_dev_84941_208,2024-11-21 22:45:00+00:00,81.0,65.0,30.0,1.0,22.75,11.700000,64.0,47.0,79.0


### Encoding

In [None]:
# Map each distinct SensorId value to an integer code. The fitted encoder is kept
# in `sensor_encoder` so the codes can be translated back later if needed.
sensor_encoder = LabelEncoder().fit(data['SensorId'])
data['SensorId_encoded'] = sensor_encoder.transform(data['SensorId'])

### Define features and target

In [None]:
# Column the model is trained to predict.
target = 'pm10'

# Input columns, in the order they appear in the feature matrix:
# humidity/temperature readings, the three lagged pm10 values, then the time columns.
features = [
    'humidity',
    'temperature',
    'pm10-1',
    'pm10-2',
    'pm10-3',
    'Hour',
    'Month_Day_Calculation',
]

### Prepare the data

In [None]:
# Pull plain NumPy arrays out of the frame for scaling and modelling.
sensor_ids = data['SensorId_encoded'].values  # integer sensor codes, one per row
y = data[target].values                       # values to predict
X = data[features].values                     # one row per sample, one column per feature

### Scale the features

In [None]:
# Standardise every feature column to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split further down, so test-set statistics influence the scaling — consider
# fitting on the training rows only.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Insert a length-1 timestep axis: (samples, features) -> (samples, 1, features),
# the 3-D layout the LSTM input defined below expects.
# Using -1 for the last axis (instead of X_scaled.shape[1]) keeps the cell safe
# to re-run: an already-reshaped array is left unchanged rather than raising.
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, -1))

### Train test split

In [None]:
# Randomly hold out 20% of the samples. The three arrays are split with the same
# permutation, so features, targets and sensor codes stay aligned.
# Use an explicit integer seed (0): `random_state=False` is coerced to the same
# seed but reads as if seeding were disabled.
# NOTE(review): a shuffled split puts neighbouring timestamps of the same sensor
# on both sides, and the lag features overlap across those rows — a chronological
# hold-out would give a more honest estimate; confirm intent.
X_train, X_test, y_train, y_test, sensor_ids_train, sensor_ids_test = train_test_split(
    X_scaled, y, sensor_ids, test_size=0.2, random_state=0
)

### **Model**

In [None]:
# Length of the vector learned for each sensor.
embedding_dim = 10
# Number of distinct sensor codes; used as the embedding's input_dim.
sensor_count = np.unique(sensor_ids).size

In [None]:
# Model input 1: a single integer sensor code per sample.
sensor_input = Input(
    shape=(1,),
)
# Model input 2: one timestep per sample carrying all scaled feature columns
# (the last axis of the reshaped X_scaled).
time_series_input = Input(
    shape=(1, X_scaled.shape[2]),
)

In [None]:
# Look up a trainable vector of length embedding_dim for each sensor code ...
embedded_sensor = Embedding(input_dim=sensor_count, output_dim=embedding_dim)(sensor_input)
# ... and flatten away the length-1 axis so it can be concatenated with the
# LSTM branch later on.
sensor_embedding = Flatten()(embedded_sensor)

In [None]:
# Time-series branch: a 64-unit LSTM that returns only its final output
# (the input sequence has a single timestep), followed by 20% dropout.
lstm_features = LSTM(64, return_sequences=False)(time_series_input)
x = Dropout(0.2)(lstm_features)

In [None]:
# Regression head: join the LSTM branch with the sensor embedding, pass the
# result through a 32-unit ReLU layer and emit a single linear output.
# Distinct names are used instead of rebinding `x`, so re-running this cell
# rebuilds the same head rather than stacking a second Concatenate/Dense on top
# of the previous run's output.
merged = Concatenate()([x, sensor_embedding])
hidden = Dense(32, activation='relu')(merged)
output = Dense(1)(hidden)

In [None]:
# Assemble the two-input model; the input order here is the order in which
# arrays must be passed to fit() and predict().
model = Model(
    inputs=[time_series_input, sensor_input],
    outputs=output,
)
# Train with mean squared error and Adam at a learning rate of 0.001.
model.compile(
    loss='mse',
    optimizer=Adam(learning_rate=0.001),
)

In [None]:
# Fit for a single epoch in mini-batches of 32 samples.
# NOTE(review): the held-out test split doubles as validation data here, so
# val_loss is computed on the same rows as the final evaluation.
train_inputs = [X_train, sensor_ids_train]
val_inputs = [X_test, sensor_ids_test]
history = model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(val_inputs, y_test),
    batch_size=32,
    epochs=1,
)

82809/82809 ━━━━━━━━━━━━━━━━━━━━ 596s 7ms/step - loss: 1187.2133 - val_loss: 260.5684


In [None]:
# Predict on the held-out split; the arrays are passed in the same order as the
# model's inputs (time-series features first, sensor codes second).
test_inputs = [X_test, sensor_ids_test]
predictions = model.predict(test_inputs)

20703/20703 ━━━━━━━━━━━━━━━━━━━━ 56s 3ms/step


### Evaluation

In [None]:
# Score the held-out predictions with MSE, MAE and R², in that order.
mse, mae, r2 = (
    metric(y_test, predictions)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)
print(f"Mean Squared Error (MSE): {mse:.4f}, Mean Absolute Error (MAE): {mae:.4f}, R-squared (R²): {r2:.4f}")

Mean Squared Error (MSE): 260.5683, Mean Absolute Error (MAE): 4.6668, R-squared (R²): 0.9877
