In [None]:
# Mount Google Drive inside the Colab runtime so files stored there can be
# read through the local filesystem path used below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error

Load the dataset

In [None]:
# Location of the weather CSV on the mounted Google Drive.
data_path = '/content/drive/MyDrive/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/SriLanka_Weather_Dataset_updated (1).csv'

# Read the whole file into a DataFrame; later cells filter and reshape it.
data_set = pd.read_csv(filepath_or_buffer=data_path)

Columns to drop

In [None]:

# Columns removed from the raw frame before any modelling.
need_drop = ["temperature_2m_max", "temperature_2m_min", "sunrise", "sunset", "apparent_temperature_min",
             "apparent_temperature_mean", "shortwave_radiation_sum", "rain_sum",
             "apparent_temperature_max", "snowfall_sum", "country", "weathercode"]

# Rebind instead of dropping in place, and tolerate columns that are already
# gone: an in-place drop raises KeyError when this cell is run a second time,
# because the columns were removed by the first run.
data_set = data_set.drop(columns=need_drop, errors='ignore')

Convert date column to datetime data type

In [None]:


# Parse the 'time' column into pandas datetimes; the column keeps its name
# and position in the frame.
data_set = data_set.assign(time=pd.to_datetime(data_set['time']))


Choose a city

In [None]:

# City whose rows are kept for modelling.
city_name = "Jaffna"

# Keep only that city's rows, then remove the now-constant 'city' column.
is_selected_city = data_set["city"] == city_name
data = data_set.loc[is_selected_city].drop(columns='city')

Set data index as datetime column

In [None]:

# Index the frame by timestamp: build the index from the 'time' column, then
# remove that column so it is not treated as a feature.
time_index = pd.to_datetime(data['time'])
data = data.set_index(time_index).drop(columns='time')


Fill missing values

In [None]:

# Forward-fill gaps with the last observed value. `fillna(method='ffill')` is
# deprecated in recent pandas releases, so the dedicated method is used.
# Forward fill cannot fill NaNs that precede the first valid observation, so a
# backward fill follows to cover any leading gaps; otherwise those NaNs would
# pass through scaling and reach the model.
data = data.ffill().bfill()

# Select the target columns

In [None]:

# Variables the model is trained to predict.
target_cols = ['temperature_2m_mean', 'precipitation_sum', 'precipitation_hours', 'et0_fao_evapotranspiration']

# Put the target columns first, in the order listed above, followed by the
# remaining columns in their existing order. The later cells pick targets by
# position: labels are the first len(target_cols) scaled columns, and the
# plots undo the scaling using column position i for target_cols[i]. Without
# this reordering those positions only match the names if the CSV happens to
# list the targets first. A target missing from `data` raises KeyError here.
other_cols = [name for name in data.columns if name not in target_cols]
data = data[target_cols + other_cols]


# Normalize the data

In [None]:


# Scale every column with min/max statistics learned from the leading 80% of
# rows only. The later train/test split is chronological with the same 0.8
# fraction, so these rows all fall inside the training portion; fitting on the
# full frame would leak the test period's min/max into training.
# Rows after the fitted range may therefore scale slightly outside [0, 1].
scaler = MinMaxScaler()
n_fit_rows = int(0.8 * len(data))
scaler.fit(data.iloc[:n_fit_rows])

# Transform all rows (training and test) with the training-derived statistics.
scaled_data = scaler.transform(data)

# Define sequence length and features

In [None]:

# Window and dimensionality settings used when building model inputs.
sequence_length = 10  # Number of time steps in each sequence
num_features = data.shape[1]    # one feature per column of `data`
num_targets = len(target_cols)  # one output per target column

# Create sequences and corresponding labels

In [None]:

# Slide a window of `sequence_length` rows over the scaled data.
# Each input is the window itself (all columns); its label is the row right
# after the window, restricted to the first `num_targets` columns.
window_starts = range(len(scaled_data) - sequence_length)

sequences = [
    scaled_data[start:start + sequence_length]
    for start in window_starts
]
labels = [
    scaled_data[start + sequence_length][:num_targets]
    for start in window_starts
]


# Convert to numpy arrays

In [None]:


# Stack the per-window lists into single arrays: inputs gain a leading
# window axis, labels become one row per window.
sequences, labels = (np.array(items) for items in (sequences, labels))

# Split into train and test sets

In [None]:


# Chronological split: the first 80% of windows train the model and the
# remainder is held out for testing (no shuffling before the split).
train_size = int(0.8 * len(sequences))

train_x = sequences[:train_size]
test_x = sequences[train_size:]
train_y = labels[:train_size]
test_y = labels[train_size:]

# Report the resulting array shapes as a quick sanity check.
for split_name, split_array in (
    ("Train X", train_x),
    ("Train Y", train_y),
    ("Test X", test_x),
    ("Test Y", test_y),
):
    print(f"{split_name} shape:", split_array.shape)

# Create the LSTM model

In [None]:


# Seed NumPy and TensorFlow before building the network so weight
# initialisation and dropout masks are repeatable when the notebook is
# restarted and re-run; without a seed every run gives different results.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = Sequential()

# Three stacked LSTM layers, each followed by dropout. The first two return
# the full sequence so the next LSTM receives one vector per time step; the
# last returns only its final output.
# Input shape is (time steps, features), read from the training inputs.
model.add(LSTM(units=128, input_shape=(train_x.shape[1], train_x.shape[2]), return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=64, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=32, return_sequences=False))
model.add(Dropout(0.2))

# Dense output layer with one unit per target variable.
model.add(Dense(units=num_targets))

# Train with Adam against mean squared error (computed on scaled values).
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

# Train the model

In [None]:

# Define callbacks
# Stop once validation loss has not improved for 5 consecutive epochs and
# roll the model back to the weights from its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Training settings gathered in one place for easy tuning.
fit_options = dict(
    epochs=100,
    batch_size=64,
    validation_split=0.2,  # Use part of the training data as validation
    callbacks=[early_stopping],
)

# `history` keeps the per-epoch loss curves used by the plot further down.
history = model.fit(train_x, train_y, **fit_options)

# Evaluate the model on the test set

In [None]:


# Score the trained model on the held-out windows. The value is the loss the
# model was compiled with, computed on scaled (not original-unit) values.
test_loss = model.evaluate(x=test_x, y=test_y)
print("Test Loss:", test_loss)

# Plot training & validation loss values

In [None]:


# Training and validation loss per epoch, drawn on explicit figure/axes objects.
fig, ax = plt.subplots()
for curve_key in ('loss', 'val_loss'):
    ax.plot(history.history[curve_key])
ax.set_title('Model Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend(['Train', 'Validation'], loc='upper right')
plt.show()

# Predict on the test set

In [None]:

# Run the model on every held-out window; the result has one column per
# target, still in scaled units.
predictions = model.predict(x=test_x)





# Calculate evaluation metrics for each target variable

In [None]:

# Per-target error metrics. Both inputs are in scaled units, so the reported
# errors are in scaled units too, not in the variables' original units.
for i, col in enumerate(target_cols):
    actual_column = test_y[:, i]
    predicted_column = predictions[:, i]

    mae = mean_absolute_error(actual_column, predicted_column)
    mse = mean_squared_error(actual_column, predicted_column)
    rmse = np.sqrt(mse)

    print(f"\nMetrics for {col}:")
    for metric_label, metric_value in (
        ("Mean Absolute Error (MAE):", mae),
        ("Mean Squared Error (MSE):", mse),
        ("Root Mean Squared Error (RMSE):", rmse),
    ):
        print(metric_label, metric_value)



# Plot actual vs predicted values for each target variable

In [None]:

def _scaled_column_to_original(scaled_column, column_position):
    """Undo the scaling of a single column using the fitted scaler.

    The scaler inverts matrices with as many columns as the model inputs have
    features, so the column is copied into every column slot and only the
    slot at `column_position` is read back.
    """
    widened = np.repeat(scaled_column.reshape(-1, 1), test_x.shape[-1], axis=-1)
    return scaler.inverse_transform(widened)[:, column_position]


# One figure per target: actual vs predicted values in original units,
# plotted against the last len(values) timestamps of the frame's index.
for i, col in enumerate(target_cols):
    true_values = _scaled_column_to_original(test_y[:, i], i)
    predicted_values = _scaled_column_to_original(predictions[:, i], i)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(data.index[-len(true_values):], true_values, label='Actual')
    ax.plot(data.index[-len(predicted_values):], predicted_values, label='Predicted')
    ax.set_title(f'{col} Prediction vs Actual')
    ax.set_xlabel('Date')
    ax.set_ylabel(col)
    ax.legend()
    plt.show()
