In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, TimeDistributed, Input, BatchNormalization

In [2]:
data = pd.read_csv('weatherHistory.csv')
data.head()

Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


In [3]:
# Select relevant columns
features = ['Temperature (C)', 'Humidity', 'Wind Speed (km/h)', 'Pressure (millibars)']
target = 'Apparent Temperature (C)'
weather_data_cleaned = data[features + [target]].dropna()

In [4]:
weather_data_cleaned.head()

Unnamed: 0,Temperature (C),Humidity,Wind Speed (km/h),Pressure (millibars),Apparent Temperature (C)
0,9.472222,0.89,14.1197,1015.13,7.388889
1,9.355556,0.86,14.2646,1015.63,7.227778
2,9.377778,0.89,3.9284,1015.94,9.377778
3,8.288889,0.83,14.1036,1016.41,5.944444
4,8.755556,0.83,11.0446,1016.51,6.977778


In [5]:
scaler = MinMaxScaler()
weather_scaled = scaler.fit_transform(weather_data_cleaned)

In [6]:
# Create sequences for LSTM
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length, :-1])  # Features
        y.append(data[i+sequence_length, -1])    # Target
    return np.array(X), np.array(y)

In [7]:
sequence_length = 24
X, y = create_sequences(weather_scaled, sequence_length)

In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:

# Build the CNN-LSTM model
model = Sequential()

# Convolutional layers
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length, X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(BatchNormalization())

# LSTM layers
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64, activation='relu'))
model.add(Dropout(0.2))

# Dense layers
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1))  # Output layer for regression


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [11]:
# Train the model
history = model.fit(
    X_train, y_train,
    epochs=25,  # Increase epochs for better results
    batch_size=32,
    validation_split=0.2,
    verbose=1
)


Epoch 1/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 28ms/step - loss: 0.0080 - mae: 0.0588 - val_loss: 0.0025 - val_mae: 0.0409
Epoch 2/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 26ms/step - loss: 0.0015 - mae: 0.0293 - val_loss: 0.0019 - val_mae: 0.0349
Epoch 3/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 28ms/step - loss: 0.0011 - mae: 0.0254 - val_loss: 0.0077 - val_mae: 0.0780
Epoch 4/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 24ms/step - loss: 0.0010 - mae: 0.0234 - val_loss: 0.0030 - val_mae: 0.0451
Epoch 5/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 27ms/step - loss: 9.0495e-04 - mae: 0.0222 - val_loss: 0.0011 - val_mae: 0.0257
Epoch 6/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 24ms/step - loss: 8.0082e-04 - mae: 0.0208 - val_loss: 0.0017 - val_mae: 0.0329
Epoch 7/25
[1m1929/1929[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [12]:
# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")


Test Loss: 0.0021, Test MAE: 0.0375


In [13]:
# Predict on the test set
y_pred = model.predict(X_test)

[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step


In [14]:

# Example scaled input for prediction (24 timesteps, 4 features)
sample_input = np.array([
    [0.5, 0.85, 0.2, 0.97],  # Hour 1
    [0.51, 0.86, 0.22, 0.97],  # Hour 2
    [0.49, 0.83, 0.18, 0.96],  # Hour 3
    [0.53, 0.89, 0.15, 0.96],  # Hour 4
    [0.52, 0.88, 0.19, 0.95],  # Hour 5
    [0.54, 0.87, 0.17, 0.94],  # Hour 6
    [0.55, 0.86, 0.21, 0.96],  # Hour 7
    [0.48, 0.84, 0.20, 0.95],  # Hour 8
    [0.47, 0.82, 0.18, 0.94],  # Hour 9
    [0.50, 0.83, 0.19, 0.96],  # Hour 10
    [0.51, 0.85, 0.21, 0.97],  # Hour 11
    [0.49, 0.86, 0.23, 0.96],  # Hour 12
    [0.53, 0.88, 0.20, 0.95],  # Hour 13
    [0.52, 0.87, 0.18, 0.94],  # Hour 14
    [0.54, 0.85, 0.22, 0.96],  # Hour 15
    [0.55, 0.84, 0.20, 0.95],  # Hour 16
    [0.48, 0.83, 0.19, 0.94],  # Hour 17
    [0.50, 0.82, 0.21, 0.96],  # Hour 18
    [0.51, 0.86, 0.23, 0.97],  # Hour 19
    [0.49, 0.85, 0.18, 0.96],  # Hour 20
    [0.53, 0.88, 0.19, 0.95],  # Hour 21
    [0.52, 0.87, 0.17, 0.94],  # Hour 22
    [0.54, 0.86, 0.20, 0.96],  # Hour 23
    [0.55, 0.84, 0.22, 0.97]   # Hour 24
])

# Reshape to match the input format expected by the model (1 sample, 24 timesteps, 4 features)
sample_input = sample_input.reshape(1, 24, 4)

# Make a prediction with the model
predicted_output = model.predict(sample_input)
print("Predicted Apparent Temperature:", predicted_output[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
Predicted Apparent Temperature: 0.56853944


In [15]:
# Save the model
model.save("weather_forecasting.h5")



In [17]:
import pickle
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)