In [43]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [44]:
df = pd.read_csv('kanpur.csv')

In [45]:
df = df.drop(columns=['moonrise', 'moonset', 'sunrise', 'sunset'])
df.columns

Index(['date_time', 'maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour',
       'uvIndex', 'uvIndex.1', 'moon_illumination', 'DewPointC', 'FeelsLikeC',
       'HeatIndexC', 'WindChillC', 'WindGustKmph', 'cloudcover', 'humidity',
       'precipMM', 'pressure', 'tempC', 'visibility', 'winddirDegree',
       'windspeedKmph'],
      dtype='object')

In [46]:
# Assuming df is your DataFrame with the weather data
def create_lag_features(df, target_column, lags):
    for lag in range(1, lags + 1):
        df[f'{target_column}_lag_{lag}'] = df[target_column].shift(lag)
    return df


In [47]:
# Apply lag features for each weather feature you're predicting
lag_days = 7
for col in ['maxtempC', 'mintempC', 'precipMM', 'windspeedKmph']:
    df = create_lag_features(df, col, lag_days)


In [48]:
df['date_time'] = pd.to_datetime(df['date_time'])

In [49]:
df['month'] = df['date_time'].dt.month


In [50]:
df.dropna(inplace=True)  # Drop rows with NaN values created by lag features

In [51]:
X = df.drop(columns=['maxtempC', 'mintempC', 'date_time'])
y_max = df['maxtempC']
y_min = df['mintempC']

In [52]:
# Train-test split
X_train, X_test, y_max_train, y_max_test = train_test_split(X, y_max, test_size=0.2, random_state=42)
X_train, X_test, y_min_train, y_min_test = train_test_split(X, y_min, test_size=0.2, random_state=42)


In [53]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

**Random Forest**

In [69]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


In [70]:
# Max temperature prediction
rf_max = RandomForestRegressor()
rf_max.fit(X_train, y_max_train)
y_max_pred = rf_max.predict(X_test[-7:])


In [71]:
rf_max_pred = rf_max.predict(X_test)

In [72]:
print("Mean absolute error: %.2f" % np.mean(np.absolute(rf_max_pred - y_max_test)))

Mean absolute error: 0.08


In [73]:
mae_max_rf = np.mean(np.absolute(rf_max_pred - y_max_test))

In [74]:
# Min temperature prediction
rf_min = RandomForestRegressor()
rf_min.fit(X_train, y_min_train)
y_min_pred = rf_min.predict(X_test[-7:])


In [75]:
rf_min_pred = rf_min.predict(X_test)

In [76]:
print("Mean absolute error: %.2f" % np.mean(np.absolute(rf_min_pred - y_min_test)))

Mean absolute error: 0.07


In [77]:
mae_min_rf = np.mean(np.absolute(rf_min_pred - y_min_test))

In [78]:
last_date = pd.to_datetime('2020-06-01')

In [79]:
print("Weather Forecast for the Next 7 Days:")
for i in range(7):
    prediction_date = last_date + timedelta(days=i+1)
    print(f"Date: {prediction_date.strftime('%Y-%m-%d')} - Max Temp: {y_max_pred[i]:.2f}°C, Min Temp: {y_min_pred[i]:.2f}°C")

Weather Forecast for the Next 7 Days:
Date: 2020-06-02 - Max Temp: 29.03°C, Min Temp: 14.96°C
Date: 2020-06-03 - Max Temp: 37.14°C, Min Temp: 18.00°C
Date: 2020-06-04 - Max Temp: 22.00°C, Min Temp: 9.00°C
Date: 2020-06-05 - Max Temp: 24.97°C, Min Temp: 12.00°C
Date: 2020-06-06 - Max Temp: 25.00°C, Min Temp: 15.00°C
Date: 2020-06-07 - Max Temp: 46.00°C, Min Temp: 27.00°C
Date: 2020-06-08 - Max Temp: 24.00°C, Min Temp: 16.00°C


In [80]:
print(f"Max Temperature Prediction - MAE: {mae_max_rf:.2f}")
print(f"Min Temperature Prediction - MAE: {mae_min_rf:.2f}")

Max Temperature Prediction - MAE: 0.08
Min Temperature Prediction - MAE: 0.07


In [81]:
# Calculate performance metrics
def evaluate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return mae, rmse, r2


In [82]:
# Assuming y_test_max and y_test_min are your actual test target values
mae_max, rmse_max, r2_max = evaluate_metrics(y_max_test, rf_max_pred)
mae_min, rmse_min, r2_min = evaluate_metrics(y_min_test, rf_min_pred)

In [83]:
print(f"Max Temperature Prediction - MAE: {mae_max:.2f}, RMSE: {rmse_max:.2f}, R²: {r2_max:.2f}")
print(f"Min Temperature Prediction - MAE: {mae_min:.2f}, RMSE: {rmse_min:.2f}, R²: {r2_min:.2f}")

Max Temperature Prediction - MAE: 0.08, RMSE: 0.36, R²: 1.00
Min Temperature Prediction - MAE: 0.07, RMSE: 0.28, R²: 1.00


**LSTM**

In [84]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from datetime import timedelta


In [85]:
# Reshape the data for LSTM
X_train_lstm = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = np.reshape(X_test[-7:], (7, 1, X_test.shape[1]))


In [86]:
# Common settings
batch_size = 32
epochs = 50

In [87]:
# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

In [88]:
# Function to build LSTM model
def build_lstm_model():
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(1, X_train.shape[1])))
    model.add(Dropout(0.2))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [89]:
# Train the model for max temperature
model_max = build_lstm_model()
model_max.fit(X_train_lstm, y_max_train, batch_size=batch_size, epochs=epochs,
              callbacks=[early_stopping], shuffle=True, verbose=2)
y_max_pred_lstm = model_max.predict(X_test_lstm)


Epoch 1/50


  super().__init__(**kwargs)


2411/2411 - 7s - 3ms/step - loss: 43.3544
Epoch 2/50
2411/2411 - 4s - 2ms/step - loss: 6.2182
Epoch 3/50
2411/2411 - 4s - 2ms/step - loss: 3.8926
Epoch 4/50
2411/2411 - 4s - 2ms/step - loss: 1.7417
Epoch 5/50
2411/2411 - 4s - 2ms/step - loss: 0.8737
Epoch 6/50
2411/2411 - 4s - 2ms/step - loss: 0.6822
Epoch 7/50
2411/2411 - 4s - 2ms/step - loss: 0.6299
Epoch 8/50
2411/2411 - 4s - 2ms/step - loss: 0.6135
Epoch 9/50
2411/2411 - 4s - 2ms/step - loss: 0.5864
Epoch 10/50
2411/2411 - 4s - 2ms/step - loss: 0.5731
Epoch 11/50
2411/2411 - 4s - 2ms/step - loss: 0.5753
Epoch 12/50
2411/2411 - 4s - 2ms/step - loss: 0.5661
Epoch 13/50
2411/2411 - 4s - 2ms/step - loss: 0.5592
Epoch 14/50
2411/2411 - 5s - 2ms/step - loss: 0.5502
Epoch 15/50
2411/2411 - 4s - 2ms/step - loss: 0.5477
Epoch 16/50
2411/2411 - 5s - 2ms/step - loss: 0.5439
Epoch 17/50
2411/2411 - 4s - 2ms/step - loss: 0.5359
Epoch 18/50
2411/2411 - 7s - 3ms/step - loss: 0.5361
Epoch 19/50
2411/2411 - 7s - 3ms/step - loss: 0.5270
Epoch 20/50


In [90]:
# Train the model for min temperature
model_min = build_lstm_model()
model_min.fit(X_train_lstm, y_min_train, batch_size=batch_size, epochs=epochs,
              callbacks=[early_stopping], shuffle=True, verbose=2)
y_min_pred_lstm = model_min.predict(X_test_lstm)

Epoch 1/50
2411/2411 - 7s - 3ms/step - loss: 20.2878
Epoch 2/50
2411/2411 - 4s - 2ms/step - loss: 3.0607
Epoch 3/50
2411/2411 - 4s - 2ms/step - loss: 1.8136
Epoch 4/50
2411/2411 - 4s - 2ms/step - loss: 0.9952
Epoch 5/50
2411/2411 - 4s - 2ms/step - loss: 0.7264
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 331ms/step


In [91]:
# Display the weather forecast
last_date = pd.to_datetime('2020-06-01')

In [92]:
print("Weather Forecast for the Next 7 Days:")
for i in range(7):
    prediction_date = last_date + timedelta(days=i+1)
    print(f"Date: {prediction_date.strftime('%Y-%m-%d')} - Max Temp: {y_max_pred_lstm[i][0]:.2f}°C, Min Temp: {y_min_pred_lstm[i][0]:.2f}°C")

Weather Forecast for the Next 7 Days:
Date: 2020-06-02 - Max Temp: 29.01°C, Min Temp: 14.07°C
Date: 2020-06-03 - Max Temp: 37.18°C, Min Temp: 18.51°C
Date: 2020-06-04 - Max Temp: 21.80°C, Min Temp: 9.07°C
Date: 2020-06-05 - Max Temp: 24.91°C, Min Temp: 11.08°C
Date: 2020-06-06 - Max Temp: 24.68°C, Min Temp: 14.59°C
Date: 2020-06-07 - Max Temp: 46.05°C, Min Temp: 27.06°C
Date: 2020-06-08 - Max Temp: 23.75°C, Min Temp: 15.64°C


In [93]:
# Calculate performance metrics
def evaluate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return mae, rmse, r2


In [94]:
y_max_test = y_max_test[-7:]
y_min_test = y_min_test[-7:]

In [95]:
# Assuming y_test_max and y_test_min are your actual test target values
mae_max, rmse_max, r2_max = evaluate_metrics(y_max_test, y_max_pred_lstm)
mae_min, rmse_min, r2_min = evaluate_metrics(y_min_test, y_min_pred_lstm)

In [96]:
print(f"Max Temperature Prediction - MAE: {mae_max:.2f}, RMSE: {rmse_max:.2f}, R²: {r2_max:.2f}")
print(f"Min Temperature Prediction - MAE: {mae_min:.2f}, RMSE: {rmse_min:.2f}, R²: {r2_min:.2f}")

Max Temperature Prediction - MAE: 0.16, RMSE: 0.19, R²: 1.00
Min Temperature Prediction - MAE: 0.47, RMSE: 0.57, R²: 0.99
