In [1]:
# Name: Athar Sayed
# Roll: R-01
# Date: 8/04/2025
# Task : Objective 1
# ----------------------------------------
# Part A – Data Preparation
# ----------------------------------------
# Importing Necessary Libraries in the code

import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense
from sklearn.metrics import mean_squared_error
import os

# ----------------------------------------
# File Paths
# ----------------------------------------

csv_path = r"F:\M.Tech_CollgeMaterials\DPL\Labtest\data\DailyDelhiClimateTest.csv"
log_path = r"F:\M.Tech_CollgeMaterials\DPL\Labtest\logs\temperature_prediction.log"
output_dir = r"F:\M.Tech_CollgeMaterials\DPL\Labtest\output"

# Ensuring that the  output directories exist
os.makedirs(os.path.dirname(log_path), exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# ----------------------------------------
# Logging Configuration
# ----------------------------------------

logging.basicConfig(
    filename=log_path,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logging.info("Execution started.")

# ----------------------------------------
# Load and Prepare Data
# ----------------------------------------

try:
    data = pd.read_csv(csv_path)
    logging.info("Dataset loaded successfully.")
except Exception as e:
    logging.error(f"Error loading dataset: {e}")
    raise

data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)
logging.info("Date column parsed and set as index.")

# ----------------------------------------
# Visualize Temperature Trend
# ----------------------------------------

plt.figure(figsize=(12, 5))
sns.lineplot(data=data, x=data.index, y="meantemp")
plt.title("Mean Temperature Over Time (Delhi)")
plt.ylabel("Mean Temperature (°C)")
plt.xlabel("Date")
plt.grid(True)
plt.tight_layout()
trend_plot_path = os.path.join(output_dir, "temperature_trend.png")
plt.savefig(trend_plot_path)
plt.close()
logging.info(f"Temperature trend plotted and saved at {trend_plot_path}")

# ----------------------------------------
# Handle Missing Values
# ----------------------------------------

missing = data.isnull().sum()
logging.info(f"Missing values in dataset:\n{missing}")

# ----------------------------------------
# Normalize Temperature
# ----------------------------------------

scaler = MinMaxScaler()
data['meantemp_normalized'] = scaler.fit_transform(data[['meantemp']])
logging.info("Temperature normalized using MinMaxScaler.")

# ----------------------------------------
# Create Sequences
# ----------------------------------------

def create_sequences(data_array, seq_length=7):
    x, y = [], []
    for i in range(len(data_array) - seq_length):
        x.append(data_array[i:i + seq_length])
        y.append(data_array[i + seq_length])
    return np.array(x), np.array(y)

temperature_series = data['meantemp_normalized'].values
X, y = create_sequences(temperature_series)
X = X.reshape((X.shape[0], X.shape[1], 1))

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

logging.info(f"Input sequences prepared. X shape: {X.shape}, y shape: {y.shape}")
logging.info(f"Training data shape: X_train={X_train.shape}, y_train={y_train.shape}")
logging.info(f"Validation data shape: X_val={X_val.shape}, y_val={y_val.shape}")



In [2]:
# ----------------------------------------
# Part B – Model Building & Training
# ----------------------------------------

# Simple RNN Model
rnn_model = Sequential([
    SimpleRNN(50, activation='relu', input_shape=(7, 1)),
    Dense(1)
])
rnn_model.compile(optimizer='adam', loss='mse')
logging.info("RNN model compiled.")

rnn_history = rnn_model.fit(X_train, y_train, epochs=50, batch_size=8,
                            validation_data=(X_val, y_val), verbose=0)
logging.info("RNN model trained.")

# LSTM Model
lstm_model = Sequential([
    LSTM(50, activation='relu', input_shape=(7, 1)),
    Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mse')
logging.info("LSTM model compiled.")

lstm_history = lstm_model.fit(X_train, y_train, epochs=50, batch_size=8,
                              validation_data=(X_val, y_val), verbose=0)
logging.info("LSTM model trained.")

# Plot Training and Validation Loss
plt.figure(figsize=(12, 5))
plt.plot(rnn_history.history['loss'], label='RNN Train Loss')
plt.plot(rnn_history.history['val_loss'], label='RNN Val Loss')
plt.plot(lstm_history.history['loss'], label='LSTM Train Loss')
plt.plot(lstm_history.history['val_loss'], label='LSTM Val Loss')
plt.title("Training & Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss (MSE)")
plt.legend()
plt.grid(True)
plt.tight_layout()
loss_plot_path = os.path.join(output_dir, "training_validation_loss.png")
plt.savefig(loss_plot_path)
plt.close()
logging.info(f"Training and validation loss plot saved at {loss_plot_path}")



  super().__init__(**kwargs)


In [3]:
# ----------------------------------------
# Part C – Results & Analysis
# ----------------------------------------

# Predictions
rnn_pred = rnn_model.predict(X_val)
lstm_pred = lstm_model.predict(X_val)

# Rescale to original
rnn_pred_rescaled = scaler.inverse_transform(rnn_pred)
lstm_pred_rescaled = scaler.inverse_transform(lstm_pred)
y_val_rescaled = scaler.inverse_transform(y_val.reshape(-1, 1))

# Plot Actual vs Predicted
plt.figure(figsize=(12, 5))
plt.plot(y_val_rescaled, label='Actual Temp')
plt.plot(rnn_pred_rescaled, label='RNN Predicted')
plt.plot(lstm_pred_rescaled, label='LSTM Predicted')
plt.title("Actual vs Predicted Temperatures")
plt.xlabel("Time Step (Day)")
plt.ylabel("Mean Temperature (°C)")
plt.legend()
plt.grid(True)
plt.tight_layout()
pred_plot_path = os.path.join(output_dir, "actual_vs_predicted.png")
plt.savefig(pred_plot_path)
plt.close()
logging.info(f"Actual vs predicted temperature plot saved at {pred_plot_path}")

# Evaluation
rnn_mse = mean_squared_error(y_val_rescaled, rnn_pred_rescaled)
lstm_mse = mean_squared_error(y_val_rescaled, lstm_pred_rescaled)
logging.info(f"RNN MSE: {rnn_mse:.4f}")
logging.info(f"LSTM MSE: {lstm_mse:.4f}")

# Performance Conclusion
if lstm_mse < rnn_mse:
    logging.info("Conclusion: LSTM performed better due to its long-term memory capabilities.")
else:
    logging.info("Conclusion: RNN performed better, possibly due to dataset simplicity.")

logging.info("Execution completed successfully.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
