# 🌤️ Weather Data Analysis & Temperature Prediction

This project analyzes historical weather data from Bangalore and predicts future temperature trends using **Linear Regression**.

### 📈 Goals:
- Explore and visualize temperature patterns
- Smooth noisy time-series data
- Build a simple regression model for forecasting
- Predict temperatures for the next 5 days (hourly)

Dataset: `Bangalore.csv` (local dataset; the filename must match the path passed to `pd.read_csv` below)

---


# Import Libraries

In [None]:
# Import core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from datetime import timedelta
import matplotlib.dates as mdates

# Load the Dataset

In [None]:
# Read the CSV, parse the 'date' column into timestamps, order the rows
# chronologically, and discard rows that have no temperature reading.
df = (
    pd.read_csv("Bangalore.csv")
    .assign(date=lambda raw: pd.to_datetime(raw['date']))
    .sort_values('date')
    .dropna(subset=['temperature_2m'])
)

# Numeric date feature: the ordinal of each timestamp's calendar day
df['date_ordinal'] = df['date'].map(pd.Timestamp.toordinal)

# Rolling mean over 24*7 consecutive rows (one week if the rows are hourly);
# nothing is plotted in this cell, the column is only stored on df.
week_of_rows = 24 * 7
df['rolling_temp'] = df['temperature_2m'].rolling(window=week_of_rows).mean()

# Show five randomly chosen rows as a sanity check
df[['date', 'temperature_2m']].sample(5)



# Visualize Temperature Over Time 
## 📊 Raw Temperature Over Time
We plot the temperature readings over time to observe trends and daily fluctuations.


In [None]:
# Draw every temperature reading against its timestamp on one wide figure,
# using the Axes object directly instead of the implicit pyplot state.
raw_fig, raw_ax = plt.subplots(figsize=(14, 6))
raw_ax.plot(
    df['date'],
    df['temperature_2m'],
    label='Temperature (°C)',
    color='orange',
)
raw_ax.set_title("Hourly Temperature in Bangalore")
raw_ax.set_xlabel("Year")
raw_ax.set_ylabel("Temperature (°C)")
raw_ax.grid(True)
raw_ax.legend()
plt.show()


# Apply Smoothing to See Trends
## 📉 Smoothed Temperature Trend
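We apply a rolling average over a 7-day window (168 consecutive hourly readings) to reduce noise and highlight long-term patterns. The first 167 rows have no smoothed value because the window is not yet full.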


In [None]:
# Weekly rolling mean: 24*7 consecutive rows, i.e. 7 days when rows are hourly
smoothing_window = 24 * 7
df['temp_smooth'] = df['temperature_2m'].rolling(window=smoothing_window).mean()

# Overlay the faint raw series with the smoothed trend on a single Axes
trend_fig, trend_ax = plt.subplots(figsize=(14, 6))
trend_ax.plot(df['date'], df['temperature_2m'], label='Raw', alpha=0.3)
trend_ax.plot(df['date'], df['temp_smooth'], label='7-Day Rolling Avg', color='red')
trend_ax.set_title("Smoothed Temperature Trend (Weekly Rolling Average)")
trend_ax.set_xlabel("Year")
trend_ax.set_ylabel("Temperature (°C)")
trend_ax.grid(True)
trend_ax.legend()
plt.show()


## 🧠 Prepare Features and Target
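We build the model inputs from three groups of features: the date converted to a numeric ordinal (long-term trend), sine/cosine encodings of day-of-year and hour-of-day (annual and daily seasonality), and the observed humidity, sea-level pressure, and cloud cover. These are used to predict temperature with a linear regression model.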


In [None]:
# ----- Feature Engineering for TRAINING -----
# Trend term: ordinal of each timestamp's calendar day (all hours of one day
# share the same value; the hour is captured separately below).
df['date_ordinal'] = df['date'].map(pd.Timestamp.toordinal)
df['dayofyear'] = df['date'].dt.dayofyear
df['hour'] = df['date'].dt.hour

# Cyclical features for daily + annual seasonality.
# sin/cos pairs place the end of a cycle next to its start (hour 23 next to
# hour 0, late December next to early January), which a raw integer would not.
df['sin_doy'] = np.sin(2 * np.pi * df['dayofyear'] / 365.25)
df['cos_doy'] = np.cos(2 * np.pi * df['dayofyear'] / 365.25)
df['sin_hour'] = np.sin(2 * np.pi * df['hour'] / 24)
df['cos_hour'] = np.cos(2 * np.pi * df['hour'] / 24)

# Final features used in training (column order is the order the model expects)
final_features = [
    'date_ordinal',
    'relative_humidity_2m',
    'pressure_msl',
    'cloud_cover',
    'sin_doy',
    'cos_doy',
    'sin_hour',
    'cos_hour',
]

# LinearRegression rejects NaN inputs, so drop every row with a gap in any
# predictor or in the target before fitting. Taking X and y from the same
# filtered frame keeps them row-aligned. This is a no-op when the data has
# no missing values.
train_df = df.dropna(subset=final_features + ['temperature_2m'])
X = train_df[final_features]
y = train_df['temperature_2m']

# Chronological split: shuffle=False keeps the last 20% of rows (the most
# recent period) as the test set, so the model is evaluated on data that
# comes after everything it was trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
model = LinearRegression()
model.fit(X_train, y_train)


# Train the Linear Regression Model

In [None]:
# Train an ordinary least-squares model on the training rows
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows; y_pred has one prediction per row of X_test
y_pred = model.predict(X_test)

## 📈 Actual vs Predicted Temperature
Compare the model's predicted temperatures to actual test values.


In [None]:
# Plot actual vs predicted
plt.figure(figsize=(14, 6))

# Shaded area: every observed temperature over the full date range
plt.fill_between(df['date'], df['temperature_2m'], alpha=0.3, label='Actual', color='skyblue')

# X_test.index holds index *labels* inherited from df, not row positions.
# After sorting and dropping rows those labels are no longer 0..n-1, so the
# dates must be looked up by label (.loc); positional .iloc would pick the
# wrong rows or raise IndexError.
test_dates = df.loc[X_test.index, 'date']
plt.plot(test_dates, y_pred, color="green", label='Predicted (Test)')

plt.title("Temperature Prediction using Linear Regression (Reduced Features)")
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.grid(True)
plt.legend()
plt.show()

## 🔮 Predict Future Temperatures
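We forecast hourly temperatures for the next 5 days using the trained model. Because future humidity, pressure, and cloud cover are unknown, the forecast assumes they repeat the pattern of the last 24 observed hours; only the date and time-of-day/season features actually advance.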


In [None]:
# ====== Step 1: Generate future hourly timestamps ======
# Forecast horizon in hours. The plot title/label below say "5 Days", so
# update them too if this value is changed.
forecast_hours = 24 * 5
last_observed = df['date'].max()
future_dates = [last_observed + timedelta(hours=step) for step in range(1, forecast_hours + 1)]
future_df = pd.DataFrame({'date': future_dates})

# ====== Step 2: Create time-based features ======
# Same encodings as the training features so the model sees consistent inputs
future_df['date_ordinal'] = future_df['date'].map(pd.Timestamp.toordinal)
future_df['dayofyear'] = future_df['date'].dt.dayofyear
future_df['hour'] = future_df['date'].dt.hour

# Add cyclic features for daily and annual seasonality
future_df['sin_doy'] = np.sin(2 * np.pi * future_df['dayofyear'] / 365.25)
future_df['cos_doy'] = np.cos(2 * np.pi * future_df['dayofyear'] / 365.25)
future_df['sin_hour'] = np.sin(2 * np.pi * future_df['hour'] / 24)
future_df['cos_hour'] = np.cos(2 * np.pi * future_df['hour'] / 24)

# ====== Step 3: Use last 24 hours pattern for other features ======
# Future humidity/pressure/cloud cover are unknown, so the most recent day is
# replayed. The first forecast hour is one step after the last observation,
# i.e. 24 steps after the oldest replayed value, so hours of day line up
# (assuming the records are hourly and gap-free -- TODO confirm for this dataset).
features_to_repeat = ['relative_humidity_2m', 'pressure_msl', 'cloud_cover']
for col in features_to_repeat:
    # Fill gaps first so a missing reading in the last day cannot feed NaN
    # into model.predict (which would raise).
    recent_24 = df[col].ffill().bfill().tail(24).to_numpy()  # last 24 values
    # np.resize cycles the pattern to exactly len(future_df) values, so the
    # horizon need not be a multiple of 24 and fewer than 24 source rows
    # no longer cause a length mismatch.
    future_df[col] = np.resize(recent_24, len(future_df))

# ====== Step 4: Prepare input in correct order ======
# Must list the same columns, in the same order, as the training features
final_features = [
    'date_ordinal',
    'relative_humidity_2m',
    'pressure_msl',
    'cloud_cover',
    'sin_doy',
    'cos_doy',
    'sin_hour',
    'cos_hour',
]
future_X = future_df[final_features]

# ====== Step 5: Predict future temperature ======
future_preds = model.predict(future_X)

# ====== Step 6: Plot forecast ======
plt.figure(figsize=(14, 6))
plt.plot(future_df['date'], future_preds, label='Forecast (Next 5 Days)', color='blue')
plt.title("Hourly Temperature Forecast (Next 5 Days)")
plt.xlabel("Date")
plt.ylabel("Predicted Temp (°C)")
plt.grid(True)
plt.legend()
# Show plain temperature values on the y-axis instead of offset/scientific notation
plt.ticklabel_format(useOffset=False, style='plain', axis='y')
plt.tight_layout()
plt.show()
