In [1]:
# -----------------------------
# markov_model.ipynb
# -----------------------------
import pandas as pd
import numpy as np
import random

# -----------------------------
# Step 1: Load dataset & extract condition_text
# -----------------------------
# Read the processed weather CSV and keep the observed condition labels as a
# plain Python list, in file order, for the transition counting below.
df = pd.read_csv("dehradun_weather_processed.csv")
condition = df.loc[:, "condition_text"].to_list()

# -----------------------------
# Step 2: Initialize Markov Chain transition matrix
# -----------------------------
# Unique states in order of first appearance. dict.fromkeys() is used rather
# than set() because the iteration order of a set of strings can change from
# one interpreter run to the next (hash randomization), which would reorder
# the matrix, the printed sample, and any weighted draw made from it.
states = list(dict.fromkeys(condition))
transition_counts = {s: {s2: 0 for s2 in states} for s in states}

# Count transitions between each observation and the one that follows it.
# zip() stops at the shorter sequence, so the final observation is only ever
# used as a "tomorrow".
for today, tomorrow in zip(condition, condition[1:]):
    transition_counts[today][tomorrow] += 1

# Convert counts to probabilities (each row is normalised by its own total).
# A state with no observed outgoing transitions keeps an all-zero row.
transition_matrix = {}
for s in states:
    total = sum(transition_counts[s].values())
    transition_matrix[s] = {
        s2: (transition_counts[s][s2] / total if total > 0 else 0)
        for s2 in states
    }

print("\nSample Markov Chain Transition Matrix (first 5 states):")
for s in list(transition_matrix.keys())[:5]:
    print(f"{s}: {transition_matrix[s]}")

# -----------------------------
# Step 3: Simulation parameters
# -----------------------------
# You can import these from weather_simulator.ipynb or recompute here
# Temperature and humidity are later sampled from normal distributions with
# these means and (population, ddof=0) standard deviations.
mean_temp = np.mean(df["temperature_celsius"])
std_temp = np.std(df["temperature_celsius"])
mean_humidity = np.mean(df["humidity"])
std_humidity = np.std(df["humidity"])
# NOTE(review): the Poisson rate for rain events is taken from the mean of the
# "wind_kph" column (floored at 0.5) — confirm that a wind column, rather than
# a precipitation column, is the intended source.
lam_rain_events = max(0.5, np.mean(df["wind_kph"]))
storm_prob = 0.1  # per-day probability passed to the binomial thunder draw

num_days = 30
weather_data = []

# Seed both random generators used by the simulation so that re-running the
# notebook gives the same draws. (Identical output across runs additionally
# requires `states` to have a stable order.)
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

# Start with random initial state
current_state = random.choice(states)

# -----------------------------
# Step 4: Simulate each day
# -----------------------------
# For every simulated day: draw the numeric variables independently of the
# weather condition, record the day, then advance the Markov chain.
for day in range(num_days):
    temp = np.random.normal(mean_temp, std_temp)
    hum = np.random.normal(mean_humidity, std_humidity)
    rain = np.random.poisson(lam_rain_events)
    thunder = np.random.binomial(1, storm_prob)

    weather_data.append({
        "day": day+1,
        "condition": current_state,
        "temperature": round(temp,1),
        "humidity": round(hum,1),
        "rain": rain,
        "thunder": bool(thunder)
    })

    # Next day's weather based on Markov probabilities
    row = transition_matrix[current_state]
    next_states = list(row.keys())
    probs = list(row.values())
    if sum(probs) > 0:
        current_state = random.choices(next_states, weights=probs, k=1)[0]
    else:
        # A state that was only ever seen as the last observation has an
        # all-zero row; random.choices() raises ValueError when the total
        # weight is zero, so fall back to a uniform pick over all states.
        current_state = random.choice(next_states)

# -----------------------------
# Step 5: Preview simulated weather
# -----------------------------
# Collect the per-day records into a DataFrame and show the heading followed
# by the first ten rows.
sim_df = pd.DataFrame(weather_data)
print("\nSimulated Weather Data (first 10 days):", sim_df.head(10), sep="\n")


Sample Markov Chain Transition Matrix (first 5 states):
Clear: {'Clear': 0.8181818181818182, 'Partially cloudy': 0.18181818181818182}
Partially cloudy: {'Clear': 0.6666666666666666, 'Partially cloudy': 0.3333333333333333}

Simulated Weather Data (first 10 days):
   day         condition  temperature  humidity  rain  thunder
0    1             Clear         19.7      65.2     4    False
1    2             Clear         21.0      47.3     8    False
2    3             Clear         19.7      57.6     8    False
3    4             Clear         21.0      53.2     9    False
4    5             Clear         20.4      56.4     5    False
5    6  Partially cloudy         20.2      56.4     6    False
6    7  Partially cloudy         20.2      46.6     6     True
7    8             Clear         20.7      52.4     6    False
8    9             Clear         20.1      60.3     8    False
9   10             Clear         19.5      64.4     4     True
