## Exercise 2 - Entropy of a random variable from a data series

1. Identify a data series and estimate the probabilities of the outcomes based on 
their occurrences, updating the probabilities at each time step.
2. At each time step, compute the entropy, plot its behavior, and discuss the
results.

Note: In the presentation, include a link to the source of the data series.


In [None]:
import datetime
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests


# WeatherAPI credential. Prefer the WEATHERAPI_KEY environment variable so the
# key does not have to live in source control; the literal is kept only as a
# backward-compatible fallback when the variable is unset or empty.
# NOTE(review): this key is committed in plain text - rotate it and drop the
# fallback once every user of this script exports WEATHERAPI_KEY.
API_KEY = os.environ.get('WEATHERAPI_KEY') or '1f29dc3b9b2d4fa29f1115455242709'
LOCATION = 'Turin'

# Query window: today plus the six preceding days (7 calendar days in total),
# both ends formatted as YYYY-MM-DD strings.
today = datetime.date.today()
START_DATE = (today - datetime.timedelta(days=6)).strftime('%Y-%m-%d')
END_DATE = today.strftime('%Y-%m-%d')

# Function to fetch weather data for a given date
def fetch_weather_data(date):
    """Fetch the hourly weather-condition texts for one day from WeatherAPI.

    Args:
        date: Day to query, as a ``YYYY-MM-DD`` string; sent as the ``dt``
            query parameter together with ``API_KEY`` and ``LOCATION``.

    Returns:
        A list holding the ``condition.text`` value of every hourly entry of
        the first forecast day in the response. An empty list is returned
        (after printing a message) when the request itself fails, the server
        answers with a non-200 status, or the payload does not have the
        expected structure.
    """
    url = "https://api.weatherapi.com/v1/history.json"
    # Let requests build the query string so every value is URL-encoded.
    params = {'key': API_KEY, 'q': LOCATION, 'dt': date}

    try:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"Failed to fetch data for {date}: {type(exc).__name__}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data for {date}: {response.status_code}")
        return []

    try:
        data = response.json()
        # Extract the condition text for each hour
        hours = data['forecast']['forecastday'][0]['hour']
        return [hour_data['condition']['text'] for hour_data in hours]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Invalid JSON or a payload without the expected keys is treated like
        # any other failed fetch instead of aborting the whole run.
        print(f"Unexpected response format for {date}: {type(exc).__name__}")
        return []

# Days to query, one entry per calendar day from START_DATE to END_DATE
date_range = pd.date_range(start=START_DATE, end=END_DATE)

# Flatten the per-day condition lists into a single series, keeping the
# order in which the days are fetched
observations = [
    condition
    for day in date_range
    for condition in fetch_weather_data(day.strftime('%Y-%m-%d'))
]
print(observations)
def compute_entropy(probs):
    """Return the Shannon entropy, in bits, of a probability distribution.

    Args:
        probs: Sequence of probabilities. Entries that are not strictly
            positive are skipped, so zero-probability outcomes contribute
            nothing to the sum.

    Returns:
        ``-sum(p * log2(p))`` over the strictly positive entries.
    """
    probs = np.array(probs)
    probs_nonzero = probs[probs > 0]
    # Adding 0.0 turns the IEEE negative zero obtained for a one-outcome
    # distribution into +0.0, so it is never printed as "-0.0000".
    return -np.sum(probs_nonzero * np.log2(probs_nonzero)) + 0.0


# Nothing to analyse when no observation could be fetched
if not observations:
    print("No data was fetched. Please check your API key and date range.")
else:
    # Possible outcomes: the distinct conditions, in order of first appearance.
    # dict.fromkeys keeps insertion order, so the column order, the legend and
    # the final report are the same on every run (iterating a set of strings
    # changes between runs because of hash randomization).
    outcomes = list(dict.fromkeys(observations))

    # Running occurrence counts plus the per-step histories to be plotted
    counts = defaultdict(int)
    probabilities_over_time = []
    entropies = []

    # Replay the series one observation at a time, re-estimating the
    # probabilities from the relative frequencies seen so far
    for time_step, observation in enumerate(observations, start=1):
        counts[observation] += 1
        # Exactly one count is added per step, so the counts sum to time_step
        total_counts = time_step
        current_probabilities = [counts[outcome] / total_counts for outcome in outcomes]
        probabilities_over_time.append(current_probabilities)
        entropies.append(compute_entropy(current_probabilities))

    # One row per time step, one column per outcome
    probabilities_over_time = np.array(probabilities_over_time)
    time_steps = range(1, len(entropies) + 1)

    # Plotting the entropy over time
    plt.figure(figsize=(12, 6))
    plt.plot(time_steps, entropies, marker='o')
    plt.title('Entropy Behavior Over Time')
    plt.xlabel('Time Step')
    plt.ylabel('Entropy H (bits)')
    plt.grid(True)
    plt.show()

    # Plotting the probabilities over time for each outcome
    plt.figure(figsize=(12, 6))
    for idx, outcome in enumerate(outcomes):
        plt.plot(time_steps, probabilities_over_time[:, idx], label=outcome)
    plt.title('Probability of Each Outcome Over Time')
    plt.xlabel('Time Step')
    plt.ylabel('Probability')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Display the final probabilities and entropy
    print('Final Probabilities after {} time steps:'.format(len(observations)))
    for outcome, prob in zip(outcomes, probabilities_over_time[-1]):
        print('P({}) = {:.4f}'.format(outcome, prob))
    print('Final Entropy: {:.4f} bits'.format(entropies[-1]))

    # Discussion
    print('\nDiscussion:')
    print('The entropy plot shows how the uncertainty of the weather conditions changes over time.')
    print('Initially, the entropy may fluctuate as the probabilities are updated with each new observation.')
    print('As more data is collected, the probabilities stabilize, and the entropy approaches a steady value.')
    print('This steady value reflects the underlying distribution of the weather conditions in the data series.')

    # Data Source
    print('\nData Source:')
    print('Weather data obtained from WeatherAPI (https://www.weatherapi.com/)')
