# Demo-1+: Vergleich von Zeitreihen mit Liniendiagrammen 

## Importing Dependencies

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as wgts
from IPython.display import display
import numpy as np

## Loading Datasets

In [13]:
# Read both CSV inputs from the working directory, in a fixed order.
#   covid_19_dataset: case records; the plotting functions below read its
#                     'Date', 'Country', 'Confirmed', 'Recovered' and 'Deaths' columns.
#   regions_dataset:  lookup table; the plotting functions below read its
#                     'Continent' and 'Country' columns.
covid_19_dataset, regions_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ('cleaned_dataset.csv', 'regions_dataset.csv')
)

## Sample Plot

In [14]:
# # Load dataset
# df = pd.read_csv("cleaned_dataset.csv")

# # Convert Date column to datetime format
# df['Date'] = pd.to_datetime(df['Date'])

# # Choose a country (e.g., 'Mexico')
# country = "Mexico"
# df_country = df[df['Country'] == country].sort_values(by="Date")

# # Compute cumulative sum
# df_country['Total Confirmed'] = df_country['Confirmed'].cumsum()
# df_country['Total Recovered'] = df_country['Recovered'].cumsum()
# df_country['Total Deaths'] = df_country['Deaths'].cumsum()

# # Plot time series graph
# plt.figure(figsize=(12, 6))
# plt.plot(df_country['Date'], df_country['Total Confirmed'], label="Confirmed Cases", color='blue')
# plt.plot(df_country['Date'], df_country['Total Recovered'], label="Recovered Cases", color='green')
# plt.plot(df_country['Date'], df_country['Total Deaths'], label="Deaths", color='red')

# # Formatting the plot
# plt.xlabel("Date")
# plt.ylabel("Cumulative Count")
# plt.title(f"COVID-19 Time Series for {country}")
# plt.legend()
# plt.xticks(rotation=45)
# plt.grid(True)

# # Show plot
# # plt.show()

## Plotting Functions

### Country-Based Time Series Plotting

In [None]:
def plot_country_timeseries(country: str, from_date: str, to_date: str):
    """
    Plot cumulative confirmed, recovered and death counts for one country.

    Rows of the module-level ``covid_19_dataset`` are restricted to ``country``
    and to the inclusive range ``from_date``..``to_date``, sorted by date, and
    the running totals of the 'Confirmed', 'Recovered' and 'Deaths' columns are
    drawn as three lines on a single figure.

    The module-level dataset is only read; it is never modified by this call.

    Parameters:
        country (str): The country name to filter data for.
        from_date (str): The start date in 'YYYY-MM-DD' format (inclusive).
        to_date (str): The end date in 'YYYY-MM-DD' format (inclusive).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Parse the dates into a separate Series rather than assigning them back
    # onto a frame built with pd.DataFrame(covid_19_dataset): that constructor
    # does not copy by default, so such an assignment can leak into the shared
    # module-level dataset (depending on pandas version / copy-on-write mode).
    dates = pd.to_datetime(covid_19_dataset['Date'])

    # Rows for the requested country inside the inclusive date window
    selected = (
        (covid_19_dataset['Country'] == country)
        & (dates >= from_date)
        & (dates <= to_date)
    )

    # .loc with a boolean mask plus .assign() yields an independent frame, so
    # the column assignments below never touch the original data.
    df_country = (
        covid_19_dataset.loc[selected]
        .assign(Date=dates[selected])
        .sort_values(by="Date")
    )

    # Compute cumulative sum of each metric over the sorted rows
    df_country['Total Confirmed'] = df_country['Confirmed'].cumsum()
    df_country['Total Recovered'] = df_country['Recovered'].cumsum()
    df_country['Total Deaths'] = df_country['Deaths'].cumsum()

    # Plot time series graph
    plt.figure(figsize=(12, 6))
    plt.plot(df_country['Date'], df_country['Total Confirmed'], label="Confirmed Cases", color='blue')
    plt.plot(df_country['Date'], df_country['Total Recovered'], label="Recovered Cases", color='green')
    plt.plot(df_country['Date'], df_country['Total Deaths'], label="Deaths", color='red')

    # Formatting the plot
    plt.xlabel("Date")
    plt.ylabel("Cumulative Count")
    plt.title(f"COVID-19 Time Series for {country} ({from_date} to {to_date})")
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)

    # Show plot
    plt.show()

### Multi-Region (Country, Continent, or World) Time Series Plotting

In [None]:
def plot_multiple_timeseries(regions: list, column: str, from_date: str, to_date: str):
    """
    Plot the cumulative value of one column for several regions on one figure.

    Each entry of ``regions`` is resolved in this order:
      1. the literal "World" -> all rows of the dataset;
      2. a value found in ``regions_dataset['Continent']`` -> rows whose
         'Country' is listed for that continent in ``regions_dataset``;
      3. anything else -> rows whose 'Country' equals the entry.
    For every region the selected rows are summed per date, the running total
    of ``column`` is computed, and one line is drawn.

    The module-level datasets are only read; they are never modified.

    Parameters:
        regions (list): List of regions (country names, continent names, or "World").
            A single region name passed as a plain string is treated as a
            one-element list.
        column (str): The column to plot ("Confirmed", "Recovered", or "Deaths").
        from_date (str): Start date in 'YYYY-MM-DD' format (inclusive).
        to_date (str): End date in 'YYYY-MM-DD' format (inclusive).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(regions, str):
        regions = [regions]

    # Parse the dates into a separate Series rather than assigning them back
    # onto a frame built with pd.DataFrame(covid_19_dataset): that constructor
    # does not copy by default, so such an assignment can leak into the shared
    # module-level dataset (depending on pandas version / copy-on-write mode).
    dates = pd.to_datetime(covid_19_dataset['Date'])

    # Filter dataset for the inclusive date range; .loc + .assign gives an
    # independent frame with a proper datetime 'Date' column.
    in_range = (dates >= from_date) & (dates <= to_date)
    df = covid_19_dataset.loc[in_range].assign(Date=dates[in_range])

    # Loop-invariant: the known continent names, built once for O(1) lookups.
    continents = set(regions_dataset["Continent"].unique())

    # Initialize the plot
    plt.figure(figsize=(12, 6))

    for region in regions:
        if region == "World":
            # Every row contributes to the global aggregate
            df_subset = df
        elif region in continents:
            # Get countries belonging to the continent
            countries_in_continent = regions_dataset.loc[
                regions_dataset["Continent"] == region, "Country"
            ].unique()
            df_subset = df[df["Country"].isin(countries_in_continent)]
        else:
            # Treat as a country
            df_subset = df[df["Country"] == region]

        # Sum per date for every kind of region. groupby sorts by date, and
        # a country with several rows for the same date then contributes one
        # point per date, exactly like the World and continent cases.
        df_region = df_subset.groupby("Date")[column].sum().reset_index()

        # Compute cumulative sum for the selected column
        df_region[f"Total {column}"] = df_region[column].cumsum()

        # Plot the time series
        plt.plot(df_region["Date"], df_region[f"Total {column}"], label=f"{region} ({column})")

    # Formatting the plot
    plt.xlabel("Date")
    plt.ylabel("Cumulative Count")
    plt.title(f"COVID-19 {column} Cases Over Time ({from_date} to {to_date})")
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)

    # Show plot
    plt.show()

