<a href="https://colab.research.google.com/github/Saurabh1222/COVID-19-Analysis-and-Forecasting/blob/main/COVID_19_Analysis_and_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Uncomment on a runtime where prophet is not installed yet: %pip install --upgrade prophet

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from prophet import Prophet
from datetime import timedelta

In [3]:
# Location of the COVID-19 CSV. This is a relative path, so it is resolved
# against the kernel's current working directory.
# NOTE(review): presumably the file has to be uploaded to the runtime first — confirm.
FILE_PATH = 'covid_19_clean_complete.csv'

# --- 1. Data Accumulation and Preprocessing ---

def load_and_preprocess_data(file_path):
    """Read the case CSV and collapse it to one row of totals per date.

    Args:
        file_path: Path of the CSV to read. The columns ``Date``,
            ``Confirmed``, ``Deaths``, ``Recovered`` and ``Active`` are used.

    Returns:
        A DataFrame with one row per date holding the summed ``Confirmed``,
        ``Deaths``, ``Recovered`` and ``Active`` values plus the day-over-day
        differences ``Daily_Confirmed`` and ``Daily_Recovered``; or ``None``
        (after printing an error) when the file does not exist.
    """
    print(f"Loading data from {file_path}...")
    try:
        raw = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}. Please ensure the file is correctly placed.")
        return None

    # Parse the dates so grouping works on real timestamps rather than strings.
    raw['Date'] = pd.to_datetime(raw['Date'])

    # Sum every row that shares a date into a single row of totals.
    summed_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
    totals = raw.groupby('Date')[summed_columns].sum().reset_index()

    # Daily change of the cumulative counts; the first date has no predecessor,
    # so its NaN difference is replaced by 0.
    for cumulative in ('Confirmed', 'Recovered'):
        totals[f'Daily_{cumulative}'] = totals[cumulative].diff().fillna(0)

    print("Data loaded and daily rates calculated.")
    return totals

In [4]:
# --- 2. Visualization Functions (Plotly) ---

def plot_cumulative_trend(df):
    """Show an interactive line chart of the cumulative global counts.

    Args:
        df: DataFrame with ``Date``, ``Confirmed``, ``Recovered`` and
            ``Deaths`` columns.

    Returns:
        None. The figure is displayed and a confirmation line is printed.
    """
    # (column, legend label, line colour), listed in drawing order.
    series_specs = (
        ('Confirmed', 'Confirmed Cases', 'blue'),
        ('Recovered', 'Recovered Cases', 'green'),
        ('Deaths', 'Deaths', 'red'),
    )

    fig = go.Figure()
    for column, legend_label, colour in series_specs:
        fig.add_trace(go.Scatter(
            x=df['Date'],
            y=df[column],
            mode='lines',
            name=legend_label,
            line=dict(color=colour),
        ))

    fig.update_layout(
        title='Global Cumulative COVID-19 Trend',
        xaxis_title='Date',
        yaxis_title='Number of Cases',
        hovermode='x unified',
        template='plotly_dark',
    )
    fig.show()
    print("Cumulative Trend visualization displayed.")

def plot_daily_rate_trend(df):
    """Show an interactive line chart of daily new cases and daily recoveries.

    Args:
        df: DataFrame with ``Date``, ``Daily_Confirmed`` and
            ``Daily_Recovered`` columns.

    Returns:
        None. The figure is displayed and a confirmation line is printed.
    """
    daily_columns = ['Daily_Confirmed', 'Daily_Recovered']
    line_colours = {'Daily_Confirmed': 'orange', 'Daily_Recovered': 'teal'}

    fig = px.line(
        df,
        x='Date',
        y=daily_columns,
        title='Global Daily Rate of Infection (New Cases) and Recovery',
        labels={'value': 'New Cases/Recoveries', 'Date': 'Date'},
        color_discrete_map=line_colours,
    )

    # The axis titles are set explicitly here, after the figure is built.
    layout_options = dict(
        xaxis_title='Date',
        yaxis_title='Daily Count',
        hovermode='x unified',
        template='plotly_dark',
    )
    fig.update_layout(**layout_options)
    fig.show()
    print("Daily Rate visualization displayed.")

In [None]:
# --- 3. Forecasting (Facebook Prophet) ---

def forecast_with_prophet(df, periods=7):
    """Fit a Prophet model on cumulative confirmed cases and predict ahead.

    Args:
        df: DataFrame with ``Date`` and ``Confirmed`` columns.
        periods: Number of future periods to append after the history
            (default 7).

    Returns:
        Tuple ``(model, forecast)``: the fitted Prophet instance and the
        frame returned by ``model.predict`` for history plus future rows.
    """
    print(f"\nTraining Prophet model to forecast {periods} days...")

    # Prophet expects the timestamp column to be called 'ds' and the target 'y'.
    prophet_names = {'Date': 'ds', 'Confirmed': 'y'}
    training_frame = df[['Date', 'Confirmed']].rename(columns=prophet_names)

    # NOTE(review): yearly seasonality is forced on. The recorded run forecasts
    # from 2020-07-28, so the history is likely well under a year long —
    # confirm that a yearly component is really intended here.
    model_options = dict(
        growth='linear',  # same as Prophet's default, spelled out
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode='multiplicative',
    )
    model = Prophet(**model_options)
    model.fit(training_frame)

    # History dates followed by `periods` additional rows to predict.
    future_dates = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future_dates)

    print(f"Prophet forecast complete for {periods} days.")
    return model, forecast

In [5]:
# --- 4. Prediction Visualization ---

def plot_prophet_forecast(model, forecast, actual_df):
    """Plot observed confirmed cases together with the forecast that follows them.

    Rows of ``forecast`` dated at least one day after the last date in
    ``actual_df`` form the prediction horizon. The horizon length is read
    from those rows, so the title, legend and printed table match whatever
    number of periods was forecast instead of assuming seven.

    Args:
        model: Fitted Prophet model. Not used by this function; kept so
            existing callers keep working.
        forecast: DataFrame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.
        actual_df: DataFrame with ``Date`` and ``Confirmed`` columns.

    Returns:
        None. The figure is displayed and the forecast table is printed.
    """
    # Keep only the rows that lie beyond the observed history.
    last_date = actual_df['Date'].max()
    prediction_start_date = last_date + timedelta(days=1)
    future_prediction = forecast[forecast['ds'] >= prediction_start_date]

    # One forecast row is treated as one day when labelling the output.
    horizon_days = len(future_prediction)

    fig = go.Figure()

    # Historical Data
    fig.add_trace(go.Scatter(
        x=actual_df['Date'], y=actual_df['Confirmed'], mode='markers+lines',
        name='Actual Confirmed Cases', line=dict(color='cyan')
    ))

    # Prediction Interval (Uncertainty).
    # 'tonexty' fills towards the trace added just before. The upper bound
    # follows the historical series, so it must NOT fill; otherwise the area
    # between history and the upper bound is shaded. Only the lower bound
    # fills, up to the upper bound.
    fig.add_trace(go.Scatter(
        x=future_prediction['ds'], y=future_prediction['yhat_upper'],
        mode='lines', line=dict(width=0),
        name='Upper Bound (Uncertainty)'
    ))
    fig.add_trace(go.Scatter(
        x=future_prediction['ds'], y=future_prediction['yhat_lower'], fill='tonexty',
        fillcolor='rgba(255, 0, 0, 0.1)', mode='lines', line=dict(width=0),
        name='Lower Bound (Uncertainty)'
    ))

    # Predicted Trend (yhat)
    fig.add_trace(go.Scatter(
        x=future_prediction['ds'], y=future_prediction['yhat'], mode='lines',
        name=f'Predicted Trend (Next {horizon_days} Days)',
        line=dict(color='red', dash='dot')
    ))

    fig.update_layout(
        title=f'Prophet Forecast: Confirmed Cases (Next {horizon_days} Days)',
        xaxis_title='Date',
        yaxis_title='Predicted Confirmed Cases',
        template='plotly_dark'
    )
    fig.show()

    # Print every forecast row, not just the last seven.
    print(f"\n--- {horizon_days}-Day Confirmed Case Forecast ---")
    print(future_prediction[['ds', 'yhat']].round(0).to_string(index=False))


In [6]:
# --- Main Execution ---

if __name__ == "__main__":
    # 1. Load and prepare data
    # The loader returns None (after printing an error) when the CSV is missing.
    global_cases = load_and_preprocess_data(FILE_PATH)

    # Every later step needs the aggregated frame, so skip them all if loading failed.
    if global_cases is not None:
        # 2. Visualize Trends
        # Cumulative totals first, then the day-over-day changes.
        plot_cumulative_trend(global_cases)
        plot_daily_rate_trend(global_cases)

        # 3. Forecast
        # Seven periods ahead; both the fitted model and the prediction frame are kept.
        prophet_model, forecast_results = forecast_with_prophet(global_cases, periods=7)

        # 4. Visualize Prediction and Print Results
        plot_prophet_forecast(prophet_model, forecast_results, global_cases)

Loading data from covid_19_clean_complete.csv...
Data loaded and daily rates calculated.


Cumulative Trend visualization displayed.


Daily Rate visualization displayed.

Training Prophet model to forecast 7 days...
Prophet forecast complete for 7 days.



--- 7-Day Confirmed Case Forecast ---
        ds       yhat
2020-07-28 16722401.0
2020-07-29 16972109.0
2020-07-30 17218038.0
2020-07-31 17448824.0
2020-08-01 17643316.0
2020-08-02 17777941.0
2020-08-03 17879405.0
