In [1]:
# Setup environment detection and configuration

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Detect if running in Kaggle
def is_kaggle():
    """Report whether the notebook appears to be running on Kaggle.

    The check is simply whether the path ``/kaggle/input`` exists on this
    machine.

    Returns:
        bool: True if ``/kaggle/input`` exists, False otherwise.
    """
    kaggle_input_marker = '/kaggle/input'
    return os.path.exists(kaggle_input_marker)

# Get the correct path to project root
def get_project_root():
    """Return the directory treated as the project root.

    Returns:
        str: ``'/kaggle'`` when ``is_kaggle()`` is true. Otherwise the parent
        of the current working directory if that directory is named
        ``notebooks``, or the current working directory itself.
    """
    if is_kaggle():
        return '/kaggle'

    # Outside Kaggle the notebook may have been started from a notebooks/
    # subfolder; in that case the project root is one level up.
    cwd = os.getcwd()
    parent_dir, folder_name = os.path.split(cwd)
    return parent_dir if folder_name == 'notebooks' else cwd

# Set up environment-specific paths.
# project_root, input_dir, working_dir and vis_dir are module-level names;
# input_dir and working_dir are read again by the data-collection cell.
project_root = get_project_root()

if is_kaggle():
    # Kaggle paths
    input_dir = '/kaggle/input'
    working_dir = '/kaggle/working'
    vis_dir = '/kaggle/working/visualizations'
else:
    # Local paths relative to project root
    input_dir = os.path.join(project_root, 'data', 'input')
    working_dir = os.path.join(project_root, 'data', 'working')
    vis_dir = os.path.join(project_root, 'data', 'visualizations')

# Create directories if they don't exist.
# The existence check is kept only so the "Created directory" message is
# printed when something was actually missing; exist_ok=True makes the call
# itself safe if the directory appears between the check and the creation.
for dir_path in [input_dir, working_dir, vis_dir]:
    if not os.path.exists(dir_path):
        os.makedirs(dir_path, exist_ok=True)
        print(f"Created directory: {dir_path}")

# Display environment info
print(f"Running in {'Kaggle' if is_kaggle() else 'Local Jupyter'} environment")
print(f"Project root: {project_root}")
print(f"Input directory: {input_dir}")
print(f"Working directory: {working_dir}")
print(f"Visualization directory: {vis_dir}")

# Set visualization style.
# matplotlib >= 3.6 names the bundled seaborn style 'seaborn-v0_8-whitegrid';
# older releases only know it as 'seaborn-whitegrid'. Fall back to the legacy
# name only when the new one is missing and the legacy one is present, so the
# behavior on current matplotlib is unchanged.
sns.set_style('whitegrid')
mpl_style = 'seaborn-v0_8-whitegrid'
if mpl_style not in plt.style.available and 'seaborn-whitegrid' in plt.style.available:
    mpl_style = 'seaborn-whitegrid'
plt.style.use(mpl_style)
plt.rcParams['figure.figsize'] = (12, 8)

# Display settings: show every column, cap rows at 100, two-decimal floats
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

print("Setup complete. Ready to begin COVID-19 data analysis.")

Running in Local Jupyter environment
Project root: C:\Users\nelso\OneDrive\Desktop\covid19-global-tracker
Input directory: C:\Users\nelso\OneDrive\Desktop\covid19-global-tracker\data\input
Working directory: C:\Users\nelso\OneDrive\Desktop\covid19-global-tracker\data\working
Visualization directory: C:\Users\nelso\OneDrive\Desktop\covid19-global-tracker\data\visualizations
Setup complete. Ready to begin COVID-19 data analysis.


In [2]:
# Data Collection
# Load the Our World in Data COVID-19 dataset into `df`, preferring a copy
# that is already on disk and downloading only when none is found.

# Define file paths based on environment
owid_csv_name = 'owid-covid-data.csv'
output_file = os.path.join(working_dir, owid_csv_name)

# Set to True to ignore every on-disk copy and fetch a fresh file.
force_download = False

# Possible Kaggle dataset paths (check common Kaggle COVID dataset names)
kaggle_possible_paths = [
    os.path.join(input_dir, 'covid19', owid_csv_name),
    os.path.join(input_dir, 'covid-19-data', owid_csv_name),
    os.path.join(input_dir, 'owid-covid-data', owid_csv_name),
    os.path.join(input_dir, 'owid-covid19-data', owid_csv_name),
    os.path.join(input_dir, 'covid19dataset', owid_csv_name),
    # Add more potential paths if needed
]

# Locations checked in every environment: a file the user placed in the input
# directory by hand, then the copy saved by an earlier download. Without these
# a local run re-downloads the whole CSV each time and a manually downloaded
# file is never picked up.
local_possible_paths = [
    os.path.join(input_dir, owid_csv_name),
    output_file,
]

try:
    # Build the ordered list of on-disk candidates (Kaggle datasets first).
    candidate_paths = []
    if not force_download:
        if is_kaggle():
            candidate_paths.extend(kaggle_possible_paths)
        candidate_paths.extend(local_possible_paths)

    source_path = None
    for path in candidate_paths:
        if os.path.exists(path):
            source_path = path
            break

    # Retained so any later cell that reads this flag keeps working.
    found_in_kaggle = is_kaggle() and source_path in kaggle_possible_paths

    if source_path is not None:
        print(f"Found COVID-19 data at: {source_path}")
        df = pd.read_csv(source_path)
    else:
        # Nothing usable on disk: try direct download
        print("Downloading COVID-19 data from Our World in Data GitHub...")
        url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
        df = pd.read_csv(url)

        # Save a local copy so later runs can skip the download
        df.to_csv(output_file, index=False)
        print(f"Data downloaded and saved to {output_file}")

    print(f"Data loaded successfully. Shape: {df.shape}")
    # 'date' is compared as read from the CSV (no date parsing is applied here)
    print(f"Date range: {df['date'].min()} to {df['date'].max()}")

except Exception as e:
    print(f"Error loading data: {e}")
    print("\nTo add this dataset to your Kaggle notebook:")
    print("1. Click 'Add data' at the top right of the notebook")
    print("2. Search for 'covid-19 dataset' or 'our world in data covid'")
    print("3. Add the dataset and run this cell again")
    print("\nOr you can manually download from:")
    print("https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv")
    print(f"and save it as: {os.path.join(input_dir, owid_csv_name)}")
    # Re-raise so execution stops here: otherwise `df` is either undefined or
    # left over from an earlier run, and later cells would fail confusingly
    # or silently analyse stale data.
    raise

Downloading COVID-19 data from Our World in Data GitHub...
Data downloaded and saved to C:\Users\nelso\OneDrive\Desktop\covid19-global-tracker\data\working\owid-covid-data.csv
Data loaded successfully. Shape: (429435, 67)
Date range: 2020-01-01 to 2024-08-14
