# Loading Time-MMD Data

In [1]:
import io
import os

import pandas as pd
import requests

# Accessing the MMD dataset from the GitHub repository "time-mmd"
# The data consists of numerical and textual data, available in various subfolders
DATASET_DOMAINS = [
    "Agriculture", "Climate", "Economy", "Energy", "Environment", "Health_AFR", "Health_US", "Security", "SocialGood", "Traffic"
]  # List of available domains to be downloaded

BASE_URL = "https://github.com/adityalab/time-mmd/raw/main/"  # Base URL of the GitHub repository from which data will be downloaded

DATAFRAMES = {}  # Dictionary to store the DataFrames


In [2]:
# Downloading and saving the CSV files for all domains
for domain in DATASET_DOMAINS:  # Loop through all domains (e.g., Agriculture, Energy, Environment)
    # Processing numerical data
    dataset_url = f"{BASE_URL}numerical/{domain}/{domain}.csv"
    dataset_name = f"numerical_{domain}"
    try:
        response = requests.get(dataset_url)  # Send an HTTP GET request to download the file
        if response.status_code == 200:  # Check if the download was successful (status code 200)
            # Load the CSV file into a DataFrame directly from the response content
            data = pd.read_csv(io.StringIO(response.text))
            DATAFRAMES[dataset_name] = data
        else:
            print(f"Error downloading the dataset {dataset_name}. Status Code: {response.status_code}")  # Print an error message if download fails
    except Exception as e:
        print(f"Error accessing the dataset {dataset_name}: {str(e)}")  # Print a general error message in case of an exception

    # Processing textual data (report and search)
    for suffix in ["report", "search"]:
        dataset_url = f"{BASE_URL}textual/{domain}/{domain}_{suffix}.csv"
        dataset_name = f"textual_{domain}_{suffix}"
        try:
            response = requests.get(dataset_url)  # Send an HTTP GET request to download the file
            if response.status_code == 200:  # Check if the download was successful (status code 200)
                # Load the CSV file into a DataFrame directly from the response content
                data = pd.read_csv(io.StringIO(response.text))
                DATAFRAMES[dataset_name] = data
            else:
                print(f"Error downloading the dataset {dataset_name}. Status Code: {response.status_code}")  # Print an error message if download fails
        except Exception as e:
            print(f"Error accessing the dataset {dataset_name}: {str(e)}")  # Print a general error message in case of an exception

In [4]:
# Show the names of all tables currently held in DATAFRAMES.
print("Available DataFrames:", DATAFRAMES.keys(), sep="\n")

Available DataFrames:
dict_keys(['numerical_Agriculture', 'textual_Agriculture_report', 'textual_Agriculture_search', 'numerical_Climate', 'textual_Climate_report', 'textual_Climate_search', 'numerical_Economy', 'textual_Economy_report', 'textual_Economy_search', 'numerical_Energy', 'textual_Energy_report', 'textual_Energy_search', 'numerical_Environment', 'textual_Environment_report', 'textual_Environment_search', 'numerical_Health_AFR', 'textual_Health_AFR_report', 'textual_Health_AFR_search', 'numerical_Health_US', 'textual_Health_US_report', 'textual_Health_US_search', 'numerical_Security', 'textual_Security_report', 'textual_Security_search', 'numerical_SocialGood', 'textual_SocialGood_report', 'textual_SocialGood_search', 'numerical_Traffic', 'textual_Traffic_report', 'textual_Traffic_search'])


In [5]:
# Preview one loaded table: the numerical data for the Agriculture domain.
agriculture_numerical = DATAFRAMES["numerical_Agriculture"]
agriculture_numerical

Unnamed: 0,Date,Wholesale broiler composite,OT,Retail-wholesale spread for broiler composite,date,start_date,end_date
0,1980-01-01,,93.303958,,1980-01-01,1980-01-01,1980-01-31
1,1980-02-01,,92.391190,,1980-02-01,1980-02-01,1980-02-29
2,1980-03-01,,90.811401,,1980-03-01,1980-03-01,1980-03-31
3,1980-04-01,,88.235892,,1980-04-01,1980-04-01,1980-04-30
4,1980-05-01,,88.577714,,1980-05-01,1980-05-01,1980-05-31
...,...,...,...,...,...,...,...
527,2023-12-01,94.600368,244.235447,149.635079,2023-12-01,2023-12-01,2023-12-31
528,2024-01-01,,242.350564,,2024-01-01,2024-01-01,2024-01-31
529,2024-02-01,,237.414961,,2024-02-01,2024-02-01,2024-02-29
530,2024-03-01,,243.267957,,2024-03-01,2024-03-01,2024-03-31


In [5]:
!rmdir /s /q Time-LLM


Das System kann die angegebene Datei nicht finden.


In [25]:
!git clone https://github.com/KimMeen/Time-LLM.git

Cloning into 'Time-LLM'...
