In [1]:
import investpy as ip
import pandas as pd

# Getting the data #

In [135]:
def get_interest_rates(start_date, end_date):
    """Fetch US central-bank interest-rate events from the investpy calendar.

    Args:
        start_date: Start of the range, forwarded unchanged to
            ``ip.economic_calendar`` as ``from_date``.
        end_date: End of the range, forwarded unchanged as ``to_date``.

    Returns:
        The calendar rows whose ``event`` contains "Interest Rate", with
        ``date`` parsed to datetime and two added columns: ``value`` (the
        ``actual`` text with "%" removed; it is not converted to a number)
        and ``type`` (always "Interest Rate").
    """
    calendar = ip.economic_calendar(
        countries=["united states"],
        categories=["central_banks"],
        from_date=start_date,
        to_date=end_date,
    )
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice of ``calendar``.
    data = calendar[calendar["event"].str.contains("Interest Rate")].copy()
    # Calendar dates are day-first (dd/mm/yyyy, the format get_pmi parses);
    # without an explicit format, ambiguous dates such as 03/05/2023 would be
    # read month-first.
    data["date"] = pd.to_datetime(data["date"], format="%d/%m/%Y")
    data["value"] = data["actual"].str.replace("%", "", regex=False)
    data["type"] = "Interest Rate"
    return data

In [143]:
def get_nfp(start_date, end_date):
    """Fetch US nonfarm-payroll events from the investpy calendar.

    Args:
        start_date: Start of the range, forwarded unchanged to
            ``ip.economic_calendar`` as ``from_date``.
        end_date: End of the range, forwarded unchanged as ``to_date``.

    Returns:
        The employment-calendar rows whose ``event`` contains "Nonfarm" but
        neither "Private" nor "ADP", with ``date`` parsed to datetime and two
        added columns: ``value`` (the ``actual`` text with "K" removed; it is
        not converted to a number) and ``type`` (always "NFP").
    """
    calendar = ip.economic_calendar(
        countries=["united states"],
        categories=["employment"],
        from_date=start_date,
        to_date=end_date,
    )
    events = calendar["event"]
    is_nonfarm = events.str.contains("Nonfarm")
    is_private_or_adp = events.str.contains("Private|ADP")
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice of ``calendar``.
    data = calendar[is_nonfarm & ~is_private_or_adp].copy()
    # Calendar dates are day-first (dd/mm/yyyy, the format get_pmi parses);
    # without an explicit format, ambiguous dates would be read month-first.
    data["date"] = pd.to_datetime(data["date"], format="%d/%m/%Y")
    data["value"] = data["actual"].str.replace("K", "", regex=False)
    data["type"] = "NFP"
    return data

In [136]:
def get_inflation(start_date, end_date):
    """Fetch US headline CPI (YoY) events from the investpy calendar.

    Args:
        start_date: Start of the range, forwarded unchanged to
            ``ip.economic_calendar`` as ``from_date``.
        end_date: End of the range, forwarded unchanged as ``to_date``.

    Returns:
        The inflation-calendar rows whose ``event`` contains the literal text
        "CPI (YoY)" and does not contain "Core", with ``date`` parsed to
        datetime and two added columns: ``value`` (the ``actual`` text with
        "%" removed; it is not converted to a number) and ``type`` (always
        "CPI").
    """
    calendar = ip.economic_calendar(
        countries=["united states"],
        categories=["inflation"],
        from_date=start_date,
        to_date=end_date,
    )
    events = calendar["event"]
    # regex=False: the parentheses must match literally, not as a regex group.
    is_cpi_yoy = events.str.contains("CPI (YoY)", regex=False)
    is_core = events.str.contains("Core")
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice of ``calendar``.
    data = calendar[is_cpi_yoy & ~is_core].copy()
    # Calendar dates are day-first (dd/mm/yyyy, the format get_pmi parses);
    # without an explicit format, ambiguous dates would be read month-first.
    data["date"] = pd.to_datetime(data["date"], format="%d/%m/%Y")
    data["value"] = data["actual"].str.replace("%", "", regex=False)
    data["type"] = "CPI"
    return data

In [137]:
def get_pmi(start_date, end_date):
    """Fetch one US Manufacturing PMI event per month from the investpy calendar.

    Args:
        start_date: Start of the range, forwarded unchanged to
            ``ip.economic_calendar`` as ``from_date``.
        end_date: End of the range, forwarded unchanged as ``to_date``.

    Returns:
        For each calendar month, the first row (in the order the calendar
        returned them) whose ``event`` contains "Manufacturing PMI" and not
        "ISM". ``date`` is parsed to datetime, like the other fetchers in this
        file, and two columns are added: ``value`` (a copy of ``actual``) and
        ``type`` (always "PMI").
    """
    calendar = ip.economic_calendar(
        countries=["united states"],
        categories=["economic_activity"],
        from_date=start_date,
        to_date=end_date,
    )
    events = calendar["event"]
    is_manufacturing_pmi = events.str.contains("Manufacturing PMI")
    is_ism = events.str.contains("ISM")
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice of ``calendar``.
    data = calendar[is_manufacturing_pmi & ~is_ism].copy()
    # Parse the day-first date once and keep it, so callers get real
    # datetimes instead of dd/mm/yyyy strings that a later format-less
    # to_datetime could read month-first.
    data["date"] = pd.to_datetime(data["date"], format="%d/%m/%Y")
    # Temporary grouping key; dropped again before returning.
    data["month"] = data["date"].dt.to_period("M")
    data["value"] = data["actual"]
    data["type"] = "PMI"
    return data.groupby("month").head(1).drop("month", axis=1)

In [138]:
def get_unemployment(start_date, end_date):
    """Fetch US unemployment events from the investpy calendar.

    Args:
        start_date: Start of the range, forwarded unchanged to
            ``ip.economic_calendar`` as ``from_date``.
        end_date: End of the range, forwarded unchanged as ``to_date``.

    Returns:
        The employment-calendar rows whose ``event`` contains "Unemployment"
        but not "U6", with ``date`` parsed to datetime and two added columns:
        ``value`` (the ``actual`` text with "%" removed; it is not converted
        to a number) and ``type`` (always "Unemployment").
    """
    calendar = ip.economic_calendar(
        countries=["united states"],
        categories=["employment"],
        from_date=start_date,
        to_date=end_date,
    )
    events = calendar["event"]
    is_unemployment = events.str.contains("Unemployment")
    is_u6 = events.str.contains("U6")
    # Copy the filtered rows so the column assignments below act on an
    # independent frame instead of a slice of ``calendar``.
    data = calendar[is_unemployment & ~is_u6].copy()
    # Calendar dates are day-first (dd/mm/yyyy, the format get_pmi parses);
    # without an explicit format, ambiguous dates would be read month-first.
    data["date"] = pd.to_datetime(data["date"], format="%d/%m/%Y")
    data["value"] = data["actual"].str.replace("%", "", regex=False)
    data["type"] = "Unemployment"
    return data

In [150]:
def get_gpr(start_date, end_date):
    """Download the daily GPR spreadsheet and keep the rows inside a date range.

    Args:
        start_date: Inclusive lower bound. Strings are interpreted day-first
            (dd/mm/yyyy) so that the same string handed to the investpy-based
            fetchers in this file selects the same window here; datetime-like
            values are accepted as well.
        end_date: Inclusive upper bound, interpreted like ``start_date``.

    Returns:
        DataFrame with columns ``GPRD``, ``date``, ``value`` (``GPRD`` coerced
        to numeric and rounded to 0 decimals; unparseable entries become NaN)
        and ``type`` (always "GPRD").
    """
    sheet = pd.read_excel(
        "https://www.matteoiacoviello.com/gpr_files/data_gpr_daily_recent.xls"
    )
    # dayfirst=True: a bare pd.to_datetime would read "01/03/2023" as
    # January 3rd, while investpy documents from_date/to_date as dd/mm/yyyy
    # (March 1st), so the two sources would cover different windows.
    lower = pd.to_datetime(start_date, dayfirst=True)
    upper = pd.to_datetime(end_date, dayfirst=True)
    in_range = (sheet["date"] >= lower) & (sheet["date"] <= upper)
    # Copy so the column assignments below act on an independent frame
    # instead of a slice of ``sheet``.
    data = sheet.loc[in_range, ["GPRD", "date"]].copy()
    data["value"] = pd.to_numeric(data["GPRD"], errors="coerce").round(0)
    data["type"] = "GPRD"
    return data

# Combining the data #

In [153]:
def combine_and_sort_data(start_date, end_date):
    """Fetch every indicator, merge them, sort by date and export to Excel.

    Args:
        start_date: Start of the range, passed unchanged to each fetcher.
        end_date: End of the range, passed unchanged to each fetcher.

    Returns:
        DataFrame with columns ``date`` (formatted as "YYYY-MM-DD"),
        ``value`` and ``type``, ordered by date. The full combined frame
        (all columns) is also written to "data.xlsx" in the working directory.
    """
    frames = [
        get_interest_rates(start_date, end_date),
        get_nfp(start_date, end_date),
        get_inflation(start_date, end_date),
        get_pmi(start_date, end_date),
        get_unemployment(start_date, end_date),
        get_gpr(start_date, end_date),
    ]

    normalised = []
    for frame in frames:
        # A fetcher may hand back raw calendar strings (dd/mm/yyyy). Parse
        # those day-first here; parsing them after concatenation without a
        # format would read ambiguous dates month-first.
        if not pd.api.types.is_datetime64_any_dtype(frame["date"]):
            frame = frame.assign(
                date=pd.to_datetime(frame["date"], format="%d/%m/%Y")
            )
        normalised.append(frame)

    combined_data = pd.concat(normalised)
    combined_data["date"] = pd.to_datetime(combined_data["date"])
    # Do the sort the function name promises; a stable sort keeps the
    # fetcher order for rows that share a date.
    combined_data = combined_data.sort_values("date", kind="stable")
    combined_data["date"] = combined_data["date"].dt.strftime("%Y-%m-%d")
    combined_data.to_excel("data.xlsx")
    return combined_data[["date", "value", "type"]]