In [None]:
# %%
import os
import requests
import zipfile
import pandas as pd
import datetime as dt
import yfinance as  yf

In [None]:
# --- CFTC download settings -------------------------------------------------
# URL template for the yearly archive; `{}` is filled with the year.
base_url = 'https://www.cftc.gov/files/dea/history/fut_disagg_txt_{}.zip'
# Not referenced by any of the visible cells.
# NOTE(review): presumably meant to filter the CFTC rows — confirm.
cftc_market_code = '001602'

# --- Local storage ----------------------------------------------------------
# Archives are downloaded and extracted under ./data (relative to the cwd).
output_dir = os.path.join(os.getcwd(), 'data')
# Name of the combined CSV written inside `output_dir`.
output_file = 'fut_disagg.txt'

# --- Price download settings ------------------------------------------------
# Ticker and date bounds passed to `yf.download` as `start` / `end`.
yf_code = 'ZW=F'
start_date = '2020-01-01'
end_date = '2021-01-01'

In [None]:
# Download each yearly CFTC archive, extract it, and build one combined CSV
# at <output_dir>/<output_file>.

# The data directory must exist before the zip can be written into it.
os.makedirs(output_dir, exist_ok=True)
combined_path = os.path.join(output_dir, output_file)

for position, year in enumerate(range(2021, 2020, -1)):
    print(f"Downloading CFTC data for year {year}...")

    # Construct the URL and download path for this year
    url = base_url.format(year)
    output_zip = os.path.join(output_dir, f'fut_disagg_txt_{year}.zip')

    # Download the file. Fail immediately on an HTTP error instead of saving
    # an error page that would only surface later as a BadZipFile, and use a
    # timeout so a stalled connection cannot hang the notebook forever.
    r = requests.get(url, timeout=60)
    r.raise_for_status()

    # Save it as a binary file
    with open(output_zip, 'wb') as f:
        f.write(r.content)

    try:
        # Extract all the contents into the data directory
        with zipfile.ZipFile(output_zip, 'r') as zip_ref:
            zip_ref.extractall(output_dir)
    finally:
        # The archive is no longer needed; remove it even if extraction failed
        os.remove(output_zip)

    # Load the data from the extracted file.
    # NOTE(review): assumes every yearly archive extracts to 'f_year.txt' and
    # that the file is tab-delimited — confirm against the CFTC file format.
    new_data = pd.read_csv(os.path.join(output_dir, 'f_year.txt'), delimiter='\t')

    # The first year of a run (re)creates the combined file with a header;
    # later years append. Re-running the cell therefore rebuilds the file
    # instead of appending duplicate rows to a previous run's output.
    is_first_year = position == 0
    new_data.to_csv(
        combined_path,
        mode='w' if is_first_year else 'a',
        header=is_first_year,
        index=False,
    )

In [None]:

    
print("Downloaded, selecting data...")

# Fetch the price history for the configured ticker between `start_date` and
# `end_date`, with yfinance's progress bar switched off.
yf_df = yf.download(
    yf_code,
    start=start_date,
    end=end_date,
    progress=False,
)

# Display the downloaded frame as the cell output.
yf_df


In [None]:
# Read back the combined CFTC CSV produced by the download cell.
print("Output_file: ", output_file)

cftc_path = os.path.join(output_dir, output_file)
cftc_df = pd.read_csv(cftc_path, delimiter=',')

# Show which columns were parsed, then display the frame itself.
print("Col:", cftc_df.columns)
cftc_df


In [None]:
# Combine the price history with the CFTC report rows on calendar date.
#
# The report date comes back from read_csv as text while the price frame is
# indexed by timestamps, so both sides are given an explicit datetime 'Date'
# key before merging. A left_index/right_on merge would not produce a 'Date'
# column to index on afterwards.
# NOTE(review): `cftc_market_code` is not applied in the visible cells, so
# every row in `cftc_df` takes part in this merge — confirm that is intended.
cftc_by_date = cftc_df.assign(
    Date=pd.to_datetime(cftc_df['Report_Date_as_YYYY-MM-DD'])
)

price_dates = pd.DatetimeIndex(yf_df.index)
if price_dates.tz is not None:
    # Drop timezone info so the key is comparable with the tz-naive report dates
    price_dates = price_dates.tz_localize(None)
prices_by_date = yf_df.set_axis(price_dates.rename('Date'), axis=0).reset_index()

df = (
    pd.merge(prices_by_date, cftc_by_date, on='Date', how='outer')
    # Backfill is order-dependent: make the chronological order explicit
    .sort_values('Date')
    # Same backward fill as before, without the deprecated fillna(method=...)
    .bfill()
    .set_index('Date')
)
df