<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/get_ticker_data_20241223.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Get Ticker Data From Yf

Upload tickers_list.csv with ticker list.
Round values to 5 decimals and run error check.
Aggregate into W and M.
Export to workbook folders.

1.   Upload tickers_list.csv with ticker list.
2.   Round values to 5 decimals
3.   Run error check.
4.   Aggregate into W and M.
5.   Export to local workbook folders.

In [7]:
import yfinance as yf
import pandas as pd
import numpy as np
import os

# Read tickers from CSV file
tickers = pd.read_csv('/content/tickers_list.csv', header=None)[0].tolist()  # Adjust filename and format as needed

# Create export directories if they don't exist
os.makedirs('/content/export_daily', exist_ok=True)
os.makedirs('/content/export_weekly', exist_ok=True)
os.makedirs('/content/export_monthly', exist_ok=True)

def validate_data(df, ticker):
    """Validate OHLC data for errors"""
    original_count = len(df)

    # Round High and Close UP, Low DOWN to 5 decimal places
    df['High'] = np.ceil(df['High'] * 100000) / 100000
    df['Low'] = np.floor(df['Low'] * 100000) / 100000
    df['Close'] = np.ceil(df['Close'] * 100000) / 100000
    # Keep Open with standard rounding
    df['Open'] = df['Open'].round(5)

    # Create mask for each condition
    non_zero = (df['Open'] > 0) & (df['High'] > 0) & (df['Low'] > 0) & (df['Close'] > 0)
    valid_low = (df['Low'] <= df['Close']) & (df['Low'] <= df['Open'])
    valid_high = (df['High'] >= df['Close']) & (df['High'] >= df['Open'])
    valid_range = (df['High'] - df['Low']) > 0

    # Combine all conditions
    valid_rows = non_zero & valid_low & valid_high & valid_range

    # Filter data
    df_clean = df[valid_rows].copy()

    # Report removed rows
    removed_count = original_count - len(df_clean)
    if removed_count > 0:
        print(f"{ticker}: Removed {removed_count} invalid rows out of {original_count}")

    return df_clean

def aggregate_data(df, ticker):
    agg_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    }

    weekly_data = df.set_index('Date').resample('W').agg(agg_rules).reset_index()
    monthly_data = df.set_index('Date').resample('ME').agg(agg_rules).reset_index()

    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    weekly_data = weekly_data[column_order]
    monthly_data = monthly_data[column_order]

    # Validate aggregated data
    weekly_data = validate_data(weekly_data, f"{ticker}_weekly")
    monthly_data = validate_data(monthly_data, f"{ticker}_monthly")

    return weekly_data, monthly_data

def save_to_csv(data, ticker, period):
    if period == 'daily':
        file_path = f'/content/export_daily/{ticker}_daily.csv'
    elif period == 'weekly':
        file_path = f'/content/export_weekly/{ticker}_weekly.csv'
    else:
        file_path = f'/content/export_monthly/{ticker}_monthly.csv'

    data.to_csv(file_path, index=False)
    print(f"Saved {file_path}")

def process_ticker(data, ticker):
    print(f"Processing {ticker}...")
    try:
        # Extract ticker data and validate
        df = data[ticker].reset_index()  # Changed this line

        # Ensure we only keep OHLC columns
        df = df[['Date', 'Open', 'High', 'Low', 'Close']]

        # Validate and save daily data
        df = validate_data(df, ticker)

        if len(df) > 0:
            save_to_csv(df, ticker, 'daily')

            # Process and save weekly/monthly data
            weekly_data, monthly_data = aggregate_data(df, ticker)

            if len(weekly_data) > 0:
                save_to_csv(weekly_data, ticker, 'weekly')
            if len(monthly_data) > 0:
                save_to_csv(monthly_data, ticker, 'monthly')

            print(f"Successfully processed {ticker}")
        else:
            print(f"No valid data for {ticker}")

    except Exception as e:
        print(f"Error processing {ticker}: {str(e)}")

# Download data since 1995
print("Downloading data...")
data = yf.download(tickers, start="1994-01-01", end="2024-12-31", group_by="ticker")
print("Download complete. Processing tickers...")

# Process all tickers
for ticker in tickers:
    process_ticker(data, ticker)

print("All processing complete.")

Downloading data...


[*********************100%***********************]  134 of 134 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ABMD']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Download complete. Processing tickers...
Processing AAL...
AAL: Removed 2956 invalid rows out of 7803
Saved /content/export_daily/AAL_daily.csv
Saved /content/export_weekly/AAL_weekly.csv
Saved /content/export_monthly/AAL_monthly.csv
Successfully processed AAL
Processing AAP...
AAP: Removed 1993 invalid rows out of 7803
Saved /content/export_daily/AAP_daily.csv
Saved /content/export_weekly/AAP_weekly.csv
Saved /content/export_monthly/AAP_monthly.csv
Successfully processed AAP
Processing AAPL...
Saved /content/export_daily/AAPL_daily.csv
Saved /content/export_weekly/AAPL_weekly.csv
Saved /content/export_monthly/AAPL_monthly.csv
Successfully processed AAPL
Processing ABBV...
ABBV: Removed 4784 invalid rows out of 7803
Saved /content/export_daily/ABBV_daily.csv
Saved /content/export_weekly/ABBV_weekly.csv
Saved /content/export_monthly/ABBV_monthly.csv
Successfully processed ABBV
Processing ABMD...
ABMD: Removed 7803 invalid rows out of 7803
No valid data for ABMD
Processing ACN...
ACN: Re

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['High'] = np.ceil(df['High'] * 100000) / 100000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Low'] = np.floor(df['Low'] * 100000) / 100000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Close'] = np.ceil(df['Close'] * 100000) / 100000
A value is trying to be set on a copy of a slice fr

Saved /content/export_weekly/JPM_weekly.csv
Saved /content/export_monthly/JPM_monthly.csv
Successfully processed JPM
Processing KHC...
KHC: Removed 5414 invalid rows out of 7803
Saved /content/export_daily/KHC_daily.csv
Saved /content/export_weekly/KHC_weekly.csv
Saved /content/export_monthly/KHC_monthly.csv
Successfully processed KHC
Processing KO...
Saved /content/export_daily/KO_daily.csv
Saved /content/export_weekly/KO_weekly.csv
Saved /content/export_monthly/KO_monthly.csv
Successfully processed KO
Processing LCID...
LCID: Removed 6726 invalid rows out of 7803
Saved /content/export_daily/LCID_daily.csv
Saved /content/export_weekly/LCID_weekly.csv
Saved /content/export_monthly/LCID_monthly.csv
Successfully processed LCID
Processing LIN...
Saved /content/export_daily/LIN_daily.csv
Saved /content/export_weekly/LIN_weekly.csv
Saved /content/export_monthly/LIN_monthly.csv
Successfully processed LIN
Processing LLY...
Saved /content/export_daily/LLY_daily.csv
Saved /content/export_weekly

Download zip files of D, W, and M data to Downloads.

In [9]:
from google.colab import files
import os
import zipfile

def zip_and_download(source_dir, period):
    # Count files
    file_count = len([name for name in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, name))])
    print(f"Found {file_count} {period} files")

    # Create zip file
    zip_filename = f'{period}_files.zip'
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for file in os.listdir(source_dir):
            file_path = os.path.join(source_dir, file)
            if os.path.isfile(file_path):
                zipf.write(file_path, arcname=file)

    # Download zip file
    files.download(zip_filename)
    print(f"Downloaded {zip_filename}")

# Create and download zips for each period
print("\nCreating and downloading zip files...\n")
zip_and_download('/content/export_monthly', 'monthly')
zip_and_download('/content/export_weekly', 'weekly')
zip_and_download('/content/export_daily', 'daily')


Creating and downloading zip files...

Found 133 monthly files


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded monthly_files.zip
Found 133 weekly files


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded weekly_files.zip
Found 133 daily files


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded daily_files.zip


# JUNK BELOW

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
import os

# Read tickers from CSV file
tickers = pd.read_csv('/content/tickers_list.csv', header=None)[0].tolist()  # Adjust filename and format as needed

# Create export directories if they don't exist
os.makedirs('/content/export_daily', exist_ok=True)
os.makedirs('/content/export_weekly', exist_ok=True)
os.makedirs('/content/export_monthly', exist_ok=True)

def validate_data(df, ticker):
    """Validate OHLC data for errors"""
    original_count = len(df)

    # Round High and Close UP, Low DOWN to 5 decimal places
    df['High'] = np.ceil(df['High'] * 100000) / 100000
    df['Low'] = np.floor(df['Low'] * 100000) / 100000
    df['Close'] = np.ceil(df['Close'] * 100000) / 100000
    # Keep Open with standard rounding
    df['Open'] = df['Open'].round(5)

    # Create mask for each condition
    non_zero = (df['Open'] > 0) & (df['High'] > 0) & (df['Low'] > 0) & (df['Close'] > 0)
    valid_low = (df['Low'] <= df['Close']) & (df['Low'] <= df['Open'])
    valid_high = (df['High'] >= df['Close']) & (df['High'] >= df['Open'])
    valid_range = (df['High'] - df['Low']) > 0

    # Combine all conditions
    valid_rows = non_zero & valid_low & valid_high & valid_range

    # Filter data
    df_clean = df[valid_rows].copy()

    # Report removed rows
    removed_count = original_count - len(df_clean)
    if removed_count > 0:
        print(f"{ticker}: Removed {removed_count} invalid rows out of {original_count}")

    return df_clean

def aggregate_data(df):
    agg_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    }

    weekly_data = df.set_index('Date').resample('W').agg(agg_rules).reset_index()
    monthly_data = df.set_index('Date').resample('ME').agg(agg_rules).reset_index()

    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    weekly_data = weekly_data[column_order]
    monthly_data = monthly_data[column_order]

    # Validate aggregated data
    weekly_data = validate_data(weekly_data, f"{df.name}_weekly")
    monthly_data = validate_data(monthly_data, f"{df.name}_monthly")

    return weekly_data, monthly_data

def save_to_csv(data, ticker, period):
    if period == 'daily':
        file_path = f'/content/export_daily/{ticker}_daily.csv'
    elif period == 'weekly':
        file_path = f'/content/export_weekly/{ticker}_weekly.csv'
    else:
        file_path = f'/content/export_monthly/{ticker}_monthly.csv'

    data.to_csv(file_path, index=False)
    print(f"Saved {file_path}")

def process_ticker(data, ticker):
    print(f"Processing {ticker}...")
    try:
        # Extract ticker data and validate
        df = pd.DataFrame(data[ticker]).reset_index()
        df.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
        df = df[['Date', 'Open', 'High', 'Low', 'Close']]  # Keep only OHLC columns
        df.name = ticker  # Add name attribute for reference in aggregation

        # Validate and save daily data
        df = validate_data(df, ticker)

        if len(df) > 0:
            save_to_csv(df, ticker, 'daily')

            # Process and save weekly/monthly data
            weekly_data, monthly_data = aggregate_data(df)

            if len(weekly_data) > 0:
                save_to_csv(weekly_data, ticker, 'weekly')
            if len(monthly_data) > 0:
                save_to_csv(monthly_data, ticker, 'monthly')

            print(f"Successfully processed {ticker}")
        else:
            print(f"No valid data for {ticker}")

    except Exception as e:
        print(f"Error processing {ticker}: {str(e)}")

# Download data since 1995
print("Downloading data...")
data = yf.download(tickers, start="1994-01-01", end="2024-12-31", group_by="ticker")
print("Download complete. Processing tickers...")

# Process all tickers
for ticker in tickers:
    process_ticker(data, ticker)

print("All processing complete.")

Downloading data...


[*********************100%***********************]  134 of 134 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ABMD']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Download complete. Processing tickers...
Processing AAL...
Error processing AAL: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing AAP...
Error processing AAP: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing AAPL...
Error processing AAPL: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing ABBV...
Error processing ABBV: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing ABMD...
ABMD: Removed 7803 invalid rows out of 7803
No valid data for ABMD
Processing ACN...
Error processing ACN: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing ADBE...
Error processing ADBE: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing ADI...
Error processing ADI: Length mismatch: Expected axis has 6 elements, new values have 7 elements
Processing ADP...
Error processing ADP: Length mismatch: Expected axi

In [None]:
import yfinance as yf
import pandas as pd

# Read tickers from CSV file
tickers = pd.read_csv('tickers.csv', header=None)[0].tolist()  # Adjust filename and format as needed

# Download data since 1995
data = yf.download(tickers, start="1994-01-01", end="2024-12-31", group_by="ticker")

# Save each ticker's data as a CSV
for ticker in tickers:
    data[ticker].to_csv(f"{ticker}_OHLC.csv")

print("OHLC data downloaded and saved.")

[*********************100%***********************]  69 of 69 completed


OHLC data downloaded and saved.


In [None]:
import pandas as pd
import os

# Use same tickers as cell 1
tickers = ["MMM", "ABBV", "ABMD", "ACN", "ADBE", "ADI", "ADP", "ADSK", "AEP", "AIG", "ALGN", "ALK", "AMAT", "AMD", "AMGN", "AMZN", "AAPL", "BA", "BAC", "BK", "BIIB", "BLK", "BMY", "CAT", "CVX", "CSCO", "C", "CMCSA", "COP", "COST", "CVS", "DHR", "DE", "DELL", "DIS", "DOW", "EBAY", "EMR", "EXC", "F", "FDX", "GD", "GE", "GILD", "GM", "GOOG", "GS", "HD", "HON", "IBM", "INTC", "JNJ", "JPM", "KHC", "KO", "LIN", "LLY", "LMT", "LOW", "MA", "MCD", "MDLZ", "MDT", "MET", "META", "MMC", "MO", "MRK", "MS", "MSFT", "NEE", "NFLX", "NKE", "NVDA", "ORCL", "PEP", "PFE", "PG", "PM", "PYPL", "QCOM", "RTX", "SBUX", "SCHW", "SO", "SPG", "T", "TGT", "TMO", "TMUS", "TSLA", "TXN", "UNH", "UNP", "UPS", "USB", "V", "VZ", "WBA", "WFC", "WMT", "XOM"]
# Create export directories if they don't exist
os.makedirs('/content/export_daily', exist_ok=True)
os.makedirs('/content/export_weekly', exist_ok=True)
os.makedirs('/content/export_monthly', exist_ok=True)

def validate_data(df, ticker):
    """Validate OHLC data for errors"""
    # Store original row count
    original_count = len(df)

    # Round all values to 4 decimal places
    #for col in ['Open', 'High', 'Low', 'Close']:
    #    df[col] = df[col].round(4)

    # Round High and Close UP, Low DOWN to 5 decimal places
    df['High'] = np.ceil(df['High'] * 100000) / 100000
    df['Low'] = np.floor(df['Low'] * 100000) / 100000
    df['Close'] = np.ceil(df['Close'] * 100000) / 100000
    # Keep Open with standard rounding
    df['Open'] = df['Open'].round(5)

    # Create mask for each condition
    non_zero = (df['Open'] > 0) & (df['High'] > 0) & (df['Low'] > 0) & (df['Close'] > 0)
    valid_low = (df['Low'] <= df['Close']) & (df['Low'] <= df['Open'])
    valid_high = (df['High'] >= df['Close']) & (df['High'] >= df['Open'])
    valid_range = (df['High'] - df['Low']) > 0

    # Combine all conditions
    valid_rows = non_zero & valid_low & valid_high & valid_range

    # Filter data
    df_clean = df[valid_rows].copy()

    # Report removed rows
    removed_count = original_count - len(df_clean)
    if removed_count > 0:
        print(f"{ticker}: Removed {removed_count} invalid rows out of {original_count}")

    return df_clean

def load_data(ticker):
    file_path = f'{ticker}_OHLC.csv'
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])

    # Ensure column order for daily data
    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    df = df[column_order]

    # Validate and clean data before proceeding
    df = validate_data(df, ticker)

    return df

def aggregate_data(df):
    agg_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    }

    weekly_data = df.set_index('Date').resample('W').agg(agg_rules).reset_index()
    monthly_data = df.set_index('Date').resample('ME').agg(agg_rules).reset_index()

    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    weekly_data = weekly_data[column_order]
    monthly_data = monthly_data[column_order]

    # Validate aggregated data
    weekly_data = validate_data(weekly_data, f"{df.name}_weekly")
    monthly_data = validate_data(monthly_data, f"{df.name}_monthly")

    return weekly_data, monthly_data

def save_to_csv(data, ticker, period):
    if period == 'daily':
        file_path = f'/content/export_daily/{ticker}_daily.csv'
    elif period == 'weekly':
        file_path = f'/content/export_weekly/{ticker}_weekly.csv'
    else:
        file_path = f'/content/export_monthly/{ticker}_monthly.csv'

    data.to_csv(file_path, index=False)
    print(f"Saved {file_path}")

def process_ticker(ticker):
    print(f"Processing {ticker}...")
    try:
        # Load and save daily data
        df = load_data(ticker)
        df.name = ticker  # Add name attribute for reference in aggregation

        if len(df) > 0:  # Only proceed if we have valid data
            save_to_csv(df, ticker, 'daily')

            # Process and save weekly/monthly data
            weekly_data, monthly_data = aggregate_data(df)

            if len(weekly_data) > 0:
                save_to_csv(weekly_data, ticker, 'weekly')
            if len(monthly_data) > 0:
                save_to_csv(monthly_data, ticker, 'monthly')

            print(f"Successfully processed {ticker}")
        else:
            print(f"No valid data for {ticker}")

    except Exception as e:
        print(f"Error processing {ticker}: {str(e)}")

# Process all tickers
for ticker in tickers:
    process_ticker(ticker)

Processing SPY...
Saved /content/export_daily/SPY_daily.csv
Saved /content/export_weekly/SPY_weekly.csv
Saved /content/export_monthly/SPY_monthly.csv
Successfully processed SPY
Processing TQQQ...
TQQQ: Removed 1538 invalid rows out of 5284
Saved /content/export_daily/TQQQ_daily.csv
Saved /content/export_weekly/TQQQ_weekly.csv
Saved /content/export_monthly/TQQQ_monthly.csv
Successfully processed TQQQ
Processing QQQ...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/QQQ_daily.csv
Saved /content/export_weekly/QQQ_weekly.csv
Saved /content/export_monthly/QQQ_monthly.csv
Successfully processed QQQ
Processing SQQQ...
SQQQ: Removed 1538 invalid rows out of 5284
Saved /content/export_daily/SQQQ_daily.csv
Saved /content/export_weekly/SQQQ_weekly.csv
Saved /content/export_monthly/SQQQ_monthly.csv
Successfully processed SQQQ
Processing EEM...
Saved /content/export_daily/EEM_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/EEM_weekly.csv
Saved /content/export_monthly/EEM_monthly.csv
Successfully processed EEM
Processing XLF...
Saved /content/export_daily/XLF_daily.csv
Saved /content/export_weekly/XLF_weekly.csv
Saved /content/export_monthly/XLF_monthly.csv
Successfully processed XLF
Processing GLD...
GLD: Removed 222 invalid rows out of 5284
Saved /content/export_daily/GLD_daily.csv
Saved /content/export_weekly/GLD_weekly.csv
Saved /content/export_monthly/GLD_monthly.csv
Successfully processed GLD
Processing XLE...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/XLE_daily.csv
Saved /content/export_weekly/XLE_weekly.csv
Saved /content/export_monthly/XLE_monthly.csv
Successfully processed XLE
Processing EFA...
Saved /content/export_daily/EFA_daily.csv
Saved /content/export_weekly/EFA_weekly.csv
Saved /content/export_monthly/EFA_monthly.csv
Successfully processed EFA
Processing GDX...
GDX: Removed 600 invalid rows out of 5284
Saved /content/export_daily/GDX_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/GDX_weekly.csv
Saved /content/export_monthly/GDX_monthly.csv
Successfully processed GDX
Processing XLK...
Saved /content/export_daily/XLK_daily.csv
Saved /content/export_weekly/XLK_weekly.csv
Saved /content/export_monthly/XLK_monthly.csv
Successfully processed XLK
Processing TLT...
Saved /content/export_daily/TLT_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/TLT_weekly.csv
Saved /content/export_monthly/TLT_monthly.csv
Successfully processed TLT
Processing XLV...
Saved /content/export_daily/XLV_daily.csv
Saved /content/export_weekly/XLV_weekly.csv
Saved /content/export_monthly/XLV_monthly.csv
Successfully processed XLV
Processing FXI...
FXI: Removed 193 invalid rows out of 5284
Saved /content/export_daily/FXI_daily.csv
Saved /content/export_weekly/FXI_weekly.csv
Saved /content/export_monthly/FXI_monthly.csv
Successfully processed FXI
Processing XLY...
Saved /content/export_daily/XLY_daily.csv
Saved /content/export_weekly/XLY_weekly.csv
Saved /content/export_monthly/XLY_monthly.csv
Successfully processed XLY
Processing XLI...
Saved /content/export_daily/XLI_daily.csv
Saved /content/export_weekly/XLI_weekly.csv
Saved /content/export_monthly/XLI_monthly.csv
Successfully processed XLI
Processing XLU...
Saved /content/export_daily/XLU_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/XLU_weekly.csv
Saved /content/export_monthly/XLU_monthly.csv
Successfully processed XLU
Processing XLP...
Saved /content/export_daily/XLP_daily.csv
Saved /content/export_weekly/XLP_weekly.csv
Saved /content/export_monthly/XLP_monthly.csv
Successfully processed XLP
Processing XLB...
Saved /content/export_daily/XLB_daily.csv
Saved /content/export_weekly/XLB_weekly.csv
Saved /content/export_monthly/XLB_monthly.csv
Successfully processed XLB
Processing TSLA...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


TSLA: Removed 1633 invalid rows out of 5284
Saved /content/export_daily/TSLA_daily.csv
Saved /content/export_weekly/TSLA_weekly.csv
Saved /content/export_monthly/TSLA_monthly.csv
Successfully processed TSLA
Processing AMD...
AMD: Removed 1 invalid rows out of 5284
Saved /content/export_daily/AMD_daily.csv
Saved /content/export_weekly/AMD_weekly.csv
Saved /content/export_monthly/AMD_monthly.csv
Successfully processed AMD
Processing AMZN...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/AMZN_daily.csv
Saved /content/export_weekly/AMZN_weekly.csv
Saved /content/export_monthly/AMZN_monthly.csv
Successfully processed AMZN
Processing META...
META: Removed 2110 invalid rows out of 5284
Saved /content/export_daily/META_daily.csv
Saved /content/export_weekly/META_weekly.csv
Saved /content/export_monthly/META_monthly.csv
Successfully processed META
Processing NVDA...
Saved /content/export_daily/NVDA_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/NVDA_weekly.csv
Saved /content/export_monthly/NVDA_monthly.csv
Successfully processed NVDA
Processing BAC...
Saved /content/export_daily/BAC_daily.csv
Saved /content/export_weekly/BAC_weekly.csv
Saved /content/export_monthly/BAC_monthly.csv
Successfully processed BAC
Processing F...
Saved /content/export_daily/F_daily.csv
Saved /content/export_weekly/F_weekly.csv
Saved /content/export_monthly/F_monthly.csv
Successfully processed F
Processing LCID...
LCID: Removed 4207 invalid rows out of 5284
Saved /content/export_daily/LCID_daily.csv
Saved /content/export_weekly/LCID_weekly.csv
Saved /content/export_monthly/LCID_monthly.csv
Successfully processed LCID
Processing WFC...
Saved /content/export_daily/WFC_daily.csv
Saved /content/export_weekly/WFC_weekly.csv
Saved /content/export_monthly/WFC_monthly.csv
Successfully processed WFC
Processing T...
Saved /content/export_daily/T_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/T_weekly.csv
Saved /content/export_monthly/T_monthly.csv
Successfully processed T
Processing PLTR...
PLTR: Removed 4215 invalid rows out of 5284
Saved /content/export_daily/PLTR_daily.csv
Saved /content/export_weekly/PLTR_weekly.csv
Saved /content/export_monthly/PLTR_monthly.csv
Successfully processed PLTR
Processing GE...
Saved /content/export_daily/GE_daily.csv
Saved /content/export_weekly/GE_weekly.csv
Saved /content/export_monthly/GE_monthly.csv
Successfully processed GE
Processing GOOGL...
GOOGL: Removed 158 invalid rows out of 5284


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/GOOGL_daily.csv
Saved /content/export_weekly/GOOGL_weekly.csv
Saved /content/export_monthly/GOOGL_monthly.csv
Successfully processed GOOGL
Processing GOOG...
GOOG: Removed 158 invalid rows out of 5284
Saved /content/export_daily/GOOG_daily.csv
Saved /content/export_weekly/GOOG_weekly.csv
Saved /content/export_monthly/GOOG_monthly.csv
Successfully processed GOOG
Processing PYPL...
PYPL: Removed 2895 invalid rows out of 5284
Saved /content/export_daily/PYPL_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/PYPL_weekly.csv
Saved /content/export_monthly/PYPL_monthly.csv
Successfully processed PYPL
Processing NFLX...
Saved /content/export_daily/NFLX_daily.csv
Saved /content/export_weekly/NFLX_weekly.csv
Saved /content/export_monthly/NFLX_monthly.csv
Successfully processed NFLX
Processing MRO...
MRO: Removed 26 invalid rows out of 5284
Saved /content/export_daily/MRO_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/MRO_weekly.csv
Saved /content/export_monthly/MRO_monthly.csv
Successfully processed MRO
Processing PFE...
Saved /content/export_daily/PFE_daily.csv
Saved /content/export_weekly/PFE_weekly.csv
Saved /content/export_monthly/PFE_monthly.csv
Successfully processed PFE
Processing SOFI...
SOFI: Removed 4280 invalid rows out of 5284
Saved /content/export_daily/SOFI_daily.csv
Saved /content/export_weekly/SOFI_weekly.csv
Saved /content/export_monthly/SOFI_monthly.csv
Successfully processed SOFI
Processing UBER...
UBER: Removed 3864 invalid rows out of 5284
Saved /content/export_daily/UBER_daily.csv
Saved /content/export_weekly/UBER_weekly.csv
Saved /content/export_monthly/UBER_monthly.csv
Successfully processed UBER
Processing XOM...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/XOM_daily.csv
Saved /content/export_weekly/XOM_weekly.csv
Saved /content/export_monthly/XOM_monthly.csv
Successfully processed XOM
Processing AAP...
Saved /content/export_daily/AAP_daily.csv
Saved /content/export_weekly/AAP_weekly.csv
Saved /content/export_monthly/AAP_monthly.csv
Successfully processed AAP
Processing MS...
Saved /content/export_daily/MS_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/MS_weekly.csv
Saved /content/export_monthly/MS_monthly.csv
Successfully processed MS
Processing BMY...
Saved /content/export_daily/BMY_daily.csv
Saved /content/export_weekly/BMY_weekly.csv
Saved /content/export_monthly/BMY_monthly.csv
Successfully processed BMY
Processing GM...
GM: Removed 1733 invalid rows out of 5284
Saved /content/export_daily/GM_daily.csv
Saved /content/export_weekly/GM_weekly.csv
Saved /content/export_monthly/GM_monthly.csv
Successfully processed GM
Processing OXY...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/OXY_daily.csv
Saved /content/export_weekly/OXY_weekly.csv
Saved /content/export_monthly/OXY_monthly.csv
Successfully processed OXY
Processing C...
Saved /content/export_daily/C_daily.csv
Saved /content/export_weekly/C_weekly.csv
Saved /content/export_monthly/C_monthly.csv
Successfully processed C
Processing CCL...
Saved /content/export_daily/CCL_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/CCL_weekly.csv
Saved /content/export_monthly/CCL_monthly.csv
Successfully processed CCL
Processing SNAP...
SNAP: Removed 3313 invalid rows out of 5284
Saved /content/export_daily/SNAP_daily.csv
Saved /content/export_weekly/SNAP_weekly.csv
Saved /content/export_monthly/SNAP_monthly.csv
Successfully processed SNAP
Processing SQ...
SQ: Removed 2992 invalid rows out of 5284
Saved /content/export_daily/SQ_daily.csv
Saved /content/export_weekly/SQ_weekly.csv
Saved /content/export_monthly/SQ_monthly.csv
Successfully processed SQ
Processing NIO...
NIO: Removed 3699 invalid rows out of 5284
Saved /content/export_daily/NIO_daily.csv
Saved /content/export_weekly/NIO_weekly.csv
Saved /content/export_monthly/NIO_monthly.csv
Successfully processed NIO
Processing X...
Saved /content/export_daily/X_daily.csv
Saved /content/export_weekly/X_weekly.csv
Saved /content/export_monthly/X_monthly.csv
Successfully processed X
Processing DAL...
DAL: Removed 838 invalid rows out of 5

  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/FCX_weekly.csv
Saved /content/export_monthly/FCX_monthly.csv
Successfully processed FCX
Processing BABA...
BABA: Removed 2697 invalid rows out of 5284
Saved /content/export_daily/BABA_daily.csv
Saved /content/export_weekly/BABA_weekly.csv
Saved /content/export_monthly/BABA_monthly.csv
Successfully processed BABA
Processing DVN...
Saved /content/export_daily/DVN_daily.csv
Saved /content/export_weekly/DVN_weekly.csv
Saved /content/export_monthly/DVN_monthly.csv
Successfully processed DVN
Processing MPC...
MPC: Removed 1883 invalid rows out of 5284


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/MPC_daily.csv
Saved /content/export_weekly/MPC_weekly.csv
Saved /content/export_monthly/MPC_monthly.csv
Successfully processed MPC
Processing RIVN...
RIVN: Removed 4496 invalid rows out of 5284
Saved /content/export_daily/RIVN_daily.csv
Saved /content/export_weekly/RIVN_weekly.csv
Saved /content/export_monthly/RIVN_monthly.csv
Successfully processed RIVN
Processing UAL...
UAL: Removed 527 invalid rows out of 5284
Saved /content/export_daily/UAL_daily.csv
Saved /content/export_weekly/UAL_weekly.csv
Saved /content/export_monthly/UAL_monthly.csv
Successfully processed UAL
Processing AAL...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


AAL: Removed 437 invalid rows out of 5284
Saved /content/export_daily/AAL_daily.csv
Saved /content/export_weekly/AAL_weekly.csv
Saved /content/export_monthly/AAL_monthly.csv
Successfully processed AAL
Processing GOLD...
Saved /content/export_daily/GOLD_daily.csv
Saved /content/export_weekly/GOLD_weekly.csv
Saved /content/export_monthly/GOLD_monthly.csv
Successfully processed GOLD
Processing ABBV...
ABBV: Removed 2265 invalid rows out of 5284
Saved /content/export_daily/ABBV_daily.csv


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_weekly/ABBV_weekly.csv
Saved /content/export_monthly/ABBV_monthly.csv
Successfully processed ABBV
Processing HAL...
Saved /content/export_daily/HAL_daily.csv
Saved /content/export_weekly/HAL_weekly.csv
Saved /content/export_monthly/HAL_monthly.csv
Successfully processed HAL
Processing TGT...
Saved /content/export_daily/TGT_daily.csv
Saved /content/export_weekly/TGT_weekly.csv
Saved /content/export_monthly/TGT_monthly.csv
Successfully processed TGT
Processing MU...


  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


Saved /content/export_daily/MU_daily.csv
Saved /content/export_weekly/MU_weekly.csv
Saved /content/export_monthly/MU_monthly.csv
Successfully processed MU
Processing HOOD...
HOOD: Removed 4423 invalid rows out of 5284
Saved /content/export_daily/HOOD_daily.csv
Saved /content/export_weekly/HOOD_weekly.csv
Saved /content/export_monthly/HOOD_monthly.csv
Successfully processed HOOD
Processing DKNG...
DKNG: Removed 3984 invalid rows out of 5284
Saved /content/export_daily/DKNG_daily.csv
DKNG_weekly: Removed 5 invalid rows out of 282
Saved /content/export_weekly/DKNG_weekly.csv
Saved /content/export_monthly/DKNG_monthly.csv
Successfully processed DKNG
Processing COP...
Saved /content/export_daily/COP_daily.csv
Saved /content/export_weekly/COP_weekly.csv
Saved /content/export_monthly/COP_monthly.csv
Successfully processed COP
Processing CVE...
CVE: Removed 1488 invalid rows out of 5284
Saved /content/export_daily/CVE_daily.csv
Saved /content/export_weekly/CVE_weekly.csv
Saved /content/export_

  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()
  monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()


In [None]:
import pandas as pd
import os

# Use same tickers as cell 1
tickers = ["SPY", "TQQQ", "QQQ", "SQQQ", "EEM", "XLF", "GLD", "XLE", "EFA", "GDX", "XLK",
           "TLT", "XLV", "FXI", "XLY",
           "XLI", "XLU", "XLP", "XLB"]

# Create export directories if they don't exist
os.makedirs('/content/export_daily', exist_ok=True)
os.makedirs('/content/export_weekly', exist_ok=True)
os.makedirs('/content/export_monthly', exist_ok=True)

def load_data(ticker):
    file_path = f'{ticker}_OHLC.csv'
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])

    # Ensure column order for daily data
    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    df = df[column_order]

    return df

def aggregate_data(df):
    agg_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    }

    weekly_data = df.set_index('Date').resample('W').agg(agg_rules).reset_index()
    monthly_data = df.set_index('Date').resample('M').agg(agg_rules).reset_index()

    column_order = ['Date', 'Open', 'High', 'Low', 'Close']
    weekly_data = weekly_data[column_order]
    monthly_data = monthly_data[column_order]

    return weekly_data, monthly_data

def save_to_csv(data, ticker, period):
    if period == 'daily':
        file_path = f'/content/export_daily/{ticker}_daily.csv'
    elif period == 'weekly':
        file_path = f'/content/export_weekly/{ticker}_weekly.csv'
    else:
        file_path = f'/content/export_monthly/{ticker}_monthly.csv'

    data.to_csv(file_path, index=False)
    print(f"Saved {file_path}")

def process_ticker(ticker):
    print(f"Processing {ticker}...")
    try:
        # Load and save daily data
        df = load_data(ticker)
        save_to_csv(df, ticker, 'daily')

        # Process and save weekly/monthly data
        weekly_data, monthly_data = aggregate_data(df)
        save_to_csv(weekly_data, ticker, 'weekly')
        save_to_csv(monthly_data, ticker, 'monthly')

        print(f"Successfully processed {ticker}")
    except Exception as e:
        print(f"Error processing {ticker}: {str(e)}")

# Process all tickers
for ticker in tickers:
    process_ticker(ticker)

In [None]:
from google.colab import files
import os

def count_and_download_files(directory, period):
    file_count = len([name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))])
    print(f"Found {file_count} {period} files")

    # Download files
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if os.path.isfile(filepath):
            files.download(filepath)

print("\nCounting and downloading files...\n")
count_and_download_files('/content/export_monthly', 'monthly')

In [None]:
from google.colab import files
import os

def count_and_download_files(directory, period):
    file_count = len([name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))])
    print(f"Found {file_count} {period} files")

    # Download files
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        if os.path.isfile(filepath):
            files.download(filepath)

print("\nCounting and downloading files...\n")
count_and_download_files('/content/export_daily', 'daily')
count_and_download_files('/content/export_weekly', 'weekly')
count_and_download_files('/content/export_monthly', 'monthly')

In [None]:
import pandas as pd

# Load data from CSV
def load_data(file_path):
    # Load the CSV file
    df = pd.read_csv(file_path)
    # Ensure 'Date' column is in datetime format
    df['Date'] = pd.to_datetime(df['Date'])
    # Set 'Date' as the index
    df.set_index('Date', inplace=True)
    return df

# Aggregate data to weekly and monthly levels
def aggregate_data(df):
    # Weekly aggregation (sum)
    weekly_data = df.resample('W').sum()

    # Monthly aggregation (mean)
    monthly_data = df.resample('ME').mean()

    return weekly_data, monthly_data

# Save aggregated data to CSV
def save_to_csv(data, file_name):
    data.to_csv(file_name)
    print(f"Saved {file_name}")

# Main program
if __name__ == "__main__":
    # Path to the input CSV file
    input_file = "/content/"  # Replace with your file path

    print("Loading data from CSV...")
    df = load_data(input_file)
    print("Daily Data:")
    print(df.head())

    print("\nAggregating data to weekly and monthly levels...")
    weekly_data, monthly_data = aggregate_data(df)

    print("\nWeekly Data (Sum):")
    print(weekly_data.head())

    print("\nMonthly Data (Mean):")
    print(monthly_data.head())

    # Save aggregated data to CSV files
    save_to_csv(weekly_data, "XLB_weekly_data.csv")
    save_to_csv(monthly_data, "XLB_monthly_data.csv")
    print("\nAggregated data saved to CSV files.")
