In [12]:
import pandas as pd
import numpy as np

# Seed the live-trades store with a single FOA entry.
# The destination is named once so the confirmation message below always reports
# the file that was actually written.
LIVE_TRADES_PATH = "_Live_trades.parquet"

data = pd.DataFrame({
    'Symbol': ['FOA'],
    'LastBuySignalDate': ['2024-09-09'],
    'LastBuySignalPrice': [12.26],
    'IsCurrentlyBought': [False],
    'ConsecutiveLosses': [0],
    'LastTradedDate': [np.nan],   # no trade recorded yet
    'UpProbability': [0.72],
    'LastSellPrice': [np.nan],    # nothing sold yet
    'PositionSize': [np.nan]      # no open position
})

# Show the freshly built single-row table
print("New data:")
print(data)

# Persist the table (overwrites any existing file at this path)
data.to_parquet(LIVE_TRADES_PATH)

print(f"\nData has been created and saved to {LIVE_TRADES_PATH}")

New data:
  Symbol LastBuySignalDate  LastBuySignalPrice  IsCurrentlyBought  \
0    FOA        2024-09-09               12.26              False   

   ConsecutiveLosses  LastTradedDate  UpProbability  LastSellPrice  \
0                  0             NaN           0.72            NaN   

   PositionSize  
0           NaN  

Data has been created and saved to trading_data.parquet


In [1]:
import pandas as pd

# Reload the persisted live-trades table and preview its first rows
live_trades_file = '_Live_trades.parquet'
df = pd.read_parquet(live_trades_file)

preview = df.head()
print(preview)

  Symbol LastBuySignalDate  LastBuySignalPrice  IsCurrentlyBought  \
0    FOA        2024-09-09               12.26               True   

   ConsecutiveLosses LastTradedDate  UpProbability  LastSellPrice  \
0                  0            NaT           0.72            NaN   

   PositionSize  
0           0.0  


In [2]:
import pandas as pd

# Load the AAPL daily price history and show its most recent rows.
# Forward slashes replace the original backslashes: "\P" and "\A" are invalid
# string escape sequences, and a backslash-separated path only resolves on Windows.
df = pd.read_parquet('Data/PriceData/AAPL.parquet')

print(df.tail())

                 Open       High        Low      Close  Adj Close    Volume
Date                                                                       
2024-09-10  218.92000  221.48000  216.73000  220.11000  220.11000  51591000
2024-09-11  221.46001  223.09000  217.89000  222.66000  222.66000  44587100
2024-09-12  222.50000  223.55000  219.82001  222.77000  222.77000  37498200
2024-09-13  223.58000  224.03999  221.91000  222.50000  222.50000  36766600
2024-09-16  216.53999  217.22000  213.92000  216.32001  216.32001  59288400


In [4]:
import pandas as pd
import os
from datetime import datetime, timedelta

# Folder whose .parquet files are scanned below: "Data" and "PriceData" joined with
# the platform's path separator, i.e. a path relative to the current working directory.
data_folder = os.path.join("Data", "PriceData")

# Function to get the last trading day based on today's date
def get_last_trading_day(reference_date=None):
    """
    Return the most recent weekday on or before ``reference_date``.

    Saturday and Sunday both map to the preceding Friday; a Monday-Friday
    reference date is returned unchanged (it is NOT shifted to the previous
    day). Market holidays are not taken into account.

    Parameters
    ----------
    reference_date : datetime.datetime or datetime.date, optional
        Day to evaluate. Defaults to today's local date.

    Returns
    -------
    datetime.date
        The reference date itself, or the Friday before it for weekend dates.
    """
    if reference_date is None:
        reference_date = datetime.today()

    # datetime objects (and subclasses) carry a time part to strip; a plain
    # date has no .date() method and is already in the form we need.
    if isinstance(reference_date, datetime):
        reference_date = reference_date.date()

    # weekday(): Monday=0 ... Friday=4, Saturday=5, Sunday=6, so anything
    # above 4 is that many days past Friday.
    days_past_friday = max(reference_date.weekday() - 4, 0)

    return reference_date - timedelta(days=days_past_friday)

# Date that every price file is expected to reach
expected_latest_date = get_last_trading_day()

# Names of all parquet files found directly inside the data folder
parquet_files = [name for name in os.listdir(data_folder) if name.endswith('.parquet')]

print(f"Found {len(parquet_files)} parquet files in {data_folder}")
print(f"Expected latest trading date: {expected_latest_date}")

# Result buckets for the freshness check, plus the newest date seen per file
stale_files = []
up_to_date_files = []
file_latest_dates = {}

# Inspect each file; a failure on one file is reported and the scan continues
for file in parquet_files:
    file_path = os.path.join(data_folder, file)
    try:
        df = pd.read_parquet(file_path)

        # Dates may live in a DatetimeIndex or in a 'Date' column; skip files with neither
        has_datetime_index = isinstance(df.index, pd.DatetimeIndex)
        if not has_datetime_index and 'Date' not in df.columns:
            print(f"File {file} does not have a 'Date' column or index.")
            continue

        if has_datetime_index:
            latest_date = df.index.max().date()
        else:
            # Column values are converted to datetimes before taking the maximum
            df['Date'] = pd.to_datetime(df['Date'])
            latest_date = df['Date'].max().date()

        file_latest_dates[file] = latest_date

        # Anything older than the expected date counts as stale
        bucket = stale_files if latest_date < expected_latest_date else up_to_date_files
        bucket.append((file, latest_date))

    except Exception as e:
        print(f"Error processing file {file}: {e}")

# Print both groups: up-to-date files first, then stale ones
report_sections = (
    ("\n=== Up-to-date Files ===", up_to_date_files),
    ("\n=== Stale Files ===", stale_files),
)
for heading, entries in report_sections:
    print(heading)
    for file, date in entries:
        print(f"{file}: Latest date = {date}")

# Share of stale files relative to every parquet file found (0 when none were found)
total_files = len(parquet_files)
stale_count = len(stale_files)
if total_files > 0:
    stale_percentage = (stale_count / total_files) * 100
else:
    stale_percentage = 0

print(f"\nOut of {total_files} files, {stale_count} are out of date.")
print(f"Percentage of stale files: {stale_percentage:.2f}%")


Found 13 parquet files in Data\PriceData
Expected latest trading date: 2024-09-20

=== Up-to-date Files ===

=== Stale Files ===
AAPL.parquet: Latest date = 2024-09-19
AMZN.parquet: Latest date = 2024-09-19
AVGO.parquet: Latest date = 2024-09-19
BRK-B.parquet: Latest date = 2024-09-19
GOOGL.parquet: Latest date = 2024-09-19
JPM.parquet: Latest date = 2024-09-19
LLY.parquet: Latest date = 2024-09-19
META.parquet: Latest date = 2024-09-19
MSFT.parquet: Latest date = 2024-09-19
NVDA.parquet: Latest date = 2024-09-19
NVO.parquet: Latest date = 2024-09-19
TSLA.parquet: Latest date = 2024-09-19
TSM.parquet: Latest date = 2024-09-19

Out of 13 files, 13 are out of date.
Percentage of stale files: 100.00%
