In [None]:
# Define API key and base URLs.
# The key is taken from the POLYGON_API_KEY environment variable when it is set,
# so the credential does not have to live in the notebook. The literal is kept
# only as a backward-compatible fallback.
# NOTE(review): a key stored in a notebook should be considered exposed -
# rotate it and remove the fallback once the environment variable is in place.
import os

api_key = os.environ.get("POLYGON_API_KEY") or "FDyVl7GLwOsIvkhy4fp1v3oEpCxHyyPp"
base_url_open_close = "https://api.polygon.io/v1/open-close"
base_url_financials = "https://api.polygon.io/vX/reference/financials"

# Load the existing CSV file (semicolon-delimited, first row is the header)
existing_file_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\ntnx_data_raw.csv"
existing_data = pd.read_csv(existing_file_path, delimiter=';', header=0)

# Parse the 'Date' column into datetime values
existing_data['Date'] = pd.to_datetime(existing_data['Date'])

# Ticker and the date range to download (both ends are included by the
# download loop, which runs while current_date <= end)
ticker = "NTNX"
start_date = "2022-10-18"
end_date = "2022-12-31"

# Convert the date strings to datetime objects
start = datetime.strptime(start_date, "%Y-%m-%d")
end = datetime.strptime(end_date, "%Y-%m-%d")

# Accumulator for the per-day stock records collected by the download loop
all_stock_data = []

In [39]:
# Number of API calls registered since the last pause
api_call_count = 0


def check_rate_limit():
    """Register one API call and pause once five calls have accumulated.

    Increments the module-level ``api_call_count``. When the counter reaches
    five or more, prints a notice, sleeps for 60 seconds and resets the
    counter to zero.

    The comparison is ``>=`` rather than ``==`` so that the pause still
    happens if the counter has been advanced past five by code outside this
    function; with ``==`` a skipped value would disable the limiter for the
    rest of the run.
    """
    global api_call_count
    api_call_count += 1
    if api_call_count >= 5:
        print("Pausing for 1 minute to respect API rate limit...")
        time.sleep(60)  # Pause for 1 minute
        api_call_count = 0

In [40]:
# Function to fetch open/close data for a specific ticker and date
def fetch_stock_data(ticker, date, timeout=30):
    """Fetch one day of open/close data for ``ticker``.

    Parameters
    ----------
    ticker : str
        Symbol placed in the request URL.
    date : str
        Date placed in the request URL (the download loop passes YYYY-MM-DD).
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    dict or None
        A record with the keys Symbol, Date, Close, Open, High, Low and
        Volume when the response is HTTP 200 and its "status" field is "OK";
        otherwise None (a message describing the failure is printed).
    """
    url = f"{base_url_open_close}/{ticker}/{date}"
    headers = {"Authorization": f"Bearer {api_key}"}

    # Transport problems are reported the same way as HTTP errors so that a
    # single bad request does not abort a long download loop.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print(f"Failed to fetch data for {ticker} on {date}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {ticker} on {date}: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        # The body was not valid JSON
        print(f"Failed to fetch data for {ticker} on {date}: invalid JSON response")
        return None

    if data.get("status") != "OK":
        print(f"No trading data for {ticker} on {date}.")
        return None

    return {
        "Symbol": data.get("symbol"),
        "Date": data.get("from"),
        "Close": data.get("close"),
        "Open": data.get("open"),
        "High": data.get("high"),
        "Low": data.get("low"),
        "Volume": data.get("volume"),
    }

In [41]:
# Function to fetch financial report filing dates
def fetch_filing_dates(ticker, timeout=30):
    """Collect the filing dates of all financial reports for ``ticker``.

    Requests the financials endpoint 100 results at a time and follows the
    ``next_url`` field of each response until no further page is reported.
    ``check_rate_limit()`` is called after every request.

    The API key is sent in the ``Authorization`` header (as in
    ``fetch_stock_data``) rather than in the query string, so follow-up
    requests to ``next_url`` are authenticated too and the key does not
    appear in URLs.

    Parameters
    ----------
    ticker : str
        Symbol to query.
    timeout : float, optional
        Seconds to wait for each response before giving up.

    Returns
    -------
    set
        The distinct non-empty ``filing_date`` values found. If a request
        fails, a message is printed and the dates collected so far are
        returned.
    """
    filing_dates = set()
    headers = {"Authorization": f"Bearer {api_key}"}
    url = base_url_financials
    params = {"ticker": ticker, "limit": 100}  # only needed for the first page

    while url:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.exceptions.RequestException as exc:
            print(f"Failed to fetch financial data for {ticker}: {exc}")
            break
        check_rate_limit()  # Check and handle API rate limit

        if response.status_code != 200:
            print(f"Failed to fetch financial data for {ticker}: {response.status_code}")
            break  # Stop paging on the first error

        try:
            data = response.json()
        except ValueError:
            print(f"Failed to fetch financial data for {ticker}: invalid JSON response")
            break

        for item in data.get('results', []):
            filing_date = item.get('filing_date')
            if filing_date:
                filing_dates.add(filing_date)  # Collect filing dates

        # Follow the next page, if any; its URL is used as returned by the API
        url = data.get('next_url')
        params = None

    return filing_dates


# Fetch financial report filing dates
filing_dates = fetch_filing_dates(ticker)

In [None]:
# Walk the date range one calendar day at a time and collect one record for
# every day the API returns data for.
current_date = start
api_call_count = 0  # start this run with a fresh call count

while current_date <= end:
    # Saturdays and Sundays are not trading days, so requesting them would
    # only use up rate-limited API calls without ever producing a record.
    if current_date.weekday() < 5:
        date_str = current_date.strftime("%Y-%m-%d")

        # Fetch stock data for the current date. check_rate_limit() is the
        # only place that counts calls: incrementing api_call_count here as
        # well would make the counter step over the pause threshold.
        stock_data = fetch_stock_data(ticker, date_str)
        check_rate_limit()

        if stock_data:
            # Flag the day if a financial report was filed on this date
            stock_data["financialRelease"] = 1 if date_str in filing_dates else 0

            all_stock_data.append(stock_data)

    # Move to the next date
    current_date += timedelta(days=1)

In [43]:
# Assemble the collected per-day records into a single DataFrame
df = pd.DataFrame(data=all_stock_data)

In [None]:
# Append the newly fetched rows to the existing CSV file, skipping dates that
# are already stored so that re-running the download does not duplicate rows.
if df.empty:
    print("No new data to append.")
else:
    # Re-read the stored dates from disk so that rows written by an earlier
    # run of this cell are taken into account as well.
    saved_dates = pd.to_datetime(
        pd.read_csv(existing_file_path, delimiter=';', header=0, usecols=['Date'])['Date']
    )
    fetched_dates = pd.to_datetime(df['Date'])
    new_rows = (
        df.loc[~fetched_dates.isin(saved_dates)]
        .drop_duplicates(subset=['Symbol', 'Date'])
    )

    skipped = len(df) - len(new_rows)
    if skipped:
        print(f"Skipped {skipped} row(s) that were duplicates or already stored.")

    if new_rows.empty:
        print("No new data to append.")
    else:
        # header=False: the file already has its header row
        new_rows.to_csv(existing_file_path, mode='a', index=False, header=False, sep=';')
        print(f"Data has been appended to {existing_file_path}")

## Adding parameters to the data model
**ema20** = exponential moving average 20 days.
**ema50** = exponential moving average 50 days.
**openHigher** = If Open > (1.01 * Previous day Close) then 1 else 0.
**strongClose** = If Close > (High + Low) / 2, i.e. above the midpoint of the day's range, then 1 else 0.
**averageVolume** = Average volume over the past 200 days (200-day simple moving average of Volume).
**strongVolume** = If Volume > (2 * averageVolume) then 1 else 0.
**strongVolume6MoPrior** = If strongVolume is equal to 1 more than 3 times in the past 126 days then 1 else 0.
**strongVolumeAfterFiling** = If strongVolume is equal to 1 more than 2 times in the 5 days following a day with financialRelease = 1 then 1 else 0.
**accVolume** = If strongVolume = 1 and strongVolume6MoPrior = 1 and strongVolumeAfterFiling = 1 then 1 else 0.
**uptrend** = If ema50 > (ema50 50 days prior) then 1 else 0.

In [15]:
# Location of the semicolon-separated raw price file
csv_file_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\ntnx_data_raw.csv"
# Read it into a DataFrame, taking the column names from the first row
df = pd.read_csv(csv_file_path, sep=';', header=0)

In [None]:
# Preview the last rows of the loaded file (appended rows are written to the
# end of the CSV) using the notebook's rich table display instead of a
# plain-text dump of the frame.
df.tail(10)

In [None]:
# Location of the semicolon-separated raw price file
csv_file_path = r"C:\Users\SamuliMustonen\Documents\Ready Solutions\Docs\StockTrading\Data\ntnx_data_raw.csv"
# Read it into a DataFrame, taking the column names from the first row
df = pd.read_csv(csv_file_path, sep=';', header=0)

# Exponential moving average helper
def calculate_ema(data, window):
    """Return the EMA of ``data['Close']`` using ``span=window``.

    The average is computed recursively (``adjust=False``). When ``data``
    has fewer rows than ``window``, every value of the result is blanked
    out (NaN); otherwise the series is returned as computed.
    """
    closes = data['Close']
    smoothed = closes.ewm(span=window, adjust=False).mean()

    # Enough rows for the requested window: nothing to mask
    if len(data) >= window:
        return smoothed

    # Too few rows: the slice covers the whole (shorter) series
    smoothed[:window] = None
    return smoothed

# NOTE(review): every shift()/rolling() below treats row order as time order -
# presumably the CSV rows are in ascending date order; confirm before relying
# on the output.

# EMA20 and EMA50 of Close, rounded to 2 decimal places
df['ema20'] = calculate_ema(df, 20).round(2)
df['ema50'] = calculate_ema(df, 50).round(2)

# **openHigher** = If Open > (1.01 * Previous day Close) then 1 else 0
df['openHigher'] = (df['Open'] > (1.01 * df['Close'].shift(1))).astype(int)

# **strongClose** = If Close > (High + Low) / 2 then 1 else 0
# The midpoint is kept in a local Series instead of a temporary column, so
# nothing has to be dropped from the frame afterwards.
midpoint = (df['High'] + df['Low']) / 2
df['strongClose'] = (df['Close'] > midpoint).astype(int)

# Convert Volume to integer format
df['Volume'] = df['Volume'].astype(int)

# **averageVolume** = 200-row simple moving average of Volume
df['averageVolume'] = df['Volume'].rolling(window=200).mean()

# **strongVolume** = If Volume > (2 * averageVolume) then 1 else 0
df['strongVolume'] = (df['Volume'] > 2 * df['averageVolume']).astype(int)

# **strongVolume6MoPrior** = If strongVolume was 1 more than 3 times in the
# 126 rows before the current one then 1 else 0 (shift(1) excludes the
# current row from its own window)
prior_strong_days = df['strongVolume'].rolling(window=126).sum().shift(1)
df['strongVolume6MoPrior'] = (prior_strong_days > 3).astype(int)

# **strongVolumeAfterFiling** = If strongVolume is 1 more than 2 times in the
# 5 rows after financialRelease = 1, flag those rows with 1, else 0.
# Every filing row is examined, including those with fewer than 5 rows after
# them: the count can only grow as later rows are added, so a window that
# already exceeds the threshold would be flagged once complete anyway.
# Row labels are used as positions, which relies on the default 0..n-1 index
# produced by read_csv.
df['strongVolumeAfterFiling'] = 0
for i in df.index[df['financialRelease'] == 1]:
    following = slice(i + 1, i + 5)  # .loc label slices include both ends
    if df.loc[following, 'strongVolume'].sum() > 2:
        df.loc[following, 'strongVolumeAfterFiling'] = 1

# **accVolume** = If strongVolume = 1, strongVolume6MoPrior = 1, and strongVolumeAfterFiling = 1 then 1 else 0
df['accVolume'] = ((df['strongVolume'] == 1) &
                   (df['strongVolume6MoPrior'] == 1) &
                   (df['strongVolumeAfterFiling'] == 1)).astype(int)

# **uptrend** = If ema50 > (ema50 50 rows prior) then 1 else 0
df['uptrend'] = (df['ema50'] > df['ema50'].shift(50)).astype(int)

# Save the updated DataFrame to a new CSV file with a timestamp in its name
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
new_csv_file_path = f"C:/Users/SamuliMustonen/Documents/Ready Solutions/Docs/StockTrading/Data/ntnx_data_model_{timestamp}.csv"
df.to_csv(new_csv_file_path, index=False)

print(f"Updated data with new columns saved to {new_csv_file_path}")
