In [1]:
import os
import pandas as pd
import requests
import time
import json

In [2]:
# Set up data folder and file paths
data_folder = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
csv_file = os.path.join(data_folder, "spy_tickers_sample.csv")
json_file = os.path.join(data_folder, "income_statements.json")

# Define which fields to keep
desired_fields = [
    'symbol',
    'date',
    'revenue',
    'grossProfit',
    'operatingIncome',
    'netIncome',
    'eps',
    'ebitda',
    'costOfRevenue',
    'operatingExpenses'
]

# Try loading from cached JSON if it exists
if os.path.exists(json_file):
    print("Loading data from JSON cache...")
    with open(json_file, 'r') as f:
        records = json.load(f)
else:
    # Load tickers
    df = pd.read_csv(csv_file)
    tickers = df['Ticker'].dropna().tolist()

    API_KEY = 'YwnbHRjcJvf6Md2OPoKbSRGHlzZ7hjR6'
    records = []

    # Fetch most recent income statement per ticker
    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/income-statement/{ticker}?limit=1&apikey={API_KEY}'
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()
            if data:
                row = data[0]
                row['symbol'] = ticker
                filtered_row = {k: row.get(k, None) for k in desired_fields}
                records.append(filtered_row)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {ticker}: {e}")
        time.sleep(1)  # Rate limiting

    # Save to JSON
    with open(json_file, 'w') as f:
        json.dump(records, f, indent=2)
    print("Saved data to JSON cache.")

# Convert to DataFrame
income_df = pd.DataFrame(records)

# Show sample
print(income_df.head())


Loading data from JSON cache...
         date symbol reportedCurrency         cik fillingDate  \
0  2024-09-28   AAPL              USD  0000320193  2024-11-01   
1  2024-06-30   MSFT              USD  0000789019  2024-07-30   
2  2024-12-31   AMZN              USD  0001018724  2025-02-07   
3  2025-01-26   NVDA              USD  0001045810  2025-02-26   
4  2024-12-31  GOOGL              USD  0001652044  2025-02-05   

          acceptedDate calendarYear period       revenue  costOfRevenue  ...  \
0  2024-11-01 06:01:36         2024     FY  391035000000   210352000000  ...   
1  2024-07-30 16:06:22         2024     FY  245122000000    74114000000  ...   
2  2025-02-06 18:40:29         2024     FY  637959000000   326288000000  ...   
3  2025-02-26 16:48:33         2025     FY  130497000000    32639000000  ...   
4  2025-02-04 20:41:40         2024     FY  350018000000   146306000000  ...   

   incomeBeforeTaxRatio  incomeTaxExpense     netIncome  netIncomeRatio  \
0              0.3157

In [3]:
# Paths
data_folder  = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
csv_file     = os.path.join(data_folder, "spy_tickers_sample.csv")
output_file  = os.path.join(data_folder, "income_growth.json")

# API setup
API_KEY  = "YwnbHRjcJvf6Md2OPoKbSRGHlzZ7hjR6"
BASE_URL = "https://financialmodelingprep.com/stable/income-statement-growth"

# Try loading from cache
if os.path.exists(output_file):
    print("Loading growth data from JSON cache...")
    with open(output_file, "r") as f:
        growth_records = json.load(f)

else:
    print("No cache found — fetching growth data from API...")
    # Load tickers
    df       = pd.read_csv(csv_file)
    tickers  = df['Ticker'].tolist()
    growth_records = []

    # Fetch and flatten all periods (Solution B)
    for ticker in tickers:
        url = f"{BASE_URL}?symbol={ticker}&apikey={API_KEY}"
        try:
            resp = requests.get(url)
            resp.raise_for_status()
            data = resp.json()

            if isinstance(data, list) and data:
                for period in data:
                    period['symbol'] = ticker
                    growth_records.append(period)
            else:
                print(f"  • no growth data for {ticker}")

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {ticker}: {e}")

        time.sleep(1)  # respect rate limits

    # Save to JSON cache
    os.makedirs(data_folder, exist_ok=True)
    with open(output_file, "w") as f:
        json.dump(growth_records, f, indent=2)
    print(f"Saved {len(growth_records)} records to JSON cache.")

# Convert to DataFrame and inspect
df_growth = pd.DataFrame(growth_records)
print("Columns in growth DataFrame:", df_growth.columns.tolist())
df_growth.shape

Loading growth data from JSON cache...
Columns in growth DataFrame: ['symbol', 'date', 'fiscalYear', 'period', 'reportedCurrency', 'growthRevenue', 'growthCostOfRevenue', 'growthGrossProfit', 'growthGrossProfitRatio', 'growthResearchAndDevelopmentExpenses', 'growthGeneralAndAdministrativeExpenses', 'growthSellingAndMarketingExpenses', 'growthOtherExpenses', 'growthOperatingExpenses', 'growthCostAndExpenses', 'growthInterestIncome', 'growthInterestExpense', 'growthDepreciationAndAmortization', 'growthEBITDA', 'growthOperatingIncome', 'growthIncomeBeforeTax', 'growthIncomeTaxExpense', 'growthNetIncome', 'growthEPS', 'growthEPSDiluted', 'growthWeightedAverageShsOut', 'growthWeightedAverageShsOutDil', 'growthEBIT', 'growthNonOperatingIncomeExcludingInterest', 'growthNetInterestIncome', 'growthTotalOtherIncomeExpensesNet', 'growthNetIncomeFromContinuingOperations', 'growthOtherAdjustmentsToNetIncome', 'growthNetIncomeDeductions']


(100, 34)

In [4]:
# Define paths
data_folder = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
tickers_csv_path = os.path.join(data_folder, "spy_tickers_sample.csv")
output_json_path = os.path.join(data_folder, "ratios.json")

# Try loading from cache
if os.path.exists(output_json_path):
    print("Loading ratios from JSON cache...")
    with open(output_json_path, "r") as f:
        all_ratios = json.load(f)
else:
    print("No cache found — fetching ratios from API...")
    tickers_df = pd.read_csv(tickers_csv_path)
    tickers = tickers_df['Ticker'].dropna().unique().tolist()

    api_key = "YwnbHRjcJvf6Md2OPoKbSRGHlzZ7hjR6"
    base_url = "https://financialmodelingprep.com/api/v3/ratios/{}?limit=1&apikey={}"

    all_ratios = []

    for ticker in tickers:
        try:
            url = base_url.format(ticker, api_key)
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()
            if data:
                all_ratios.append(data[0])
            time.sleep(1)
        except Exception as e:
            print(f"Error fetching {ticker}: {e}")

    # Save to cache
    with open(output_json_path, "w") as f:
        json.dump(all_ratios, f, indent=2)
    print(f"Saved {len(all_ratios)} records to JSON cache.")

# Convert to DataFrame
df_ratios = pd.DataFrame(all_ratios)
print(df_ratios.head())


Loading ratios from JSON cache...
  symbol        date calendarYear period  currentRatio  quickRatio  cashRatio  \
0   AAPL  2024-09-28         2024     FY      0.867313    0.826007   0.169753   
1   MSFT  2025-06-30         2025     FY      1.353446    1.346804   0.214151   
2   AMZN  2024-12-31         2024     FY      1.063735    0.873054   0.439049   
3   NVDA  2025-01-26         2025     FY      4.439851    3.881310   0.475924   
4  GOOGL  2024-12-31         2024     FY      1.836931    1.836931   0.263302   

   daysOfSalesOutstanding  daysOfInventoryOutstanding  operatingCycle  ...  \
0               61.832560                   12.642571       74.475130  ...   
1               90.568517                    3.898054       94.466572  ...   
2               31.725573                   38.273274       69.998847  ...   
3               64.512786                  112.724042      177.236828  ...   
4               54.580336                    0.000000       54.580336  ...   

   priceTo

In [14]:
import os, json, time, requests
import pandas as pd

# 1. Set up paths
data_folder = os.path.join(os.path.expanduser("~/Desktop/Trading"), "Data")
csv_file    = os.path.join(data_folder, "spy_tickers_sample.csv")
json_file   = os.path.join(data_folder, "company_info.json")

# 2. Define the fields you want to keep
desired_fields = [
    "symbol", "companyName", "marketCap", "sector", "industry", "beta",
    "price", "lastAnnualDividend", "volume", "exchange", "exchangeShortName",
    "country", "isEtf", "isFund"
]

# 3. Load or fetch company info
records = []

if os.path.exists(json_file):
    print("Loading company info from JSON cache...")
    with open(json_file, "r") as f:
        records = json.load(f)

else:
    print("No cache found — fetching company info from API...")
    df      = pd.read_csv(csv_file)
    tickers = df['Ticker'].dropna().unique().tolist()
    print(f"Found {len(tickers)} unique tickers.")

    API_KEY  = "YwnbHRjcJvf6Md2OPoKbSRGHlzZ7hjR6"
    BASE_URL = "https://financialmodelingprep.com/api/v3/profile/{}?apikey={}"

    failed = []

    for i, ticker in enumerate(tickers, 1):
        url = BASE_URL.format(ticker, API_KEY)
        try:
            resp = requests.get(url)
            resp.raise_for_status()
            data = resp.json()
            if isinstance(data, list) and data:
                row = data[0]
                filtered = {k: row.get(k, None) for k in desired_fields}
                records.append(filtered)
            else:
                print(f"  • No profile data for {ticker}")
                failed.append(ticker)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {ticker}: {e}")
            failed.append(ticker)
        time.sleep(1)

    os.makedirs(data_folder, exist_ok=True)
    with open(json_file, "w") as f:
        json.dump(records, f, indent=2)
    print(f"Saved {len(records)} company records to cache.")
    if failed:
        print(f"{len(failed)} tickers failed to fetch: {failed[:5]}...")

# 4. Convert to DataFrame and inspect
company_df = pd.DataFrame(records)
company_df.drop_duplicates(subset="symbol", inplace=True)

print("Columns returned:", company_df.columns.tolist())
print(company_df.head(101))


No cache found — fetching company info from API...
Found 100 unique tickers.
Saved 100 company records to cache.
Columns returned: ['symbol', 'companyName', 'marketCap', 'sector', 'industry', 'beta', 'price', 'lastAnnualDividend', 'volume', 'exchange', 'exchangeShortName', 'country', 'isEtf', 'isFund']
   symbol                   companyName marketCap                  sector  \
0    AAPL                    Apple Inc.      None              Technology   
1    MSFT         Microsoft Corporation      None              Technology   
2    AMZN              Amazon.com, Inc.      None       Consumer Cyclical   
3    NVDA            NVIDIA Corporation      None              Technology   
4   GOOGL                 Alphabet Inc.      None  Communication Services   
..    ...                           ...       ...                     ...   
95     WM        Waste Management, Inc.      None             Industrials   
96    PSX                   Phillips 66      None                  Energy   
97 

In [5]:
# Convert columns to numeric, coercing errors to NaN
df_growth['priceEarningsRatio'] = pd.to_numeric(df_growth['priceEarningsRatio'], errors='coerce')
df_growth['growthEPSDiluted'] = pd.to_numeric(df_growth['growthEPSDiluted'], errors='coerce')

# Drop rows with missing values in P_E or growthEPSDiluted
reg_df = df_growth.dropna(subset=['priceEarningsRatio', 'growthEPSDiluted']).copy()

# Log-transform the P/E ratio
reg_df['ln_PE'] = np.log(reg_df['P_E'])

# Setup regression
X = sm.add_constant(reg_df['growthEPSDiluted'])
y = reg_df['ln_PE']

model = sm.OLS(y, X).fit()
print(model.summary())

print(''' the coefficient is the expected p/e when the growth is 0. the coefficient beta is the expected percentage increase in the p/e per unit increase in the independent var, aka the growth  ''')

KeyError: 'P_E'