In [5]:
#Getting data for EC481 Research Paper
import yfinance as yf
import pandas as pd

# List of tickers
tickers = ['AAPL', 'MSFT', 'GOOGL']  # Replace with your desired tickers

# Initialize a dictionary to store data for each ticker
data = {}

# Fetch financial data and calculate variables
for ticker in tickers:
    # Download historical data
    stock_data = yf.download(ticker, start='2022-01-01', end='2024-12-31')

    # Fetch financial statements and other metrics
    stock = yf.Ticker(ticker)
    financials = stock.financials
    balance_sheet = stock.balance_sheet
    info = stock.info

    # Check available labels in balance_sheet to adjust calculations
    balance_sheet_labels = balance_sheet.index

    # Calculate metrics (handling missing data with .get())
    profitability = financials.get('Net Income', pd.Series([None])) / balance_sheet.get('Total Assets', pd.Series([None]))  # ROA as profitability metric
    liquidity_ratio = balance_sheet.get('Total Current Assets', pd.Series([None])) / balance_sheet.get('Total Current Liabilities', pd.Series([None]))  # Current Ratio
    leverage_ratio = balance_sheet.get('Total Liab', pd.Series([None])) / balance_sheet.get('Total Stockholder Equity', pd.Series([None]))  # Debt-to-Equity
    growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
    size = info.get('marketCap', None)  # Market capitalization as a proxy for size
    eps = info.get('trailingEps', None)  # Earnings per share (trailing)

    # Organize data for this ticker
    data[ticker] = {
        'Profitability (ROA)': profitability.iloc[-1] if not profitability.empty else None,
        'Liquidity Ratio': liquidity_ratio.iloc[-1] if not liquidity_ratio.empty else None,
        'Leverage Ratio': leverage_ratio.iloc[-1] if not leverage_ratio.empty else None,
        'Growth (Revenue % Change)': growth,
        'Size (Market Cap)': size,
        'EPS': eps,
        'Year': 2021
    }

# Convert to a DataFrame
df = pd.DataFrame(data).T

# Save to CSV
df.to_csv('financial_data_with_metrics.csv', index=True)



[*********************100%***********************]  1 of 1 completed
  growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
[*********************100%***********************]  1 of 1 completed
  growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
[*********************100%***********************]  1 of 1 completed
  growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth


In [6]:
# Getting data for EC481 Research Paper
import yfinance as yf
import pandas as pd

# List of tickers
tickers = ['AAPL', 'MSFT', 'GOOGL']  # Replace with your desired tickers

# Initialize a dictionary to store data for each ticker
data = {}

# Fetch financial data and calculate variables
for ticker in tickers:
    # Download historical data with annual frequency
    stock_data = yf.download(ticker, start='2022-01-01', end='2024-12-31', interval='1y')

    # Fetch financial statements and other metrics
    stock = yf.Ticker(ticker)
    financials = stock.financials
    balance_sheet = stock.balance_sheet
    info = stock.info

    # Check available labels in balance_sheet to adjust calculations
    balance_sheet_labels = balance_sheet.index

    # Calculate metrics (handling missing data with .get())
    profitability = financials.get('Net Income', pd.Series([None])) / balance_sheet.get('Total Assets', pd.Series([None]))  # ROA as profitability metric
    liquidity_ratio = balance_sheet.get('Total Current Assets', pd.Series([None])) / balance_sheet.get('Total Current Liabilities', pd.Series([None]))  # Current Ratio
    leverage_ratio = balance_sheet.get('Total Liab', pd.Series([None])) / balance_sheet.get('Total Stockholder Equity', pd.Series([None]))  # Debt-to-Equity
    growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
    size = info.get('marketCap', None)  # Market capitalization as a proxy for size
    eps = info.get('trailingEps', None)  # Earnings per share (trailing)
    dividend = info.get('dividendRate', None)  # Dividend
    
    # Organize data for this ticker
    data[ticker] = {
        'Profitability (ROA)': profitability.iloc[-1] if not profitability.empty else None,
        'Liquidity Ratio': liquidity_ratio.iloc[-1] if not liquidity_ratio.empty else None,
        'Leverage Ratio': leverage_ratio.iloc[-1] if not leverage_ratio.empty else None,
        'Growth (Revenue % Change)': growth,
        'Size (Market Cap)': size,
        'EPS': eps,
        'Dividend': dividend,
        'Year': 2021
    }

# Convert to a DataFrame
df = pd.DataFrame(data).T

# Save to CSV
df.to_csv('financial_data_with_metrics_and_dividend.csv', index=True)


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: ReadTimeout(ReadTimeoutError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=10)"))
  growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['MSFT']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1y 2022-01-01 -> 2024-12-31) (Yahoo error = "Invalid input - interval=1y is not supported. Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo]")')
  growth = financials.get('Total Revenue', pd.Series([None])).pct_change().iloc[-1] if not financials.empty else None  # Revenue growth
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['GOOGL']: YFPricesMissingError('$%ticker%: possibly delisted; n

In [8]:
import requests
import pandas as pd
import time

# Set your Alpha Vantage API key here
API_KEY = '36U78LF5HDZY0S51'

def get_financial_data(symbol):
    base_url = "https://www.alphavantage.co/query"
    data = {}
    
    # Financial Ratios Endpoint
    ratios_params = {
        "function": "OVERVIEW",
        "symbol": symbol,
        "apikey": API_KEY
    }
    response = requests.get(base_url, params=ratios_params)
    if response.status_code == 200:
        ratios_data = response.json()
        
        # Manually calculate current ratio, debt to equity, and revenue growth if necessary fields are available
        try:
            # Current Ratio
            current_assets = float(ratios_data.get("TotalCurrentAssets", 0))
            current_liabilities = float(ratios_data.get("TotalCurrentLiabilities", 0))
            data['current_ratio'] = current_assets / current_liabilities if current_liabilities else None

            # Debt to Equity
            total_liabilities = float(ratios_data.get("TotalLiabilities", 0))
            equity = float(ratios_data.get("ShareholderEquity", 0))
            data['debt_to_equity'] = total_liabilities / equity if equity else None

            # Revenue Growth (assumes previous year's revenue is available as "RevenueTTM" or similar)
            revenue_current = float(ratios_data.get("RevenueTTM", 0))
            revenue_previous = float(ratios_data.get("Revenue", 0))
            data['revenue_growth'] = ((revenue_current - revenue_previous) / revenue_previous * 100) if revenue_previous else None

            # Other metrics
            data['return_on_assets'] = ratios_data.get("ReturnOnAssetsTTM")
            data['return_on_equity'] = ratios_data.get("ReturnOnEquityTTM")
            data['market_cap'] = ratios_data.get("MarketCapitalization")
            data['eps'] = ratios_data.get("EPS")
            data['dividend_yield'] = ratios_data.get("DividendYield")

        except (TypeError, ValueError) as e:
            print(f"Error calculating data for {symbol}: {e}")
        
    else:
        print(f"Error fetching data for {symbol}: {response.status_code}")

    return data


def save_to_csv(data, filename='financial_data.csv'):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

def main(symbols):
    financial_data = []

    for symbol in symbols:
        data = get_financial_data(symbol)
        if data:
            # Add company symbol and year
            data['symbol'] = symbol
            data['year'] = pd.Timestamp.now().year
            financial_data.append(data)

        # Alpha Vantage has a 5-requests-per-minute limit on the free tier
        time.sleep(2)  # Wait to avoid hitting the rate limit

    save_to_csv(financial_data)

# Example usage with a list of stock symbols
symbols = ["AAPL", "MSFT", "GOOGL"]  # Replace with the list of companies you want
main(symbols)


Data saved to financial_data.csv
