In [59]:
!pip install yfinance pandas numpy

import os
import yfinance as yf
import pandas as pd
import numpy as np

os.makedirs("datasets", exist_ok=True)
print("SETUP COMPLETE")

## What kind of data is possible?

In [60]:
aapl = yf.Ticker("aapl")
stats = aapl.info

stats

{'address1': 'One Apple Park Way',
 'city': 'Cupertino',
 'state': 'CA',
 'zip': '95014',
 'country': 'United States',
 'phone': '(408) 996-1010',
 'website': 'https://www.apple.com',
 'industry': 'Consumer Electronics',
 'industryKey': 'consumer-electronics',
 'industryDisp': 'Consumer Electronics',
 'sector': 'Technology',
 'sectorKey': 'technology',
 'sectorDisp': 'Technology',
 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and p

# Check the earliest available data for each stock

In [61]:
# Define the date range
start_date = '1993-01-29'  # this is we use INFO from spy and it starts on the 1993-01-29
end_date = '2025-03-01'

# Define the Magnificent 7 stocks
magnificent_7 = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "TSLA"]

# Check earliest available data for each stock
print("Checking earliest available data for each stock:")
for ticker in magnificent_7 + ["SPY"]:
    ticker_max = yf.Ticker(ticker).history(period="max")
    if not ticker_max.empty:
        earliest_date = ticker_max.index.min()
        print(f"Earliest available date for {ticker}: {earliest_date}")
    else:
        print(f"No data available for {ticker}")
print("\n")

Checking earliest available data for each stock:
Earliest available date for AAPL: 1980-12-12 00:00:00-05:00
Earliest available date for MSFT: 1986-03-13 00:00:00-05:00
Earliest available date for GOOGL: 2004-08-19 00:00:00-04:00
Earliest available date for AMZN: 1997-05-15 00:00:00-04:00
Earliest available date for META: 2012-05-18 00:00:00-04:00
Earliest available date for NVDA: 1999-01-22 00:00:00-05:00
Earliest available date for TSLA: 2010-06-29 00:00:00-04:00
Earliest available date for SPY: 1993-01-29 00:00:00-05:00




# Stock Metrics Explanation
## Technical Indicators
### Price Data

Open: Opening price of the stock for the trading day
High: Highest price the stock reached during the trading day
Low: Lowest price the stock reached during the trading day
Close: Closing price of the stock for the trading day
Volume: Number of shares traded during the trading day
Daily_Return: Percentage change in price from the previous day

### Momentum and Trend Indicators

RSI_14: Relative Strength Index over 14 periods - measures momentum by comparing the magnitude of recent gains to recent losses (0-100, >70 overbought, <30 oversold)
MA50: 50-day Moving Average - average closing price over the last 50 trading days
MA200: 200-day Moving Average - average closing price over the last 200 trading days
Golden_Cross: Binary indicator (1/0) signaling when MA50 crosses above MA200 (bullish signal)
Death_Cross: Binary indicator (1/0) signaling when MA50 crosses below MA200 (bearish signal)
EMA12: 12-day Exponential Moving Average - gives more weight to recent prices
EMA26: 26-day Exponential Moving Average - gives more weight to recent prices

## Volume Indicators

Volume_Ratio: Current volume divided by the 20-day average volume - indicates unusual trading activity
Money_Flow: Daily return multiplied by volume - indicates the money moving in/out of the stock
Cumulative_Money_Flow: Running sum of Money_Flow - shows overall money trend over time

MACD (Moving Average Convergence Divergence)

MACD_Line: Difference between EMA12 and EMA26 - shows momentum and trend direction
MACD_Signal: 9-day EMA of the MACD_Line - smooths MACD for signal generation
MACD_Histogram: Difference between MACD_Line and MACD_Signal - shows momentum acceleration/deceleration
MACD_Crossover: Binary indicator (1/0) when MACD_Line crosses above MACD_Signal (bullish signal)

## Volatility Indicators

Volatility_20D: Standard deviation of daily returns over 20 days - measures price volatility
BB_Middle: 20-day Moving Average (middle Bollinger Band)
BB_StdDev: Standard deviation of prices over 20 days - used to calculate Bollinger Bands
BB_Upper: Upper Bollinger Band (BB_Middle + 2*BB_StdDev) - potential resistance level
BB_Lower: Lower Bollinger Band (BB_Middle - 2*BB_StdDev) - potential support level
BB_Width: (BB_Upper - BB_Lower) / BB_Middle - indicates expected volatility
BB_Position: (Close - BB_Lower) / (BB_Upper - BB_Lower) - indicates relative position within bands (0-1)

## Support/Resistance and Market Correlation

Rolling_High_20d: Highest price over the last 20 trading days - potential resistance level
Rolling_Low_20d: Lowest price over the last 20 trading days - potential support level
Near_Resistance: Binary indicator (1/0) when price is near 20-day high
Near_Support: Binary indicator (1/0) when price is near 20-day low
SPY_Close: Closing price of the S&P 500 ETF - benchmark for comparison
SPY_Returns: Daily return of the S&P 500 ETF
Covariance: Covariance between stock returns and market returns over 30 days
Market_Variance: Variance of market returns over 30 days
Beta_30d: 30-day rolling Beta - measures stock's volatility compared to the market (>1 more volatile, <1 less volatile)

## Fundamental Metrics
### Valuation Metrics

PE_Ratio: Price-to-Earnings ratio (trailing) - stock price divided by last 12 months earnings per share
Forward_PE: Forward Price-to-Earnings ratio - stock price divided by estimated future 12 months earnings per share
PEG_Ratio: PE ratio divided by expected earnings growth rate - indicates relative valuation accounting for growth
Price_to_Book: Stock price divided by book value per share - indicates valuation relative to accounting value
Dividend_Yield: Annual dividend payments as percentage of stock price

### Size and Value Metrics

Market_Cap: Total market value of the company's outstanding shares
Enterprise_Value: Total company value (market cap + debt - cash) - used for acquisition valuation

### Profitability and Growth Metrics

Profit_Margin: Net profit as a percentage of revenue - indicates efficiency in converting revenue to profit
Return_on_Equity: Net income as a percentage of shareholders' equity - measures profitability relative to equity
Revenue_Growth: Year-over-year percentage increase in company revenue
EPS_Growth: Year-over-year percentage increase in earnings per share

### Financial Health Metrics

Debt_to_Equity: Total debt divided by shareholders' equity - indicates financial leverage
Current_Ratio: Current assets divided by current liabilities - measures short-term liquidity

In [63]:
# Also get S&P 500 data for Beta calculation
spy = yf.Ticker("SPY").history(start=start_date, end=end_date, interval="1d")

# Create a dictionary to store all dataframes
stock_dfs = {}

# Create an empty dataframe to store all stocks combined
all_stocks_df = pd.DataFrame()


# Define the technical indicator calculation functions
def calculate_rsi(prices, period=14):
    delta = prices.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))


def calculate_technical_indicators(df, spy_data):
    # 1. RSI (Relative Strength Index)
    df['RSI_14'] = calculate_rsi(df['Close'])  # 0-100 momentum oscillator; >70 overbought, <30 oversold

    # 2. Moving Averages and Golden/Death Cross
    df['MA50'] = df['Close'].rolling(window=50).mean()  # 50-day moving average - medium-term trend
    df['MA200'] = df['Close'].rolling(window=200).mean()  # 200-day moving average - long-term trend

    # Golden Cross (MA50 crosses above MA200) and Death Cross (MA50 crosses below MA200)
    df['Golden_Cross'] = ((df['MA50'] > df['MA200']) & (df['MA50'].shift() <= df['MA200'].shift())).astype(int)  # Bullish signal
    df['Death_Cross'] = ((df['MA50'] < df['MA200']) & (df['MA50'].shift() >= df['MA200'].shift())).astype(int)  # Bearish signal

    # 3. Volume Indicators
    df['Daily_Return'] = df['Close'].pct_change()  # Daily percentage price change
    df['Volume_Ratio'] = df['Volume'] / df['Volume'].rolling(window=20).mean()  # Relative volume compared to 20-day average
    df['Money_Flow'] = df['Daily_Return'] * df['Volume']  # Indicates buying/selling pressure
    df['Cumulative_Money_Flow'] = df['Money_Flow'].cumsum()  # Accumulated money flow over time

    # 4. MACD (Moving Average Convergence Divergence)
    df['EMA12'] = df['Close'].ewm(span=12, adjust=False).mean()  # 12-day exponential moving average
    df['EMA26'] = df['Close'].ewm(span=26, adjust=False).mean()  # 26-day exponential moving average
    df['MACD_Line'] = df['EMA12'] - df['EMA26']  # Difference between fast and slow EMAs
    df['MACD_Signal'] = df['MACD_Line'].ewm(span=9, adjust=False).mean()  # 9-day EMA of MACD line
    df['MACD_Histogram'] = df['MACD_Line'] - df['MACD_Signal']  # Difference between MACD and signal line
    df['MACD_Crossover'] = ((df['MACD_Line'] > df['MACD_Signal']) &
                            (df['MACD_Line'].shift() <= df['MACD_Signal'].shift())).astype(int)  # Bullish MACD signal

    # 5. Bollinger Bands
    df['BB_Middle'] = df['Close'].rolling(window=20).mean()  # 20-day moving average (middle band)
    df['BB_StdDev'] = df['Close'].rolling(window=20).std()  # Standard deviation of prices
    df['BB_Upper'] = df['BB_Middle'] + (df['BB_StdDev'] * 2)  # Upper band (resistance)
    df['BB_Lower'] = df['BB_Middle'] - (df['BB_StdDev'] * 2)  # Lower band (support)
    df['BB_Width'] = (df['BB_Upper'] - df['BB_Lower']) / df['BB_Middle']  # Indicator of volatility
    df['BB_Position'] = (df['Close'] - df['BB_Lower']) / (df['BB_Upper'] - df['BB_Lower'])  # Position within bands (0-1)

    # 6. Support/Resistance Levels (using rolling highs and lows)
    df['Rolling_High_20d'] = df['High'].rolling(window=20).max()  # Potential resistance level
    df['Rolling_Low_20d'] = df['Low'].rolling(window=20).min()  # Potential support level
    df['Near_Resistance'] = (df['High'] >= df['Rolling_High_20d'] * 0.98).astype(int)  # Price approaching resistance
    df['Near_Support'] = (df['Low'] <= df['Rolling_Low_20d'] * 1.02).astype(int)  # Price approaching support

    # 7. Beta (correlation with market)
    # Add SPY data
    df['SPY_Close'] = spy_data['Close']  # S&P 500 ETF closing price
    df['SPY_Returns'] = spy_data['Close'].pct_change()  # S&P 500 daily returns

    # Calculate 30-day rolling beta
    df['Covariance'] = df['Daily_Return'].rolling(window=30).cov(df['SPY_Returns'])  # Covariance with market
    df['Market_Variance'] = df['SPY_Returns'].rolling(window=30).var()  # Variance of market returns
    df['Beta_30d'] = df['Covariance'] / df['Market_Variance']  # Stock volatility relative to market (>1 more volatile)

    # Daily returns and volatility
    df['Volatility_20D'] = df['Daily_Return'].rolling(window=20).std()  # 20-day standard deviation of returns

    return df


# Process each stock
for ticker in magnificent_7:
    print(f"Processing {ticker}...")

    # Get stock data
    stock = yf.Ticker(ticker)

    # Get historical market data (daily)
    df = stock.history(start=start_date, end=end_date, interval="1d")

    # Skip if dataframe is empty (no data available)
    if df.empty:
        print(f"No data available for {ticker}, skipping...")
        continue

    # Calculate all technical indicators
    df = calculate_technical_indicators(df, spy)

    # Convert the DatetimeIndex to a standard 'YYYY-MM-DD' format
    df.index = df.index.strftime('%Y-%m-%d')

    # Reset index to make Date a regular column
    df.reset_index(inplace=True)

    # Rename index column to Date
    df.rename(columns={'index': 'Date'}, inplace=True)

    # Add a ticker column to identify the stock
    df['Ticker'] = ticker

    # Reorder columns so Date is first and Ticker is second
    cols = df.columns.tolist()
    cols.remove('Date')
    cols.remove('Ticker')
    new_cols = ['Date', 'Ticker'] + cols
    df = df[new_cols]

    # Sort by date
    df = df.sort_values(by='Date')

    # Store the dataframe in the dictionary
    stock_dfs[ticker] = df

    # Get fundamental data for the stock
    try:
        # Get additional fundamental metrics
        info = stock.info

        # Add fundamental metrics as columns with the same value for all rows
        fundamental_metrics = {
            'PE_Ratio': info.get('trailingPE', np.nan),  # Price divided by trailing 12-month earnings
            'Forward_PE': info.get('forwardPE', np.nan),  # Price divided by estimated future earnings
            'PEG_Ratio': info.get('pegRatio', np.nan),  # PE ratio divided by growth rate - lower suggests undervaluation
            'Price_to_Book': info.get('priceToBook', np.nan),  # Price relative to book value - under 1 potentially undervalued
            'Dividend_Yield': info.get('dividendYield', np.nan) * 100 if info.get('dividendYield') else np.nan,  # Annual dividend as % of share price
            'Market_Cap': info.get('marketCap', np.nan),  # Total market value of all shares
            'Enterprise_Value': info.get('enterpriseValue', np.nan),  # Market cap + debt - cash (acquisition value)
            'Profit_Margin': info.get('profitMargins', np.nan) * 100 if info.get('profitMargins') else np.nan,  # Profit as % of revenue
            'Return_on_Equity': info.get('returnOnEquity', np.nan) * 100 if info.get('returnOnEquity') else np.nan,  # Profit as % of shareholder equity
            'Revenue_Growth': info.get('revenueGrowth', np.nan) * 100 if info.get('revenueGrowth') else np.nan,  # Year-over-year revenue growth
            'Debt_to_Equity': info.get('debtToEquity', np.nan),  # Debt relative to equity - financial leverage
            'Current_Ratio': info.get('currentRatio', np.nan),  # Current assets / current liabilities - short-term liquidity
            'EPS_Growth': info.get('earningsGrowth', np.nan) * 100 if info.get('earningsGrowth') else np.nan  # Year-over-year earnings per share growth
        }

        # Add fundamental metrics to dataframe
        for metric, value in fundamental_metrics.items():
            df[metric] = value

        print(f"Added fundamental metrics for {ticker}")
    except Exception as e:
        print(f"Error getting fundamental data for {ticker}: {e}")

    # Save the individual stock dataframe to a CSV file
    df.to_csv(f"datasets/{ticker}_technical_analysis.csv", index=False)
    print(f"Saved {ticker} data to datasets/{ticker}_technical_analysis.csv")

    # Add to the combined dataframe
    if all_stocks_df.empty:
        all_stocks_df = df.copy()
    else:
        all_stocks_df = pd.concat([all_stocks_df, df])

# Sort the combined dataframe by date
all_stocks_df = all_stocks_df.sort_values(by=['Date', 'Ticker'])

# Save the combined dataframe to a CSV file
all_stocks_df.to_csv("datasets/M7.csv", index=False)
print("Saved combined data to datasets/Magnificent_7_technical_analysis.csv")

print("Processing complete!")
all_stocks_df

Processing AAPL...
Added fundamental metrics for AAPL
Saved AAPL data to datasets/AAPL_technical_analysis.csv
Processing MSFT...
Added fundamental metrics for MSFT
Saved MSFT data to datasets/MSFT_technical_analysis.csv
Processing GOOGL...
Added fundamental metrics for GOOGL
Saved GOOGL data to datasets/GOOGL_technical_analysis.csv
Processing AMZN...
Added fundamental metrics for AMZN
Saved AMZN data to datasets/AMZN_technical_analysis.csv
Processing META...
Added fundamental metrics for META
Saved META data to datasets/META_technical_analysis.csv
Processing NVDA...
Added fundamental metrics for NVDA
Saved NVDA data to datasets/NVDA_technical_analysis.csv
Processing TSLA...
Added fundamental metrics for TSLA
Saved TSLA data to datasets/TSLA_technical_analysis.csv
Saved combined data to datasets/Magnificent_7_technical_analysis.csv
Processing complete!


Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits,RSI_14,...,Price_to_Book,Dividend_Yield,Market_Cap,Enterprise_Value,Profit_Margin,Return_on_Equity,Revenue_Growth,Debt_to_Equity,Current_Ratio,EPS_Growth
0,1993-01-29,AAPL,0.436564,0.443810,0.427507,0.431130,266100800,0.0,0.0,,...,49.485126,44.0,3299078569984,3460245225472,24.295000,136.520000,4.0,145.000,0.923,10.1
0,1993-01-29,MSFT,1.679264,1.688859,1.645679,1.660072,39424000,0.0,0.0,,...,9.354489,87.0,2831156772864,2857456631808,35.428002,34.291000,12.3,33.998,1.351,10.2
1,1993-02-01,AAPL,0.429319,0.443810,0.429319,0.443810,240553600,0.0,0.0,,...,49.485126,44.0,3299078569984,3460245225472,24.295000,136.520000,4.0,145.000,0.923,10.1
1,1993-02-01,MSFT,1.655275,1.688860,1.640881,1.679264,42854400,0.0,0.0,,...,9.354489,87.0,2831156772864,2857456631808,35.428002,34.291000,12.3,33.998,1.351,10.2
2,1993-02-02,AAPL,0.440187,0.445621,0.436564,0.436564,182336000,0.0,0.0,,...,49.485126,44.0,3299078569984,3460245225472,24.295000,136.520000,4.0,145.000,0.923,10.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5165,2025-02-28,GOOGL,168.485941,170.413729,166.578150,170.084106,48130600,0.0,0.0,23.980630,...,6.150552,48.0,2006519119872,1954435301376,28.604000,32.908002,11.8,8.655,1.837,30.9
3213,2025-02-28,META,658.039978,669.630005,642.599976,668.200012,17534200,0.0,0.0,33.797395,...,8.372945,35.0,1529013075968,1487056928768,37.909000,37.140998,20.6,27.250,2.978,51.4
8076,2025-02-28,MSFT,392.660004,397.630005,386.570007,396.989990,32845700,0.0,0.0,37.934182,...,9.354489,87.0,2831156772864,2857456631808,35.428002,34.291000,12.3,33.998,1.351,10.2
6566,2025-02-28,NVDA,118.019997,125.089996,116.400002,124.919998,389091100,0.0,0.0,45.030306,...,33.517956,4.0,2650613219328,2577372020736,55.848000,119.177010,77.9,12.946,4.440,83.6
