In [1]:
#Import dependencies
# Import dependencies
import yfinance as yf
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import hvplot.pandas
import holoviews as hv
hv.extension('bokeh')

In [2]:
# Set pandas display options
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [3]:
# Fetch S&P 500 tickers
#Define dates
start_date = '2000-01-01'
end_date = '2024-10-14'

#Get tickers from wikipedia
sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

# Filter out Class B shares that have a '.B' in the ticker name
sp500_tickers = [ticker for ticker in sp500_tickers if '.B' not in ticker]

print(f"Initial total S&P 500 tickers: {len(sp500_tickers)}")

Initial total S&P 500 tickers: 501


In [4]:
# Download historical prices
historical_prices = yf.download(sp500_tickers, start=start_date, end=end_date)['Adj Close']

# Fill NaN values with 0
historical_prices.fillna(0, inplace=True)

# Check if data was downloaded for all tickers
print(f"Successfully downloaded historical_prices: {len(historical_prices.columns)} out of {len(sp500_tickers)}")

# Display tickers that were successfully downloaded
downloaded_tickers = historical_prices.columns.get_level_values(0).unique()
print(f"Successfully downloaded tickers: {len(downloaded_tickers)} out of {len(sp500_tickers)}")
# Display data Frame
historical_prices.head()

[*********************100%***********************]  501 of 501 completed


Successfully downloaded historical_prices: 501 out of 501
Successfully downloaded tickers: 501 out of 501


Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,43.463,0.844,0.0,0.0,8.288,1.278,0.0,16.275,28.215,6.307,...,0.0,11.353,0.0,6.848,18.036,0.0,4.634,0.0,25.028,0.0
2000-01-04 00:00:00+00:00,40.143,0.773,0.0,0.0,8.051,1.271,0.0,14.909,26.787,6.242,...,0.0,10.926,0.0,7.006,17.69,0.0,4.541,0.0,24.667,0.0
2000-01-05 00:00:00+00:00,37.653,0.784,0.0,0.0,8.037,1.389,0.0,15.204,27.178,6.143,...,0.0,11.505,0.0,7.276,18.655,0.0,4.564,0.0,25.139,0.0
2000-01-06 00:00:00+00:00,36.219,0.716,0.0,0.0,8.318,1.375,0.0,15.328,26.435,6.176,...,0.0,12.043,0.0,7.209,19.619,0.0,4.526,0.0,23.778,0.0
2000-01-07 00:00:00+00:00,39.237,0.75,0.0,0.0,8.407,1.451,0.0,16.073,27.178,6.274,...,0.0,11.647,0.0,7.209,19.562,0.0,4.425,0.0,23.514,0.0


In [10]:
ticker_data = yf.Ticker("AAPL")

# Read json data
# ticker_data.history(period="1d", interval="1m")["Stock Splits"].unique()

In [12]:
recommendations = ticker_data.recommendations
display(recommendations)

Unnamed: 0,period,strongBuy,buy,hold,sell,strongSell
0,0m,11,21,6,0,0
1,-1m,12,24,11,1,0
2,-2m,12,23,10,1,0
3,-3m,10,24,7,1,0


In [15]:
historical_data = ticker_data.history(period="max")

historical_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-12-12 00:00:00-05:00,0.099,0.099,0.099,0.099,469033600,0.000,0.000
1980-12-15 00:00:00-05:00,0.094,0.094,0.094,0.094,175884800,0.000,0.000
1980-12-16 00:00:00-05:00,0.087,0.087,0.087,0.087,105728000,0.000,0.000
1980-12-17 00:00:00-05:00,0.089,0.089,0.089,0.089,86441600,0.000,0.000
1980-12-18 00:00:00-05:00,0.092,0.092,0.092,0.092,73449600,0.000,0.000
...,...,...,...,...,...,...,...
2024-10-14 00:00:00-04:00,228.700,231.730,228.600,231.300,39882100,0.000,0.000
2024-10-15 00:00:00-04:00,233.610,237.490,232.370,233.850,64751400,0.000,0.000
2024-10-16 00:00:00-04:00,231.600,232.120,229.840,231.780,34082200,0.000,0.000
2024-10-17 00:00:00-04:00,233.430,233.850,230.520,232.150,32993800,0.000,0.000


In [9]:
start_date = 2020
end_date = 2021
filtered_recommendations = recommendations[(recommendations.index >= start_date) & (recommendations.index <= end_date)]
print(filtered_recommendations)

Empty DataFrame
Columns: [period, strongBuy, buy, hold, sell, strongSell]
Index: []


In [16]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Step 1: Extract and Preprocess Data
ticker = yf.Ticker("AAPL")
recommendations = ticker.recommendations
historical_data = ticker.history(period="max")

# Convert recommendations to a DataFrame
recommendations = recommendations.reset_index()

# Step 2: Feature Engineering
# Create features from historical data
historical_data['MA_10'] = historical_data['Close'].rolling(window=10).mean()
historical_data['MA_50'] = historical_data['Close'].rolling(window=50).mean()
historical_data['Price_Change'] = historical_data['Close'].pct_change()

# Merge recommendations with historical data
data = pd.merge(recommendations, historical_data, left_on='Date', right_index=True, how='inner')

# Create target variable
data['Target'] = data.apply(lambda row: 'buy' if row['buy'] > row['hold'] and row['buy'] > row['sell'] else ('sell' if row['sell'] > row['buy'] and row['sell'] > row['hold'] else 'hold'), axis=1)

# Drop rows with missing values
data = data.dropna()

# Step 3: Model Training
# Define features and target
features = ['MA_10', 'MA_50', 'Price_Change']
X = data[features]
y = data['Target']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluation
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

KeyError: 'Date'