In [1]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score

# Configuration for ML
# Symbols iterated over by the per-ticker backtest loop later in the notebook.
TICKERS = ["AAPL", "NVDA", "META", "GME", "TSLA"]
# Rolling-window lengths (in rows of price history) that add_indicators uses to
# build one Close_Ratio_<n> and one Trend_<n> predictor column per entry.
HORIZONS = [2, 5, 60, 250, 1000]

In [2]:
def make_dashboard(stock_data, revenue_data, stock):
    """Render share price and revenue for one company on a shared date axis.

    Price is drawn as a line against the primary y-axis and revenue as bars
    against the secondary y-axis. The figure is shown immediately; nothing is
    returned.

    Args:
        stock_data: Frame exposing ``Date`` and ``Close`` columns.
        revenue_data: Frame exposing ``Date`` and ``Revenue`` columns; both
            value columns are cast to float before plotting.
        stock: Display name inserted into the chart title.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Line trace: closing price on the left-hand axis.
    price_trace = go.Scatter(
        x=pd.to_datetime(stock_data.Date),
        y=stock_data.Close.astype("float"),
        name="Price",
    )
    fig.add_trace(price_trace, secondary_y=False)

    # Bar trace: revenue on the right-hand axis.
    revenue_trace = go.Bar(
        x=pd.to_datetime(revenue_data.Date),
        y=revenue_data.Revenue.astype("float"),
        name="Revenue",
    )
    fig.add_trace(revenue_trace, secondary_y=True)

    fig.update_xaxes(title_text="Date")
    axis_titles = (("Price ($US)", False), ("Revenue ($US Millions)", True))
    for axis_title, on_secondary in axis_titles:
        fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

    fig.update_layout(showlegend=True, title=f"{stock} Historical Dashboard")
    fig.show()

def scrape_revenue(url):
    """Download a page and return its revenue table as a cleaned DataFrame.

    The second ``<table>`` element on the page (index 1) is parsed and its two
    columns are renamed to ``Date`` and ``Revenue``. Commas and dollar signs
    are stripped from ``Revenue``; rows with missing or empty values are then
    dropped. ``Revenue`` is returned as text, so callers perform the numeric
    conversion themselves.

    Args:
        url: Address of the HTML page to scrape.

    Returns:
        DataFrame with ``Date`` and ``Revenue`` columns, keeping the row labels
        of the parsed table for the rows that survive cleaning.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page contains fewer than two tables.
    """
    # Local import: the notebook's import cell does not bring in io.
    from io import StringIO

    # A timeout keeps the cell from hanging forever on a stalled connection,
    # and raise_for_status stops an error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    tables = soup.find_all("table")
    if len(tables) < 2:
        raise IndexError(
            f"expected at least 2 <table> elements at {url}, found {len(tables)}"
        )

    # pandas deprecated passing literal HTML to read_html; a file-like wrapper
    # is accepted by old and new versions alike.
    temp_df = pd.read_html(StringIO(str(tables[1])))[0]
    temp_df.columns = ["Date", "Revenue"]

    # Raw string so that "\$" reaches the regex engine as an escaped dollar
    # sign instead of being an invalid Python string escape.
    temp_df["Revenue"] = temp_df["Revenue"].str.replace(r",|\$", "", regex=True)
    temp_df = temp_df.dropna()
    temp_df = temp_df[temp_df["Revenue"] != ""]
    return temp_df

# Prediction helpers: predict() fits and scores a single train/test split; backtest() applies it over successive splits.
def predict(train, test, predictors, model, threshold=0.6):
    """Fit ``model`` on ``train`` and emit thresholded signals for ``test``.

    Args:
        train: Frame holding the ``predictors`` columns and a ``Target`` column.
        test: Frame with the same columns; its index is kept in the result.
        predictors: List of feature column names.
        model: Estimator exposing ``fit`` and ``predict_proba``. If it also
            exposes ``classes_`` after fitting, that is used to locate the
            probability column for class ``1``.
        threshold: Minimum class-1 probability that counts as a signal.
            Defaults to 0.6.

    Returns:
        DataFrame indexed like ``test`` with two columns: ``Target`` (copied
        from ``test``) and ``Predictions`` (1.0 where the class-1 probability
        is >= ``threshold``, else 0.0).
    """
    model.fit(train[predictors], train["Target"])
    probabilities = model.predict_proba(test[predictors])

    # Do not assume column 1 is class 1: when the training window contains a
    # single class, predict_proba returns only one column.
    classes = list(getattr(model, "classes_", (0, 1)))
    if 1 in classes:
        up_probability = pd.Series(
            probabilities[:, classes.index(1)], index=test.index
        )
    else:
        # The model never saw a positive example, so it can never signal.
        up_probability = pd.Series(0.0, index=test.index)

    # Kept as float 0.0/1.0 so downstream sums and displays are unchanged.
    signals = (up_probability >= threshold).astype(float).rename("Predictions")
    return pd.concat([test["Target"], signals], axis=1)

def backtest(data, model, predictors, start=2500, step=250):
    """Evaluate ``model`` over successive expanding-window splits of ``data``.

    For each split point ``i`` in ``range(start, len(data), step)`` the model
    is trained on rows ``[0, i)`` and scored on rows ``[i, i + step)`` through
    ``predict``; the per-split results are concatenated in order.

    Args:
        data: Frame containing the ``predictors`` columns and ``Target``.
        model: Estimator handed unchanged to ``predict``.
        predictors: List of feature column names.
        start: Number of leading rows reserved for the first training window.
            If ``data`` has no more than this many rows, half of its length is
            used instead.
        step: Number of rows scored per split.

    Returns:
        Concatenated output of ``predict`` for every split, or an empty
        DataFrame when ``data`` is too short to form a single split.
    """
    n_rows = len(data)
    if n_rows <= start:
        start = int(n_rows * 0.5)
    # Never split at row 0: that would hand the model an empty training frame
    # (this happens for a one-row input or a non-positive ``start``).
    start = max(start, 1)

    all_predictions = []
    for split in range(start, n_rows, step):
        train = data.iloc[:split].copy()
        test = data.iloc[split:split + step].copy()
        all_predictions.append(predict(train, test, predictors, model))
    return pd.concat(all_predictions) if all_predictions else pd.DataFrame()

In [3]:
# Question 1.2: Tesla Stock Data
# Fetch the longest available TSLA price history through yfinance.
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max")
# Move the index into an ordinary column (shown as "Date" in the output below);
# make_dashboard reads the dates through stock_data.Date.
tesla_data.reset_index(inplace=True)
print("Tesla Stock Data (First 5 Rows):")
display(tesla_data.head())

# Question 1.3: Tesla Revenue Data (Web Scraping)
# scrape_revenue downloads the page and returns its second table as a frame
# with "Date" and "Revenue" columns, with empty/missing revenue rows removed.
tesla_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
tesla_revenue = scrape_revenue(tesla_url)
print("\nTesla Revenue Data (Last 5 Rows):")
display(tesla_revenue.tail())

Tesla Stock Data (First 5 Rows):


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0



Tesla Revenue Data (Last 5 Rows):


  temp_df = pd.read_html(str(tables[1]))[0]


Unnamed: 0,Date,Revenue
48,2010-09-30,31
49,2010-06-30,28
50,2010-03-31,21
52,2009-09-30,46
53,2009-06-30,27


In [4]:
# Question 1.4: GME Stock Data
# Fetch the longest available GME price history through yfinance.
gme = yf.Ticker("GME")
gme_data = gme.history(period="max")
# Move the index into an ordinary column (shown as "Date" in the output below);
# make_dashboard reads the dates through stock_data.Date.
gme_data.reset_index(inplace=True)
print("GME Stock Data (First 5 Rows):")
display(gme_data.head())

# Question 1.5: GME Revenue Data (Web Scraping)
# scrape_revenue downloads the page and returns its second table as a frame
# with "Date" and "Revenue" columns, with empty/missing revenue rows removed.
gme_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"
gme_revenue = scrape_revenue(gme_url)
print("\nGME Revenue Data (Last 5 Rows):")
display(gme_revenue.tail())

GME Stock Data (First 5 Rows):


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620128,1.69335,1.603296,1.691666,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.683251,1.687459,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.61592,1.66221,1.603296,1.66221,6892800,0.0,0.0



GME Revenue Data (Last 5 Rows):


  temp_df = pd.read_html(str(tables[1]))[0]


Unnamed: 0,Date,Revenue
57,2006-01-31,1667
58,2005-10-31,534
59,2005-07-31,416
60,2005-04-30,475
61,2005-01-31,709


In [5]:
# Question 1.6: Tesla Dashboard — price line plus revenue bars for TSLA
make_dashboard(stock_data=tesla_data, revenue_data=tesla_revenue, stock='Tesla')

# Question 1.7: GameStop Dashboard — same chart built from the GME frames
make_dashboard(stock_data=gme_data, revenue_data=gme_revenue, stock='GameStop')

In [6]:
def add_indicators(data, horizons=None):
    """Add rolling price-ratio and trend predictors for each horizon.

    For every horizon ``h`` two columns are created:

    * ``Close_Ratio_h``: ``Close`` divided by its ``h``-row rolling mean.
    * ``Trend_h``: sum of ``Target`` over the ``h`` rows preceding the
      current one.

    The input frame is left untouched; the columns are added to a copy.

    Args:
        data: Frame with at least ``Close`` and ``Target`` columns.
        horizons: Iterable of rolling-window lengths. Defaults to the
            module-level ``HORIZONS`` when omitted.

    Returns:
        Tuple ``(frame, predictor_names)``: the augmented copy with every row
        containing a missing value removed, and the list of added column names
        in creation order.
    """
    if horizons is None:
        horizons = HORIZONS

    # Work on a copy so the caller's frame is not modified as a side effect.
    enriched = data.copy()
    close = enriched["Close"]
    # shift(1) keeps the current row's own Target out of its trend sum.
    prior_targets = enriched["Target"].shift(1)

    new_predictors = []
    for horizon in horizons:
        ratio_col = f"Close_Ratio_{horizon}"
        trend_col = f"Trend_{horizon}"
        # Roll only the two columns that are needed; rolling the whole frame
        # is wasted work and fails when a non-numeric column is present.
        enriched[ratio_col] = close / close.rolling(horizon).mean()
        enriched[trend_col] = prior_targets.rolling(horizon).sum()
        new_predictors += [ratio_col, trend_col]
    return enriched.dropna(), new_predictors

# Per-ticker metrics keyed by symbol; filled in by the loop below.
results = {}
# A single estimator instance is shared; it is refitted for every split.
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)

for ticker in TICKERS:
    # Label each row with whether the next close is higher than this close.
    stock_obj = yf.Ticker(ticker)
    df = stock_obj.history(period="max")
    df["Tomorrow"] = df["Close"].shift(-1)
    df["Target"] = (df["Tomorrow"] > df["Close"]).astype(int)
    df = df.loc["1990-01-01":].copy()

    # Feature engineering followed by the walk-forward evaluation.
    df_clean, predictors = add_indicators(df)
    predictions = backtest(df_clean, model, predictors)

    # Nothing to score when the history was too short to produce a split.
    if predictions.empty:
        continue

    score = precision_score(predictions["Target"], predictions["Predictions"])
    results[ticker] = dict(score=score, signals=predictions["Predictions"].sum())
    print(f"{ticker} Analysis Complete. Precision: {score:.4f}")

# Final Summary Table: one row per ticker, precision rendered as a percentage.
summary_df = pd.DataFrame.from_dict(results, orient='index')
summary_df.columns = ["Precision Score", "Buy Signals Sent"]
summary_df["Precision Score"] = summary_df["Precision Score"].map("{:.2%}".format)
display(summary_df)

AAPL Analysis Complete. Precision: 0.5251
NVDA Analysis Complete. Precision: 0.5814
META Analysis Complete. Precision: 0.5493
GME Analysis Complete. Precision: 0.4938
TSLA Analysis Complete. Precision: 0.5000


Unnamed: 0,Precision Score,Buy Signals Sent
AAPL,52.51%,577.0
NVDA,58.14%,301.0
META,54.93%,142.0
GME,49.38%,401.0
TSLA,50.00%,70.0
