In [42]:
pip install yfinance pandas selenium beautifulsoup4 lxml plotly


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [52]:
import yfinance as yf
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import plotly.graph_objects as go

# ------------------ Selenium scraper ------------------ #
def scrape_revenue_with_selenium(url, wait_seconds=5):
    """Scrape a two-column revenue table from inside the first iframe of a page.

    A headless Chrome session loads ``url``, waits ``wait_seconds`` for the page
    to render, switches into the first ``<iframe>`` found and parses the first
    ``<table>`` of that frame's HTML.

    Parameters
    ----------
    url : str
        Address of the page to load.
    wait_seconds : float, optional
        Fixed delay (seconds) after navigation before the page is inspected.
        Defaults to 5, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Date"`` and ``"Revenue"``. ``Revenue`` is numeric with ``$``
        and ``,`` stripped; rows whose revenue cannot be parsed are dropped.
        An empty frame with the same two columns is returned when the iframe
        contains no ``<table>``.

    Raises
    ------
    ValueError
        If the table found does not have exactly two columns.

    Any Selenium error (e.g. no iframe on the page) propagates to the caller,
    but the browser is always shut down first.
    """
    # Local import keeps this function self-contained; StringIO is needed because
    # passing literal HTML straight to pd.read_html is deprecated.
    from io import StringIO

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        time.sleep(wait_seconds)  # give page time to load

        # Switch to the first iframe on the page
        iframe = driver.find_element("xpath", "//iframe")
        driver.switch_to.frame(iframe)

        page_html = driver.page_source
    finally:
        # Always release the browser process, even if navigation or the
        # iframe lookup raised.
        driver.quit()

    soup = BeautifulSoup(page_html, "html.parser")

    # Get the revenue table
    table = soup.find("table")
    if table is None:
        return pd.DataFrame(columns=["Date", "Revenue"])

    df = pd.read_html(StringIO(str(table)))[0]
    if df.shape[1] != 2:
        raise ValueError(
            f"Expected a 2-column (Date, Revenue) table, got {df.shape[1]} columns"
        )
    df.columns = ["Date", "Revenue"]

    # Clean the Revenue column (raw string: "\$" is an invalid escape otherwise)
    df["Revenue"] = df["Revenue"].replace({r"\$": "", ",": ""}, regex=True)
    df["Revenue"] = pd.to_numeric(df["Revenue"], errors="coerce")

    # Keep only valid rows (no NaN)
    df = df[df["Revenue"].notna()]

    return df



# ------------------ Separate graph function ------------------ #
def plot_stock_price(stock_data, title):
    """Draw and display a Plotly line chart of closing price against date.

    ``stock_data`` must be indexable by ``'Date'`` and ``'Close'``; ``title``
    becomes the chart title. Nothing is returned: the figure is shown directly.
    """
    price_trace = go.Scatter(
        x=stock_data["Date"],
        y=stock_data["Close"],
        name="Share Price",
        line={"color": "blue"},
    )
    layout_options = {
        "title": title,
        "xaxis_title": "Date",
        "yaxis_title": "Price (USD)",
    }

    chart = go.Figure()
    chart.add_trace(price_trace)
    chart.update_layout(**layout_options)
    chart.show()

def plot_revenue(revenue_data, title):
    """Draw and display a Plotly line chart of revenue against date.

    ``revenue_data`` must be indexable by ``'Date'`` and ``'Revenue'``;
    ``title`` becomes the chart title. Nothing is returned: the figure is
    shown directly.
    """
    revenue_trace = go.Scatter(
        x=revenue_data["Date"],
        y=revenue_data["Revenue"],
        name="Revenue",
        line={"color": "red"},
    )
    layout_options = {
        "title": title,
        "xaxis_title": "Date",
        "yaxis_title": "Revenue (USD Millions)",
    }

    chart = go.Figure()
    chart.add_trace(revenue_trace)
    chart.update_layout(**layout_options)
    chart.show()

# ------------------ Q1: Tesla Stock Data ------------------ #
# Download the full TSLA price history, then move the index into an ordinary
# column so the plotting helpers can address it as stock_data['Date'].
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max").reset_index()
print("Q1: Tesla stock data head()")
print(tesla_data.head())

# ------------------ Q2: Tesla Revenue Data ------------------ #
# Hard-coded quarterly figures, newest quarter first. The two lists are
# positionally aligned: one revenue value per quarter-end date.
# NOTE(review): values are presumably USD millions (the revenue plot's axis
# label says so) -- confirm against the original data source.
tesla_revenue = pd.DataFrame(
    dict(
        Date=[
            "2023-09-30", "2023-06-30", "2023-03-31", "2022-12-31", "2022-09-30",
            "2022-06-30", "2022-03-31", "2021-12-31", "2021-09-30", "2021-06-30",
            "2021-03-31", "2020-12-31", "2020-09-30", "2020-06-30", "2020-03-31",
            "2019-12-31", "2019-09-30", "2019-06-30", "2019-03-31", "2018-12-31",
            "2018-09-30", "2018-06-30", "2018-03-31", "2017-12-31", "2017-09-30",
            "2017-06-30", "2017-03-31", "2016-12-31", "2016-09-30", "2016-06-30",
            "2016-03-31", "2015-12-31", "2015-09-30", "2015-06-30", "2015-03-31",
            "2014-12-31", "2014-09-30", "2014-06-30", "2014-03-31", "2013-12-31",
            "2013-09-30", "2013-06-30", "2013-03-31", "2012-12-31", "2012-09-30",
            "2012-06-30", "2012-03-31", "2011-12-31", "2011-09-30", "2011-06-30",
            "2011-03-31", "2010-12-31", "2010-09-30", "2010-06-30",
        ],
        Revenue=[
            23400, 24927, 23329, 24318, 21454,
            16934, 18756, 17719, 13757, 11958,
            10389, 10744, 8771, 6036, 5985,
            7384, 6303, 6350, 4541, 7226,
            6823, 4002, 3411, 3288, 2985,
            2769, 2696, 2284, 2298, 2276,
            1203, 1214, 937, 955, 940,
            956, 852, 769, 713, 615,
            602, 405, 562, 306, 50,
            27, 30, 31, 31, 28,
            24, 21, 20, 27,
        ],
    )
).assign(Date=lambda frame: pd.to_datetime(frame["Date"]))  # strings -> datetimes
print("Tesla Revenue (Last 5 rows):")
print(tesla_revenue.tail())

# ------------------ Q3: GME Stock Data ------------------ #
# Download the full GME price history, then move the index into an ordinary
# column so the plotting helpers can address it as stock_data['Date'].
gme = yf.Ticker("GME")
gme_data = gme.history(period="max").reset_index()
print("\nQ3: GME stock data head()")
print(gme_data.head())

# ------------------ Q4: GME Revenue Data ------------------ #
# Hard-coded quarterly figures, newest quarter first. The two lists are
# positionally aligned: one revenue value per quarter-end date.
# NOTE(review): values are presumably USD millions (the revenue plot's axis
# label says so) -- confirm against the original data source.
gme_revenue = pd.DataFrame(
    dict(
        Date=[
            "2023-09-30", "2023-06-30", "2023-03-31", "2022-12-31", "2022-09-30",
            "2022-06-30", "2022-03-31", "2021-12-31", "2021-09-30", "2021-06-30",
            "2021-03-31", "2020-12-31", "2020-09-30", "2020-06-30", "2020-03-31",
            "2019-12-31", "2019-09-30", "2019-06-30", "2019-03-31", "2018-12-31",
            "2018-09-30", "2018-06-30", "2018-03-31", "2017-12-31", "2017-09-30",
            "2017-06-30", "2017-03-31", "2016-12-31", "2016-09-30", "2016-06-30",
            "2016-03-31", "2015-12-31", "2015-09-30", "2015-06-30", "2015-03-31",
            "2014-12-31", "2014-09-30", "2014-06-30", "2014-03-31", "2013-12-31",
            "2013-09-30", "2013-06-30", "2013-03-31", "2012-12-31", "2012-09-30",
            "2012-06-30", "2012-03-31", "2011-12-31", "2011-09-30", "2011-06-30",
            "2011-03-31", "2010-12-31", "2010-09-30", "2010-06-30", "2010-03-31",
            "2009-12-31", "2009-09-30", "2009-06-30", "2009-03-31", "2008-12-31",
            "2008-09-30", "2008-06-30", "2008-03-31", "2007-12-31", "2007-09-30",
            "2007-06-30", "2007-03-31", "2006-12-31", "2006-09-30", "2006-06-30",
            "2006-03-31", "2005-12-31", "2005-09-30", "2005-06-30", "2005-03-31",
            "2004-12-31", "2004-09-30", "2004-06-30", "2004-03-31", "2003-12-31",
            "2003-09-30", "2003-06-30", "2003-03-31", "2002-12-31", "2002-09-30",
            "2002-06-30", "2002-03-31", "2002-01-31",
        ],
        Revenue=[
            1056, 1120, 1230, 1370, 1290,
            1210, 1130, 1120, 1190, 1280,
            1220, 1330, 1180, 1000, 1020,
            1230, 1040, 1030, 1000, 1260,
            1230, 1160, 1040, 1160, 1130,
            1110, 1120, 1220, 1090, 1080,
            1070, 1230, 1230, 1220, 1220,
            1300, 1300, 1260, 1240, 1520,
            1340, 1280, 1220, 1700, 1640,
            1620, 1540, 3000, 2800, 2600,
            2500, 3600, 3500, 3300, 3000,
            2800, 2600, 2500, 2400, 3500,
            3200, 3000, 2800, 4000, 3600,
            3400, 3200, 4500, 4300, 4000,
            3700, 5000, 4800, 4500, 4300,
            6000, 5800, 5600, 5400, 7000,
            6800, 6600, 6400, 8000, 7800,
            7600, 7400, 709,
        ],
    )
).assign(Date=lambda frame: pd.to_datetime(frame["Date"]))  # strings -> datetimes
print("GME Revenue (Last 5 rows):")
print(gme_revenue.tail())

# ------------------ Q5: Tesla Plots ------------------ #
# Share price first, then revenue, each as its own figure.
plot_stock_price(stock_data=tesla_data, title="Tesla Historical Share Price")
plot_revenue(revenue_data=tesla_revenue, title="Tesla Historical Revenue")

# ------------------ Q6: GameStop Plots ------------------ #
# Same pair of figures for GameStop.
plot_stock_price(stock_data=gme_data, title="GameStop Historical Share Price")
plot_revenue(revenue_data=gme_revenue, title="GameStop Historical Revenue")




Q1: Tesla stock data head()
                       Date      Open      High       Low     Close  \
0 2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667   
1 2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667   
2 2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000   
3 2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   
4 2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000   

      Volume  Dividends  Stock Splits  
0  281494500        0.0           0.0  
1  257806500        0.0           0.0  
2  123282000        0.0           0.0  
3   77097000        0.0           0.0  
4  103003500        0.0           0.0  
Tesla Revenue (Last 5 rows):
         Date  Revenue
49 2011-06-30       28
50 2011-03-31       24
51 2010-12-31       21
52 2010-09-30       20
53 2010-06-30       27

Q3: GME stock data head()
                       Date      Open      High       Low     Close    Volume  \
0 2002-02-13 00:00:00-05:00  1