In [2]:
pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.6-py312-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.18.2.tar.gz (949 kB)
     ---------------------------------------- 0.0/949.2 kB ? eta -:--:--
     ---------------------------------------- 0.0/949.2 kB ? eta -:--:--
     ---------------------------------------- 0.0/949.2 kB ? eta -:--:--
     ----------- ---------------------------- 262.1/949.2 kB ? eta -:--:--
     -------------------- ----------------- 524.3/949.2 kB 1.2 MB/s eta 0:00:01
     -------------------------------------- 949.2/949.2 kB 1.3 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: st



# Question 1 - Extracting Tesla Stock Data Using yfinance

In [33]:
import yfinance as yf

# Fetch the full price history for Tesla and move the date index into a
# regular "Date" column in a single chained expression.
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max").reset_index()

# Preview the first five rows
tesla_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


# Question 2 - Extracting Tesla Revenue Data Using Web Scraping

In [16]:
pip install  html5lib

Collecting html5lib
  Downloading html5lib-1.1-py2.py3-none-any.whl.metadata (16 kB)
Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
Installing collected packages: html5lib
Successfully installed html5lib-1.1
Note: you may need to restart the kernel to use updated packages.


In [53]:
# Import required libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Step 1: Request HTML content from IBM-hosted page
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
response = requests.get(url, timeout=30)  # never hang the kernel on a stalled connection
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
html_data = response.text

# Step 2: Parse HTML using BeautifulSoup
soup = BeautifulSoup(html_data, "html.parser")

# Step 3: Extract Tesla revenue rows from the first <tbody> on the page
table = soup.find_all("tbody")[0]
rows = table.find_all("tr")

data = []
for row in rows:
    cols = row.find_all("td")
    if len(cols) != 2:
        continue
    date = cols[0].text.strip()
    # "$" and "," are removed here with plain str.replace, so no regex pass
    # (and no '\$' escape) is needed once the DataFrame is built.
    revenue = cols[1].text.strip().replace("$", "").replace(",", "")
    if revenue and date:  # skip rows with a blank date or blank revenue
        data.append({"Date": date, "Revenue": revenue})

# Step 4: Convert to DataFrame and set dtypes.
# Passing `columns` keeps the schema stable even if no rows were scraped.
tesla_revenue = pd.DataFrame(data, columns=["Date", "Revenue"])
tesla_revenue["Revenue"] = tesla_revenue["Revenue"].astype(float)

# Step 5: Convert Date column to datetime and filter date range
tesla_revenue["Date"] = pd.to_datetime(tesla_revenue["Date"])
filtered_data = tesla_revenue.loc[
    (tesla_revenue["Date"] >= "2009-06-30") & (tesla_revenue["Date"] <= "2010-09-30")
]

# Display the filtered rows as the cell's output
filtered_data




  tesla_revenue["Revenue"] = tesla_revenue["Revenue"].str.replace(',|\$', '', regex=True)


Date        Revenue
2010-09-30  31.0
2010-06-30  28.0
2010-03-31  21.0
2009-09-30  46.0
2009-06-30  27.0


# Question 3 - Extracting GameStop Stock Data Using yfinance

In [54]:
# Fetch the full GameStop price history and turn the date index into a
# regular "Date" column in one chained expression.
gamestop = yf.Ticker("GME")
gme_data = gamestop.history(period="max").reset_index()

# Preview the first five rows
gme_data.head()


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620129,1.69335,1.603296,1.691667,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.68325,1.687458,1.658001,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.61592,1.66221,1.603296,1.66221,6892800,0.0,0.0


# Question 4 - Extracting GameStop Revenue Data Using Web Scraping

In [47]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gme_revenue(timeout=30):
    """Scrape the GameStop revenue table from the course-hosted page.

    Downloads the page, selects the second ``<table>`` element and reads the
    first two cells of every row after the header row.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        pandas.DataFrame | None: Columns ``Year`` and ``Revenue``. ``Year`` is
        the unparsed text of the first cell (the notebook's sample output shows
        full dates such as ``2006-01-31``); ``Revenue`` is a float, or ``None``
        when the cell text is not numeric. ``None`` is returned, after printing
        a message, when the request fails or the page has fewer than two
        tables.
    """
    url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"

    # Only the network call is guarded, so a genuine parsing bug is not
    # silently reported as a download error.
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table')

    # The revenue figures are read from the second table on the page
    if len(tables) < 2:
        print("Could not find revenue tables.")
        return None

    revenue_table = tables[1]

    revenue_data = []
    for row in revenue_table.find_all('tr')[1:]:  # Skip header row
        cols = row.find_all('td')
        if len(cols) < 2:
            continue

        year = cols[0].get_text(strip=True)
        revenue = cols[1].get_text(strip=True).replace('$', '').replace(',', '')

        # Blank or non-numeric cells are kept as rows with a missing revenue
        try:
            revenue = float(revenue)
        except ValueError:
            revenue = None

        revenue_data.append({'Year': year, 'Revenue': revenue})

    # Explicit columns keep the schema stable even when no rows were found
    return pd.DataFrame(revenue_data, columns=['Year', 'Revenue'])

# Run the scraper; a None result means it could not produce a table.
gme_revenue = scrape_gme_revenue()

if gme_revenue is not None:
    # Heading and the last five rows, each on its own line
    print("Last 5 rows of GME Revenue Data:", gme_revenue.tail(), sep="\n")


Last 5 rows of GME Revenue Data:
          Year  Revenue
57  2006-01-31   1667.0
58  2005-10-31    534.0
59  2005-07-31    416.0
60  2005-04-30    475.0
61  2005-01-31    709.0


# Question 5 - Tesla Stock and Revenue Dashboard

In [56]:
pip install plotly

Collecting plotly
  Downloading plotly-6.2.0-py3-none-any.whl.metadata (8.5 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.46.0-py3-none-any.whl.metadata (11 kB)
Downloading plotly-6.2.0-py3-none-any.whl (9.6 MB)
   ---------------------------------------- 0.0/9.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/9.6 MB ? eta -:--:--
   - -------------------------------------- 0.3/9.6 MB ? eta -:--:--
   -- ------------------------------------- 0.5/9.6 MB 2.4 MB/s eta 0:00:04
   ------- -------------------------------- 1.8/9.6 MB 3.7 MB/s eta 0:00:03
   ------------- -------------------------- 3.1/9.6 MB 4.4 MB/s eta 0:00:02
   ------------------- -------------------- 4.7/9.6 MB 5.0 MB/s eta 0:00:01
   ----------------------- ---------------- 5.8/9.6 MB 5.1 MB/s eta 0:00:01
   ----------------------------- ---------- 7.1/9.6 MB 5.2 MB/s eta 0:00:01
   --------------------------------- ------ 8.1/9.6 MB 5.2 MB/s eta 0:00:01
   ---------------------



In [61]:
# Step 1: Install Plotly (only run if not already installed)
# !pip install plotly

# Step 2: Import required libraries
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go

# Step 3: Download Tesla stock data
tsla = yf.Ticker("TSLA")
tsla_data = tsla.history(period="max").reset_index()

# Remove timezone from stock 'Date' column so it can be compared with the
# timezone-naive revenue dates in Step 5
tsla_data["Date"] = tsla_data["Date"].dt.tz_localize(None)

# Step 4: Scrape Tesla revenue data from hosted HTML
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
response = requests.get(url, timeout=30)  # never hang the kernel on a stalled connection
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(response.text, "html.parser")

# Extract the first table body (Tesla)
table = soup.find_all("tbody")[0]
rows = table.find_all("tr")

# Build DataFrame from table
revenue_data = []
for row in rows:
    cols = row.find_all("td")
    if len(cols) != 2:
        continue
    date = cols[0].text.strip()
    # "$" and "," are removed here with plain str.replace, so no regex pass
    # (and no '\$' escape) is needed once the DataFrame is built.
    revenue = cols[1].text.strip().replace("$", "").replace(",", "")
    if date and revenue:  # skip rows with a blank date or blank revenue
        revenue_data.append({"Date": date, "Revenue": revenue})

# Passing `columns` keeps the schema stable even if no rows were scraped
tesla_revenue = pd.DataFrame(revenue_data, columns=["Date", "Revenue"])
tesla_revenue["Revenue"] = tesla_revenue["Revenue"].astype(float)
tesla_revenue["Date"] = pd.to_datetime(tesla_revenue["Date"])

# Step 5: Filter stock data to match revenue timeline
stock_data = tsla_data[tsla_data["Date"] >= tesla_revenue["Date"].min()]

# Step 6: Plot Stock Price (Interactive)
fig_stock = go.Figure()
fig_stock.add_trace(go.Scatter(
    x=stock_data['Date'], y=stock_data['Close'],
    mode='lines', name='Closing Price',
    line=dict(color='royalblue')
))
fig_stock.update_layout(
    title='Historical Share Price',
    xaxis_title='Date',
    yaxis_title='Stock Price ($)',
    xaxis_rangeslider_visible=True,
    plot_bgcolor='rgba(240,240,240,0.95)',
    font=dict(color='darkblue'),
)
fig_stock.show()

# Step 7: Plot Revenue (Interactive)
fig_revenue = go.Figure()
fig_revenue.add_trace(go.Scatter(
    x=tesla_revenue['Date'], y=tesla_revenue['Revenue'],
    mode='lines+markers', name='Revenue',
    line=dict(color='firebrick')
))
fig_revenue.update_layout(
    title='Historical Revenue',
    xaxis_title='Date',
    yaxis_title='Revenue ($US Millions)',
    plot_bgcolor='rgba(240,240,240,0.95)',
    font=dict(color='maroon'),
)
fig_revenue.show()


  tesla_revenue["Revenue"] = tesla_revenue["Revenue"].str.replace(',|\$', '', regex=True)


# Question 6 - GameStop Stock and Revenue Dashboard

In [62]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go

# ---- STEP 1: Fetch GameStop stock data ----
gme = yf.Ticker("GME")
gme_data = gme.history(period="max").reset_index()
# Drop the timezone so the dates compare cleanly with the revenue dates below
gme_data["Date"] = gme_data["Date"].dt.tz_localize(None)

# ---- STEP 2: Scrape GameStop revenue data ----
# GameStop revenue lives on stock.html (the page scrape_gme_revenue reads);
# revenue.htm is the page the Tesla cells scrape, so using it here would plot
# the wrong company's revenue.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html"
response = requests.get(url, timeout=30)  # never hang the kernel on a stalled connection
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(response.text, "html.parser")

# Revenue rows are read from the second table body (tbody[1])
# NOTE(review): assumes each <table> on the page has exactly one <tbody>, so
# this is the same table as tables[1] in scrape_gme_revenue - confirm.
table = soup.find_all("tbody")[1]
rows = table.find_all("tr")

gme_revenue_data = []
for row in rows:
    cols = row.find_all("td")
    if len(cols) != 2:
        continue
    date = cols[0].text.strip()
    # "$" and "," are removed here with plain str.replace, so no regex pass
    # (and no '\$' escape) is needed once the DataFrame is built.
    revenue = cols[1].text.strip().replace("$", "").replace(",", "")
    if date and revenue:  # skip rows with a blank date or blank revenue
        gme_revenue_data.append({"Date": date, "Revenue": revenue})

# Passing `columns` keeps the schema stable even if no rows were scraped
gme_revenue = pd.DataFrame(gme_revenue_data, columns=["Date", "Revenue"])
gme_revenue["Revenue"] = gme_revenue["Revenue"].astype(float)
gme_revenue["Date"] = pd.to_datetime(gme_revenue["Date"])

# Filter stock data to match revenue dates
gme_filtered = gme_data[gme_data["Date"] >= gme_revenue["Date"].min()]

# ---- STEP 3: Plot GameStop Historical Share Price ----
fig_price = go.Figure()

fig_price.add_trace(go.Scatter(
    x=gme_filtered["Date"], y=gme_filtered["Close"],
    mode="lines", name="Closing Price",
    line=dict(color="blue", width=1.5)
))

fig_price.update_layout(
    title="Historical Share Price",
    xaxis_title="Date",
    yaxis_title="Price ($US)",
    xaxis_rangeslider_visible=True,
    plot_bgcolor="rgba(240,240,240,0.95)",
    font=dict(color="navy")
)

fig_price.show()

# ---- STEP 4: Plot GameStop Historical Revenue ----
fig_revenue = go.Figure()

fig_revenue.add_trace(go.Scatter(
    x=gme_revenue["Date"], y=gme_revenue["Revenue"],
    mode="lines+markers", name="Revenue",
    line=dict(color="firebrick")
))

fig_revenue.update_layout(
    title="Historical Revenue",
    xaxis_title="Date",
    yaxis_title="Revenue ($US Millions)",
    plot_bgcolor="rgba(240,240,240,0.95)",
    font=dict(color="darkred"),
    annotations=[
        dict(
            text="GME",  # ticker label for the chart (was mistakenly "IBM")
            xref="paper", yref="paper",
            x=1.02, y=1.1, showarrow=False,
            font=dict(size=24, color="brown")
        )
    ]
)

fig_revenue.show()



invalid escape sequence '\$'


invalid escape sequence '\$'


invalid escape sequence '\$'

