In [108]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Define Graphing Function


In [109]:
def graph(stock_data, revenue_data, stock):
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        subplot_titles=("Historical Share Price", "Historical Revenue"),
        vertical_spacing=0.3
    )
    
    # Filter data by date
    stock_data_specific = stock_data[stock_data.Date <= '2024-06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2024-04-30']
    
    # Clean Revenue data
    revenue_data_specific["Revenue"] = revenue_data_specific["Revenue"].replace(
        {r"[\$,]": ""}, regex=True
    ).astype("float")

    # Add traces for stock prices and revenue
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(stock_data_specific.Date),
            y=stock_data_specific.Close.astype("float"),
            name="Share Price"
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(revenue_data_specific.Date),
            y=revenue_data_specific.Revenue,
            name="Revenue"
        ),
        row=2, col=1
    )

    # Update axis titles
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)

    # Final layout adjustments
    fig.update_layout(
        showlegend=False,
        height=900,
        title=stock,
        xaxis_rangeslider_visible=True
    )
    fig.show()

# Call the function


## Question 1: Use yfinance to Extract Stock Data


In [110]:
tesla=yf.Ticker("TSLA")

In [111]:
tesla_data=tesla.history(period="max")

In [112]:
tesla_data.reset_index(inplace=True)
tesla_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,281494500,0,0.0
1,2010-06-30,1.719333,2.028000,1.553333,1.588667,257806500,0,0.0
2,2010-07-01,1.666667,1.728000,1.351333,1.464000,123282000,0,0.0
3,2010-07-02,1.533333,1.540000,1.247333,1.280000,77097000,0,0.0
4,2010-07-06,1.333333,1.333333,1.055333,1.074000,103003500,0,0.0
...,...,...,...,...,...,...,...,...
3655,2025-01-07,405.829987,414.329987,390.000000,394.359985,75699500,0,0.0
3656,2025-01-08,392.950012,402.500000,387.399994,394.940002,73038800,0,0.0
3657,2025-01-10,391.399994,399.279999,377.290009,394.739990,62287300,0,0.0
3658,2025-01-13,383.209991,403.790009,380.070007,403.309998,67580500,0,0.0


## Question 2: Use Webscraping to Extract Tesla Revenue Data


Use the `requests` library to download the webpage https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm Save the text of the response as a variable named `html_data`.


In [113]:
url="https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue?utm_source=chatgpt.com"
html_data=requests.get(url).text

In [114]:
soup=BeautifulSoup(html_data,'html5lib')

Using `BeautifulSoup` or the `read_html` function extract the table with `Tesla Quarterly Revenue` and store it into a dataframe named `tesla_revenue`. The dataframe should have columns `Date` and `Revenue`.


<details><summary>Click here if you need help locating the table</summary>

```
    
Below is the code to isolate the table, you will now need to loop through the rows and columns like in the previous lab
    
soup.find_all("tbody")[1]
    
If you want to use the read_html function the table is located at index 1


```

</details>


In [134]:
# pip install selenium

In [135]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
# Set up Selenium WebDriver
driver = webdriver.Chrome()  # Or use another driver, e.g., FirefoxDriver
driver.get("https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue")
# Wait for the page to load (adjust if necessary)
driver.implicitly_wait(10)
# Get the page source and parse it with BeautifulSoup
soup = BeautifulSoup(driver.page_source, "html.parser")
# Close the WebDriver
driver.quit()
# Scrape the data
tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])
second_table = soup.find_all("table")[1]
html_tbody = second_table.find("tbody")
list_tag_rows_tr = html_tbody.find_all("tr")

rows = []
for row in list_tag_rows_tr:
    col = row.find_all("td")
    date = col[0].text.strip()
    revenue = col[1].text.strip()
    rows.append({"Date": date, "Revenue": revenue})

# Use pd.concat to append rows
tesla_revenue = pd.concat([tesla_revenue, pd.DataFrame(rows)], ignore_index=True)

print(tesla_revenue)


ReadTimeoutError: HTTPConnectionPool(host='localhost', port=49607): Read timed out. (read timeout=120)

In [117]:
print(soup.prettify())

<html class="js flexbox canvas canvastext webgl no-touch geolocation postmessage no-websqldatabase indexeddb hashchange history draganddrop websockets rgba hsla multiplebgs backgroundsize borderimage borderradius boxshadow textshadow opacity cssanimations csscolumns cssgradients cssreflections csstransforms csstransforms3d csstransitions fontface generatedcontent video audio localstorage sessionstorage webworkers no-applicationcache svg inlinesvg smil svgclippaths" style="">
 <!--<![endif]-->
 <script async="" id="IC_GTagScript" src="https://www.googletagmanager.com/gtag/js?id=G-5EST90M6PX" type="text/javascript">
 </script>
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <link href="https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue" rel="canonical"/>
  <link href="https://m.macrotrends.net/stocks/charts/TSLA/tesla/revenue" media="only screen and (max-width: 768px)" rel="alternate"/>
  <title>
   Tesla Revenue 2010-2024 |

In [118]:
tesla_revenue["Revenue"] = tesla_revenue['Revenue'].str.replace(',|\$',"")

Execute the following lines to remove an null or empty strings in the Revenue column.


In [119]:
tesla_revenue.dropna(inplace=True)

tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""]

Display the last 5 row of the `tesla_revenue` dataframe using the `tail` function. Take a screenshot of the results.


In [120]:
tesla_revenue

Unnamed: 0,Date,Revenue
0,2024-09-30,"$25,182"
1,2024-06-30,"$25,500"
2,2024-03-31,"$21,301"
3,2023-12-31,"$25,167"
4,2023-09-30,"$23,350"
...,...,...
56,2010-09-30,$31
57,2010-06-30,$28
58,2010-03-31,$21
60,2009-09-30,$46


## Question 3: Use yfinance to Extract Stock Data


Using the `Ticker` function enter the ticker symbol of the stock we want to extract data on to create a ticker object. The stock is GameStop and its ticker symbol is `GME`.


In [121]:
GameStop=yf.Ticker("GME")

Using the ticker object and the function `history` extract stock information and save it in a dataframe named `gme_data`. Set the `period` parameter to `max` so we get information for the maximum amount of time.


In [122]:
gme_data=GameStop.history(period="max")

In [123]:
gme_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-02-13,1.620128,1.693350,1.603296,1.691666,76216000,0.0,0.0
2002-02-14,1.712707,1.716074,1.670626,1.683250,11021600,0.0,0.0
2002-02-15,1.683251,1.687459,1.658002,1.674834,8389600,0.0,0.0
2002-02-19,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
2002-02-20,1.615920,1.662210,1.603296,1.662210,6892800,0.0,0.0
...,...,...,...,...,...,...,...
2025-01-07,32.799999,34.400002,31.709999,33.369999,13360700,0.0,0.0
2025-01-08,32.970001,33.369999,32.410000,32.959999,6320000,0.0,0.0
2025-01-10,32.500000,32.939999,31.400000,32.310001,7068200,0.0,0.0
2025-01-13,31.600000,31.799999,30.900000,31.020000,5567000,0.0,0.0


**Reset the index** using the `reset_index(inplace=True)` function on the gme_data DataFrame and display the first five rows of the `gme_data` dataframe using the `head` function. Take a screenshot of the results and code from the beginning of Question 3 to the results below.


In [124]:
gme_data.reset_index(inplace=True)
gme_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13,1.620128,1.69335,1.603296,1.691666,76216000,0.0,0.0
1,2002-02-14,1.712707,1.716074,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15,1.683251,1.687459,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20,1.61592,1.66221,1.603296,1.66221,6892800,0.0,0.0


## Question 4: Use Webscraping to Extract GME Revenue Data


Use the `requests` library to download the webpage https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html. Save the text of the response as a variable named `html_data`.


In [127]:
url="https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue"
html_data=requests.get(url).text

Parse the html data using `beautiful_soup`.


In [128]:
soup=BeautifulSoup(html_data,'html5lib')

Using `BeautifulSoup` or the `read_html` function extract the table with `GameStop Quarterly Revenue` and store it into a dataframe named `gme_revenue`. The dataframe should have columns `Date` and `Revenue`. Make sure the comma and dollar sign is removed from the `Revenue` column using a method similar to what you did in Question 2.


<details><summary>Click here if you need help locating the table</summary>

```
    
Below is the code to isolate the table, you will now need to loop through the rows and columns like in the previous lab
    
soup.find_all("tbody")[1]
    
If you want to use the read_html function the table is located at index 1


```

</details>


In [136]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

# Set up Selenium WebDriver
driver = webdriver.Chrome()  # Adjust path if needed
driver.get("https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue")
driver.implicitly_wait(10)

# Parse the page source
soup = BeautifulSoup(driver.page_source, "html.parser")
driver.quit()

# Initialize DataFrame
gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])

# Find the second table
tables = soup.find_all("table")
if len(tables) > 1:
    second_table = tables[1]
    tbody = second_table.find("tbody")
    rows = tbody.find_all("tr")

    # Collect rows in a list
    data = []
    for row in rows:
        cols = row.find_all("td")
        date = cols[0].text.strip()
        revenue = cols[1].text.strip().replace(',', '').replace('$', '')
        data.append({"Date": date, "Revenue": revenue})

    # Create DataFrame from collected rows
    gme_revenue = pd.DataFrame(data)

# Display the last 5 rows
print(gme_revenue.tail())


ReadTimeoutError: HTTPConnectionPool(host='localhost', port=50659): Read timed out. (read timeout=120)

Display the last five rows of the `gme_revenue` dataframe using the `tail` function. Take a screenshot of the results.


In [None]:
gme_revenue

## Question 5: Plot Tesla Stock Graph


Use the `make_graph` function to graph the GameStop Stock Data, also provide a title for the graph. The structure to call the `make_graph` function is `make_graph(gme_data, gme_revenue, 'GameStop')`. Note the graph will only show data upto June 2021.


In [None]:
graph(gme_data, gme_revenue, 'GameStop')