<a href="https://colab.research.google.com/github/TheDataCleaner0/Web-Scraping-Using-Python/blob/main/Historical_Stock_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
!pip install yfinance
!pip install bs4

import pandas as pd
import yfinance as yf
import requests
from bs4 import BeautifulSoup

import plotly.graph_objects as go
from plotly.subplots import make_subplots



In [73]:
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data.Date.dt.date), y=stock_data.Close.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data.Date.dt.date), y=revenue_data.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Billions)", row=2, col=1)
    fig.update_layout(showlegend=False,
                      height=900,
                      title=stock,
                      xaxis_rangeslider_visible=True)
    fig.show()

In [28]:
stock_ticker = yf.Ticker('NVDA')
stock_data = stock_ticker.history(period='max')
stock_data.reset_index(inplace=True) # by default date is the index
stock_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1999-01-22 00:00:00-05:00,0.040116,0.044772,0.035579,0.037609,2714688000,0.0,0.0
1,1999-01-25 00:00:00-05:00,0.040594,0.042026,0.037609,0.041549,510480000,0.0,0.0
2,1999-01-26 00:00:00-05:00,0.042026,0.042862,0.037728,0.038325,343200000,0.0,0.0
3,1999-01-27 00:00:00-05:00,0.038444,0.039400,0.036295,0.038206,244368000,0.0,0.0
4,1999-01-28 00:00:00-05:00,0.038206,0.038444,0.037848,0.038086,227520000,0.0,0.0
...,...,...,...,...,...,...,...,...
6664,2025-07-22 00:00:00-04:00,171.339996,171.389999,164.580002,167.029999,193114300,0.0,0.0
6665,2025-07-23 00:00:00-04:00,169.529999,171.259995,167.970001,170.779999,154082200,0.0,0.0
6666,2025-07-24 00:00:00-04:00,172.440002,173.830002,171.300003,173.740005,128984600,0.0,0.0
6667,2025-07-25 00:00:00-04:00,173.610001,174.720001,172.960007,173.500000,122316800,0.0,0.0


In [70]:
print(stock_data.Date)

0      1999-01-22 00:00:00-05:00
1      1999-01-25 00:00:00-05:00
2      1999-01-26 00:00:00-05:00
3      1999-01-27 00:00:00-05:00
4      1999-01-28 00:00:00-05:00
                  ...           
6664   2025-07-22 00:00:00-04:00
6665   2025-07-23 00:00:00-04:00
6666   2025-07-24 00:00:00-04:00
6667   2025-07-25 00:00:00-04:00
6668   2025-07-28 00:00:00-04:00
Name: Date, Length: 6669, dtype: datetime64[ns, America/New_York]


In [50]:
url =  'https://companiesmarketcap.com/cad/nvidia/revenue/'

page = requests.get(url).text

soup = BeautifulSoup(page, 'html.parser')

print(soup.prettify())


<!DOCTYPE html>
<html lang="en-CA">
 <head>
  <title>
   NVIDIA (NVDA) - Revenue
  </title>
  <meta charset="utf-8"/>
  <link href="https://companiesmarketcap.com/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
  <link href="/img/icons/apple-icon-57x57.png" rel="apple-touch-icon" sizes="57x57"/>
  <link href="/img/icons/apple-icon-60x60.png" rel="apple-touch-icon" sizes="60x60"/>
  <link href="/img/icons/apple-icon-72x72.png" rel="apple-touch-icon" sizes="72x72"/>
  <link href="/img/icons/apple-icon-76x76.png" rel="apple-touch-icon" sizes="76x76"/>
  <link href="/img/icons/apple-icon-114x114.png" rel="apple-touch-icon" sizes="114x114"/>
  <link href="/img/icons/apple-icon-120x120.png" rel="apple-touch-icon" sizes="120x120"/>
  <link href="/img/icons/apple-icon-144x144.png" rel="apple-touch-icon" sizes="144x144"/>
  <link href="/img/icons/apple-icon-152x152.png" rel="apple-touch-icon" sizes="152x152"/>
  <link href="/img/icons/apple-icon-180x180.png" rel="apple-touch-icon" sizes=

In [64]:
soup1 = soup.find(class_='table')
revenue_data_list = []

for row in soup1.find('tbody').find_all('tr'):
  col = row.find_all('td')
  date = col[0].text.replace('(TTM)','')
  revenue = col[1].text.replace('C$','').replace('B','')
  revenue_data_list.append({'Date':date, 'Revenue':revenue})

nvda_revenue = pd.DataFrame(revenue_data_list)

nvda_revenue

Unnamed: 0,Date,Revenue
0,2025,208.03
1,2024,187.26
2,2023,80.8
3,2022,36.6
4,2021,34.3
5,2020,21.26
6,2019,14.25
7,2018,15.95
8,2017,12.21
9,2016,9.28


In [74]:
make_graph(stock_data, nvda_revenue, 'Nvidia Historical Share Price & Revenue')