<a href="https://colab.research.google.com/github/Notfamousyet/Tools_For_Data_Science/blob/main/Extracting_and_Visualizing_Stock_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py) ... [?25l[?25hdone
  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1256 sha256=ac967391ffe98eec10c8f0952be1e459def0fa1350126086b572a04a5deaf829
  Stored in directory: /root/.cache/pip/wheels/25/42/45/b773edc52acb16cd2db4cf1a0b47117e2f69bb4eb300ed0e70
Successfully built bs4
Installing collected packages: bs4
Successfully installed bs4-0.0.1


In [2]:
from io import StringIO

import pandas as pd
import plotly.graph_objects as go
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from plotly.subplots import make_subplots

In [3]:
def make_graph(stock_data, revenue_data, stock):
    """Display a two-row figure: share price on top, revenue underneath.

    Parameters
    ----------
    stock_data
        Object exposing ``Date`` and ``Close`` attributes (e.g. a DataFrame
        with those columns). ``Date`` is parsed with ``pd.to_datetime`` and
        ``Close`` is cast to float.
    revenue_data
        Object exposing ``Date`` and ``Revenue`` attributes. ``Revenue`` must
        be castable to float.
    stock
        Text used as the figure title.

    Returns
    -------
    None
        The figure is rendered through ``fig.show()``.
    """
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        subplot_titles=("Historical Share Price", "Historical Revenue"),
        vertical_spacing=.3,
    )

    # `infer_datetime_format` is deprecated in pandas 2.x (a strict version of
    # it is the default there), so it is no longer passed.
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(stock_data.Date),
            y=stock_data.Close.astype("float"),
            name="Share Price",
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(revenue_data.Date),
            y=revenue_data.Revenue.astype("float"),
            name="Revenue",
        ),
        row=2,
        col=1,
    )

    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(
        showlegend=False,
        height=900,
        title=stock,
        xaxis_rangeslider_visible=True,
    )
    fig.show()

**Question 1: Use yfinance to Extract Tesla Stock Data**

In [4]:
# Fetch the full available Tesla price history, then turn the date index
# into an ordinary "Date" column.
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max").reset_index()
tesla_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


**Question 2: Use Webscraping to Extract Tesla Revenue Data**


In [8]:
url = "https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue"

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Start from an empty frame so `tesla_revenue` always exists for the plotting
# cell, even when the site refuses the request (e.g. HTTP 403).
tesla_revenue = pd.DataFrame(columns=["Date", "Revenue"])

# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    html_data = response.text

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    tables = pd.read_html(StringIO(html_data), match="Tesla Quarterly Revenue", flavor='bs4')

    # Assumes the first matching table holds date and revenue in its first
    # two columns; they are renamed by position rather than by header text.
    tesla_revenue = tables[0].iloc[:, :2].copy()
    tesla_revenue.columns = ["Date", "Revenue"]

    # Strip "$" and "," so the values can later be cast to float.
    tesla_revenue["Revenue"] = tesla_revenue["Revenue"].str.replace(r"[$,]", "", regex=True)

    # Drop rows with missing or empty values.
    tesla_revenue = tesla_revenue.dropna()
    tesla_revenue = tesla_revenue[tesla_revenue["Revenue"] != ""]
else:
    print(f"Error: Unable to retrieve data. Status code {response.status_code}")

# Last few rows (an empty frame if the download failed).
tesla_revenue.tail()


Error: Unable to retrieve data. Status code 403


**Question 3: Use yfinance to Extract GameStop Stock Data**

In [9]:
# Fetch the full available GameStop price history, then turn the date index
# into an ordinary "Date" column.
gamestop = yf.Ticker("GME")
gme_data = gamestop.history(period="max").reset_index()
gme_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620128,1.69335,1.603296,1.691666,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.68325,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.68325,1.687458,1.658001,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.615921,1.66221,1.603296,1.66221,6892800,0.0,0.0


**Question 4: Use Webscraping to Extract GME Revenue Data**

In [11]:
url = "https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue"

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Start from an empty frame so `gme_revenue` always exists for the plotting
# cell, even when the site refuses the request (e.g. HTTP 403).
gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])

# The timeout keeps the cell from hanging on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    html_data = response.text

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    tables = pd.read_html(StringIO(html_data), match="GameStop Quarterly Revenue", flavor='bs4')

    # Assumes the first matching table holds date and revenue in its first
    # two columns; they are renamed by position rather than by header text.
    gme_revenue = tables[0].iloc[:, :2].copy()
    gme_revenue.columns = ["Date", "Revenue"]

    # Strip "$" as well as "," so the values can later be cast to float.
    gme_revenue["Revenue"] = gme_revenue["Revenue"].str.replace(r"[$,]", "", regex=True)

    # Drop rows with missing or empty values.
    gme_revenue = gme_revenue.dropna()
    gme_revenue = gme_revenue[gme_revenue["Revenue"] != ""]
else:
    print(f"Error: Unable to retrieve data. Status code {response.status_code}")

# Last few rows (an empty frame if the download failed).
gme_revenue.tail()


Error: Unable to retrieve data. Status code 403


**Question 5: Plot Tesla Stock Graph**

In [None]:
# Plot Tesla share price against the scraped Tesla revenue.
make_graph(
    stock_data=tesla_data,
    revenue_data=tesla_revenue,
    stock='Tesla Stock Data Graph',
)

**Question 6: Plot GameStop Stock Graph**

In [None]:
# Plot GameStop share price against the scraped GameStop revenue.
make_graph(
    stock_data=gme_data,
    revenue_data=gme_revenue,
    stock='GameStop Stock Data Graph',
)