**Extracting and Visualizing Stock Data**

In [1]:
# Importing libraries 

from io import StringIO

import pandas as pd
import plotly.graph_objects as go
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from plotly.subplots import make_subplots

In [38]:
# Defining a function to make a graph based on the stock data
# Note: the graph only shows data up to the end of June 2021.

def make_graph(stock_data, revenue_data, stock):
    """Plot historical share price and revenue as two stacked line charts.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Must contain a ``Date`` column and a ``Close`` column.
    revenue_data : pandas.DataFrame
        Must contain a ``Date`` column and a ``Revenue`` column whose values
        can be converted to float.
    stock : str
        Text used as the figure title.

    Only rows dated on or before 2021-06-30 are plotted. The figure is
    displayed with ``fig.show()``; nothing is returned.
    """
    cutoff = pd.Timestamp("2021-06-30")

    def _rows_until_cutoff(frame):
        # Parse the dates once and compare them as timestamps rather than as
        # strings, so the filter works for both text and datetime columns.
        dates = pd.to_datetime(frame["Date"])
        # A timezone-aware column cannot be compared with a naive cutoff, so
        # the comparison is done on the wall-clock (timezone-stripped) values.
        naive_dates = dates.dt.tz_localize(None) if dates.dt.tz is not None else dates
        return frame.assign(Date=dates)[naive_dates <= cutoff]

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        subplot_titles=("Historical Share Price", "Historical Revenue"),
        vertical_spacing=.3)

    stock_data_specific = _rows_until_cutoff(stock_data)
    revenue_data_specific = _rows_until_cutoff(revenue_data)

    fig.add_trace(
        go.Scatter(
            x=stock_data_specific.Date,
            y=stock_data_specific.Close.astype("float"), name="Share Price"),
        row=1, col=1)

    fig.add_trace(
        go.Scatter(
            x=revenue_data_specific.Date,
            y=revenue_data_specific.Revenue.astype("float"), name="Revenue"),
        row=2, col=1)

    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)

    fig.update_layout(
        showlegend=False,
        height=900,
        title=stock,
        xaxis_rangeslider_visible=True)

    fig.show()

In [3]:
# Extracting Stock Data using yfinance

# The stock is Tesla and its ticker symbol is TSLA.
# Pass the ticker symbol to yf.Ticker to create a ticker object.

tesla = yf.Ticker("TSLA")

In [4]:
# Use the ticker object's history() method to extract stock information and
# save it in a dataframe named tesla_data.

# Set the period parameter to "max" to get information for the maximum amount of time.

tesla_data = tesla.history(period="max")


In [5]:
# Move the Date index into a regular column.
# The guard keeps the cell idempotent: resetting the index a second time would
# add a spurious "index" column to the dataframe.

if "Date" not in tesla_data.columns:
    tesla_data = tesla_data.reset_index()
tesla_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0,0.0


In [6]:
# Using web scraping to extract Tesla revenue data

url = "https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue"
# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of letting an error page
# be parsed as if it were the revenue page.
response = requests.get(url, timeout=30)
response.raise_for_status()
tesla_html_data = response.text

# Parse the html data

soup = BeautifulSoup(tesla_html_data, 'html5lib')


In [10]:
# Collect every <table> element on the page with find_all; len shows how many
# tables the page contains.

tables = soup.find_all('table')
len(tables)

6

In [11]:
# Find the index of the required table from the caption text it contains.
matching_indices = [i for i, table in enumerate(tables)
                    if "Tesla Quarterly Revenue" in str(table)]
if not matching_indices:
    # Fail loudly: otherwise table_index would be undefined, or silently keep a
    # stale value from an earlier cell run.
    raise ValueError("No table containing 'Tesla Quarterly Revenue' was found")
table_index = matching_indices[-1]  # if several tables match, keep the last one
print(table_index)


1


In [16]:
# Use read_html to extract the Tesla Quarterly Revenue table (selected through
# table_index rather than a hard-coded position) into a dataframe named tesla_revenue.
# The HTML is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas versions.

tesla_revenue = pd.read_html(StringIO(str(tables[table_index])), flavor='bs4')[0]
tesla_revenue.head(10)

Unnamed: 0,Tesla Quarterly Revenue (Millions of US $),Tesla Quarterly Revenue (Millions of US $).1
0,2022-09-30,"$21,454"
1,2022-06-30,"$16,934"
2,2022-03-31,"$18,756"
3,2021-12-31,"$17,719"
4,2021-09-30,"$13,757"
5,2021-06-30,"$11,958"
6,2021-03-31,"$10,389"
7,2020-12-31,"$10,744"
8,2020-09-30,"$8,771"
9,2020-06-30,"$6,036"


In [29]:
# Give the two columns of the revenue table short names that are easier to reference.

column_names = {
    'Tesla Quarterly Revenue (Millions of US $)': 'Date',
    'Tesla Quarterly Revenue (Millions of US $).1': 'Revenue',
}
tesla_revenue = tesla_revenue.rename(columns=column_names)

In [30]:
# Remove the commas and dollar signs from the Revenue column.
# regex=True is passed explicitly: the default changed to literal matching in
# newer pandas versions, which would leave the values untouched.

tesla_revenue['Revenue'] = tesla_revenue['Revenue'].str.replace(r'[,$]', "", regex=True)


The default value of regex will change from True to False in a future version.



In [32]:
# Drop rows that contain nulls, then keep only rows whose Revenue is not an empty string.
tesla_revenue = (
    tesla_revenue
    .dropna()
    .loc[lambda frame: frame['Revenue'] != ""]
)

In [35]:
# Show the first 10 rows of tesla_revenue.
tesla_revenue.head(10)

Unnamed: 0,Date,Revenue
0,2022-09-30,21454
1,2022-06-30,16934
2,2022-03-31,18756
3,2021-12-31,17719
4,2021-09-30,13757
5,2021-06-30,11958
6,2021-03-31,10389
7,2020-12-31,10744
8,2020-09-30,8771
9,2020-06-30,6036


In [39]:
# Use the make_graph function to graph the Tesla stock and revenue data, with 'Tesla' as the graph title.

make_graph(tesla_data, tesla_revenue, 'Tesla')

Using the same process on a different stock

In [42]:
# Pass the ticker symbol to yf.Ticker to create a ticker object.
# The stock is Apple and its ticker symbol is AAPL.
Apple = yf.Ticker("AAPL")

# Use the Apple ticker object's history() method to extract stock information
# into a dataframe named apple_data.
# period="max" gives information for the maximum amount of time.
# reset_index() moves the Date index into a regular column; the frame is rebuilt
# from a fresh download each time, so re-running the cell gives the same result.
apple_data = Apple.history(period="max").reset_index()

# Show the first 10 rows
apple_data.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1980-12-12 00:00:00-05:00,0.099874,0.100308,0.099874,0.099874,469033600,0.0,0.0
1,1980-12-15 00:00:00-05:00,0.095098,0.095098,0.094663,0.094663,175884800,0.0,0.0
2,1980-12-16 00:00:00-05:00,0.088149,0.088149,0.087715,0.087715,105728000,0.0,0.0
3,1980-12-17 00:00:00-05:00,0.089886,0.09032,0.089886,0.089886,86441600,0.0,0.0
4,1980-12-18 00:00:00-05:00,0.092492,0.092926,0.092492,0.092492,73449600,0.0,0.0
5,1980-12-19 00:00:00-05:00,0.098137,0.098571,0.098137,0.098137,48630400,0.0,0.0
6,1980-12-22 00:00:00-05:00,0.102913,0.103348,0.102913,0.102913,37363200,0.0,0.0
7,1980-12-23 00:00:00-05:00,0.107256,0.10769,0.107256,0.107256,46950400,0.0,0.0
8,1980-12-24 00:00:00-05:00,0.112901,0.113335,0.112901,0.112901,48003200,0.0,0.0
9,1980-12-26 00:00:00-05:00,0.123323,0.123757,0.123323,0.123323,55574400,0.0,0.0


In [44]:

# Using web scraping to extract Apple revenue data

url = "https://www.macrotrends.net/stocks/charts/AAPL/apple/revenue"
# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of letting an error page
# be parsed as if it were the revenue page.
response = requests.get(url, timeout=30)
response.raise_for_status()
apple_html_data = response.text

# Parse the html data
soup2 = BeautifulSoup(apple_html_data, 'html5lib')


In [51]:
# Collect every <table> element on the Apple page with find_all; len shows how
# many tables the page contains.

tables_apple = soup2.find_all('table')
len(tables_apple)

6

In [52]:
# Find the index of the required table from the caption text it contains.
matching_indices = [i for i, table in enumerate(tables_apple)
                    if "Apple Quarterly Revenue" in str(table)]
if not matching_indices:
    # Fail loudly: otherwise table_index would silently keep the value computed
    # for a different page earlier in the notebook.
    raise ValueError("No table containing 'Apple Quarterly Revenue' was found")
table_index = matching_indices[-1]  # if several tables match, keep the last one
print(table_index)

1


In [53]:
# Use read_html to extract the Apple Quarterly Revenue table into a dataframe
# named Apple_revenue. The table is taken from tables_apple (the tables of the
# Apple page), selected through table_index.
# The HTML is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas versions.

Apple_revenue = pd.read_html(StringIO(str(tables_apple[table_index])), flavor='bs4')[0]
Apple_revenue.head(10)

Unnamed: 0,Apple Quarterly Revenue (Millions of US $),Apple Quarterly Revenue (Millions of US $).1
0,2022-09-30,"$90,146"
1,2022-06-30,"$82,959"
2,2022-03-31,"$97,278"
3,2021-12-31,"$123,945"
4,2021-09-30,"$83,360"
5,2021-06-30,"$81,434"
6,2021-03-31,"$89,584"
7,2020-12-31,"$111,439"
8,2020-09-30,"$64,698"
9,2020-06-30,"$59,685"


In [54]:
# Rename the columns of the Apple revenue table for easier reference.

Apple_revenue = Apple_revenue.rename(
    columns={'Apple Quarterly Revenue (Millions of US $)': 'Date',
             'Apple Quarterly Revenue (Millions of US $).1': 'Revenue'})


# Remove the commas and dollar signs from the Revenue column.
# regex=True is passed explicitly: the default changed to literal matching in
# newer pandas versions, which would leave the values untouched.
Apple_revenue['Revenue'] = Apple_revenue['Revenue'].str.replace(r'[,$]', "", regex=True)


# Remove rows containing nulls and rows whose Revenue is an empty string.
Apple_revenue = Apple_revenue.dropna()
Apple_revenue = Apple_revenue[Apple_revenue['Revenue'] != ""]


The default value of regex will change from True to False in a future version.



In [55]:
# Show the first 10 rows of Apple_revenue.
Apple_revenue.head(10)

Unnamed: 0,Date,Revenue
0,2022-09-30,90146
1,2022-06-30,82959
2,2022-03-31,97278
3,2021-12-31,123945
4,2021-09-30,83360
5,2021-06-30,81434
6,2021-03-31,89584
7,2020-12-31,111439
8,2020-09-30,64698
9,2020-06-30,59685


In [56]:
# Use the make_graph function to graph the Apple stock and revenue data, with 'Apple' as the graph title.

make_graph(apple_data, Apple_revenue, 'Apple')