In [2]:
# Importing the necessary packages
import requests as rq
from bs4 import BeautifulSoup as bfs
import pandas as pd 
import yfinance as yf
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib as mpl
import nbformat

In [3]:

import warnings
# Ignore all warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:

# Define the graphic function
def make_graph(stock_data, revenue_data, stock):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=("Historical Share Price", "Historical Revenue"), vertical_spacing = .3)
    stock_data_specific = stock_data[stock_data.Date <= '2021--06-14']
    revenue_data_specific = revenue_data[revenue_data.Date <= '2021-04-30']
    fig.add_trace(go.Scatter(x=pd.to_datetime(stock_data_specific.Date, infer_datetime_format=True), y=stock_data_specific.Close.astype("float"), name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=pd.to_datetime(revenue_data_specific.Date, infer_datetime_format=True), y=revenue_data_specific.Revenue.astype("float"), name="Revenue"), row=2, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
    height=900,
    title=stock,
    xaxis_rangeslider_visible=True)
    fig.show()

In [4]:
# 1.Using yfinance to extrack GameStop data
gme=yf.Ticker("GME") # Using the "TICKER" function to extract GameStop data with its ticker symbol "GME".
gme_data=gme.history(period="max") # Using the ticker object and the function `history` extract GameStop stock information.

In [5]:
gme_data.reset_index(inplace=True) # Using the `reset_index(inplace=True)` function on the gme_data DataFrame
gme_data.head(5) # Display the first five rows of the dataframe with the `head' function.

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13,1.620129,1.69335,1.603296,1.691667,76216000,0.0,0.0
1,2002-02-14,1.712708,1.716074,1.670626,1.683251,11021600,0.0,0.0
2,2002-02-15,1.68325,1.687458,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20,1.615921,1.66221,1.603296,1.66221,6892800,0.0,0.0


In [6]:
# 2.using WebScraping to extract GameStop data Revenue
url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html'
html_data= rq.get(url).text

In [7]:
# using BeautifulSoup library to parse the html data
soup=bfs(html_data, 'html5lib')

In [8]:
# Using the beautifulSoup to extract the table with two columns "Date" and "Revenue".
gme_revenue=pd.DataFrame(columns=["Date", "Revenue"])

for row in soup.find_all("tbody")[1].find_all("tr"):
    col=row.find_all("td")
    date=col[0].text
    revenue=col[1].text

    gme_revenue=gme_revenue._append({"Date":date, "Revenue":revenue}, ignore_index=True)

In [9]:
# Display the first five rows of the DataFrame using the 'tail' function
print(gme_revenue.tail(5))

          Date Revenue
57  2006-01-31  $1,667
58  2005-10-31    $534
59  2005-07-31    $416
60  2005-04-30    $475
61  2005-01-31    $709


In [10]:
gme_revenue["Revenue"] = gme_revenue['Revenue'].str.replace(',|\$',",","$")
# This line remove the comma and the dollar sign from the 'Revenue' column.

gme_revenue.dropna(inplace=True)

gme_revenue = gme_revenue[gme_revenue['Revenue'] != ""] # To remove an null or empty strings in the Revenue column.

In [12]:
# Plot GameStop stock graph
make_graph(gme_data, gme_revenue, 'GameStop')


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.



In [13]:
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm'

tesla_revenue=pd.read_html(url, match="Tesla Revenue", flavor="bs4") # To create a directly DataFrame with the pandas library

In [14]:
print(tesla_revenue[0:5])

[                     Link Preview  HTML Code (Click to Copy)
0  Tesla Revenue 2010-2022 | TSLA                        NaN
1                     Macrotrends                        NaN
2                          Source                        NaN,                      Link Preview  HTML Code (Click to Copy)
0  Tesla Revenue 2010-2022 | TSLA                        NaN
1                     Macrotrends                        NaN
2                          Source                        NaN]
