In [None]:
#installing libraries
!pip install yfinance
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2


In [None]:
#importing libraries
import pandas as pd

import yfinance as yf
import requests
from bs4 import BeautifulSoup

import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
def plot_graph(stock_data, revenue_data, stock):
    """Show share price and revenue over time as two stacked line charts.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Must expose a ``Date`` column and a ``Close`` column; ``Close`` is
        cast to float before plotting.
    revenue_data : pandas.DataFrame
        Must expose a ``Date`` column and a ``Revenue`` column; ``Revenue``
        is cast to float, so every value must be convertible (no empty
        strings).
    stock : str
        Text used as the figure title.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        subplot_titles=("Historical Share Price ($)", "Historical Revenue ($)"),
        vertical_spacing=.5,
    )

    # pd.to_datetime is called without `infer_datetime_format`: that argument
    # is deprecated in pandas 2.x and only produced a warning on every call.
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(stock_data.Date),
            y=stock_data.Close.astype("float"),
            name="Share Price",
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=pd.to_datetime(revenue_data.Date),
            y=revenue_data.Revenue.astype("float"),
            name="Revenue",
        ),
        row=2,
        col=1,
    )

    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($ Millions)", row=2, col=1)
    fig.update_layout(showlegend=False, height=1000, title=stock, xaxis_rangeslider_visible=True)
    fig.show()

In [None]:
# Build a yfinance Ticker handle for Tesla (ticker symbol: TSLA).
tesla_data = yf.Ticker('TSLA')

# Request the longest history yfinance offers (period='max') and turn the
# index into an ordinary column in the same step.
tsla_data = tesla_data.history(period='max').reset_index()

# Preview the first five rows.
tsla_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,0.0,0.0


In [None]:
# Download the Tesla revenue page with the requests library.
url='https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue'

# Save the text of the response.
# A timeout is passed because requests otherwise waits indefinitely for a
# server that never answers, which would hang the kernel.
html_text = requests.get(url, timeout=30).text

# Parse the html data using beautiful_soup.
soup=BeautifulSoup(html_text, 'html5lib')

In [None]:
!pip install selenium
!apt-get update
!apt install -y chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin


Collecting selenium
  Downloading selenium-4.23.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.26.2-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0.14->trio-websocket~=0.9->selenium)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading selenium-4.23.1-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.26.2-py3-none-any.whl (475 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m476.0/476.0 kB[0m [31m13.

In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time

# Set up the options for headless Chrome
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Initialize WebDriver, providing the ChromeOptions object directly
driver = webdriver.Chrome(options=options)

url = 'https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue'
try:
    # Open the URL
    driver.get(url)

    # Give the page time to load
    time.sleep(5)

    # Get page source after JavaScript has rendered
    html_text = driver.page_source
finally:
    # Always shut the browser down, even if loading the page failed,
    # so no headless Chrome process is left running.
    driver.quit()

# Parse the html data using BeautifulSoup
soup = BeautifulSoup(html_text, 'html.parser')

# Rows are collected in a list and turned into a DataFrame once at the end.
data = []

# Find the correct table.
# table_index starts as None on every run, so a value left in the kernel
# namespace by an earlier execution can never be mistaken for a match.
tables = soup.find_all('table')
table_index = None
for index, table in enumerate(tables):
    if 'Tesla Quarterly Revenue' in str(table):
        table_index = index
        break

if not tables:
    print("No tables found on the page.")
elif table_index is None:
    print("Table 'Tesla Quarterly Revenue' not found on the page.")
else:
    revenue_table = tables[table_index]
    # Fall back to the table itself when the markup has no <tbody>.
    table_body = revenue_table.tbody if revenue_table.tbody is not None else revenue_table
    for row in table_body.find_all("tr"):
        col = row.find_all("td")
        # Header rows have no <td>; a data row needs both a date and a revenue cell.
        if len(col) >= 2:
            date = col[0].text.strip()
            # Strip the currency symbol and thousands separators.
            revenue = col[1].text.strip().replace("$", "").replace(",", "")
            data.append({'Date': date, 'Revenue': revenue})

# Create the DataFrame from the collected data.
# Columns are named explicitly so the frame has 'Date' and 'Revenue'
# even when nothing was scraped.
tsla_revenue = pd.DataFrame(data, columns=['Date', 'Revenue'])

# Preview the result instead of dumping every row.
tsla_revenue.head()

          Date Revenue
0   2024-03-31   21301
1   2023-12-31   25167
2   2023-09-30   23350
3   2023-06-30   24927
4   2023-03-31   23329
5   2022-12-31   24318
6   2022-09-30   21454
7   2022-06-30   16934
8   2022-03-31   18756
9   2021-12-31   17719
10  2021-09-30   13757
11  2021-06-30   11958
12  2021-03-31   10389
13  2020-12-31   10744
14  2020-09-30    8771
15  2020-06-30    6036
16  2020-03-31    5985
17  2019-12-31    7384
18  2019-09-30    6303
19  2019-06-30    6350
20  2019-03-31    4541
21  2018-12-31    7226
22  2018-09-30    6824
23  2018-06-30    4002
24  2018-03-31    3409
25  2017-12-31    3288
26  2017-09-30    2985
27  2017-06-30    2790
28  2017-03-31    2696
29  2016-12-31    2285
30  2016-09-30    2298
31  2016-06-30    1270
32  2016-03-31    1147
33  2015-12-31    1214
34  2015-09-30     937
35  2015-06-30     955
36  2015-03-31     940
37  2014-12-31     957
38  2014-09-30     852
39  2014-06-30     769
40  2014-03-31     621
41  2013-12-31     615
42  2013-09

In [None]:
# Keep only the rows whose Revenue is not an empty string.
has_revenue = tsla_revenue['Revenue'].ne('')
tsla_revenue = tsla_revenue.loc[has_revenue]
tsla_revenue

Unnamed: 0,Date,Revenue
0,2024-03-31,21301
1,2023-12-31,25167
2,2023-09-30,23350
3,2023-06-30,24927
4,2023-03-31,23329
5,2022-12-31,24318
6,2022-09-30,21454
7,2022-06-30,16934
8,2022-03-31,18756
9,2021-12-31,17719


In [None]:
# Plot Tesla's share price and quarterly revenue.
plot_graph(
    stock_data=tsla_data,
    revenue_data=tsla_revenue,
    stock='Tesla Historical Share Price & Revenue',
)


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



2. GameStop Stock Data

Using yfinance to extract GameStop stock data

In [None]:
# Build a yfinance Ticker handle for GameStop (ticker symbol: GME).
gamestop = yf.Ticker('GME')

# Request the longest available history and turn the index into an
# ordinary column in the same step.
gme_data = gamestop.history(period='max').reset_index()

# Preview the first five rows.
gme_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.620128,1.69335,1.603296,1.691667,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.712707,1.716074,1.670626,1.683251,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.68325,1.687458,1.658002,1.674834,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.666418,1.666418,1.578047,1.607504,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.61592,1.66221,1.603296,1.66221,6892800,0.0,0.0


Web scraping to extract GME revenue data

In [None]:
# Download the GameStop revenue page with the requests library.
url = 'https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue'

# Save the text of the response.
# A timeout is passed because requests otherwise waits indefinitely for a
# server that never answers, which would hang the kernel.
html_data = requests.get(url, timeout=30).text

# parse the html data
soup=BeautifulSoup(html_data, 'html5lib')

In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time
import yfinance as yf

# Set up the options for headless Chrome
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Initialize WebDriver
driver = webdriver.Chrome(options=options)

url = 'https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue'
try:
    # Open the URL
    driver.get(url)

    # Give the page time to load
    time.sleep(5)

    # Get page source after JavaScript has rendered
    html_data = driver.page_source
finally:
    # Always shut the browser down, even if loading the page failed,
    # so no headless Chrome process is left running.
    driver.quit()

# Parse the html data using BeautifulSoup
soup = BeautifulSoup(html_data, 'html.parser')

tables = soup.find_all('table')

table_index = None  # Stays None when no table mentions the caption
for index, table in enumerate(tables):
    if 'GameStop Quarterly Revenue' in str(table):
        table_index = index
        break  # Exit loop once the table is found

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of growing the DataFrame one row at a time.
revenue_rows = []

# Check if the table was found before proceeding
if table_index is not None:
    revenue_table = tables[table_index]
    # Fall back to the table itself when the markup has no <tbody>.
    table_body = revenue_table.tbody if revenue_table.tbody is not None else revenue_table
    for row in table_body.find_all("tr"):
        col = row.find_all("td")
        # Header rows have no <td>; a data row needs both a date and a revenue cell.
        if len(col) >= 2:
            date = col[0].text.strip()
            # Remove comma and dollar sign
            revenue = col[1].text.strip().replace("$", "").replace(",", "")
            # Skip rows with an empty revenue cell: plot_graph casts the
            # Revenue column to float, which fails on an empty string.
            if revenue:
                revenue_rows.append({'Date': date, 'Revenue': revenue})
else:
    print("Table with 'GameStop Quarterly Revenue' not found.")

# Columns are named explicitly so the frame has 'Date' and 'Revenue'
# even when nothing was scraped.
gme_revenue = pd.DataFrame(revenue_rows, columns=["Date", "Revenue"])

# Preview the first five rows.
gme_revenue.head()

         Date Revenue
0  2024-04-30     882
1  2024-01-31    1794
2  2023-10-31    1078
3  2023-07-31    1164
4  2023-04-30    1237


Plot GameStop stock graph

In [None]:
# Plot GameStop's share price and quarterly revenue.
plot_graph(
    stock_data=gme_data,
    revenue_data=gme_revenue,
    stock='GameStop',
)


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



### THANK YOU