In [10]:
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [14]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
import plotly.io as pio
pio.renderers.default = "iframe"

###In Python, you can ignore warnings using the warnings module. You can use the filterwarnings function to filter or ignore specific warning messages or categories.

In [3]:
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [21]:
# Define Graphing Function
def make_graph(stock_data, revenue_data, stock_name):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        subplot_titles=("Historical Share Price", "Historical Revenue"),
                        vertical_spacing = .3)

    # 1. Ensure 'Date' columns are datetime and handle errors
    stock_data["Date"] = pd.to_datetime(stock_data["Date"], errors='coerce')
    revenue_data["Date"] = pd.to_datetime(revenue_data["Date"], errors='coerce')

    # Filter stock data
    stock_data_filtered = stock_data[stock_data['Date'] <= '2021-06-30']

    # Filter revenue data
    revenue_data_filtered = revenue_data[revenue_data['Date'] <= '2021-06-30']


    # 3. Ensure 'Close' and 'Revenue' are numeric and drop NaNs (after filtering)
    stock_data_filtered['Close'] = pd.to_numeric(stock_data_filtered['Close'], errors='coerce')
    revenue_data_filtered['Revenue'] = pd.to_numeric(revenue_data_filtered['Revenue'], errors='coerce')

    stock_data_filtered.dropna(subset=['Date', 'Close'], inplace=True)
    revenue_data_filtered.dropna(subset=['Date', 'Revenue'], inplace=True)

    # 4. Sort data by Date for correct plotting order
    stock_data_filtered.sort_values(by='Date', inplace=True)
    revenue_data_filtered.sort_values(by='Date', inplace=True)
    revenue_data_filtered.reset_index(drop=True, inplace=True)

    # 5. Add traces to the figure
    fig.add_trace(go.Scatter(x=stock_data_filtered.Date, y=stock_data_filtered.Close.astype("float"),
                             name="Share Price"), row=1, col=1)
    fig.add_trace(go.Scatter(x=revenue_data_filtered.Date, y=revenue_data_filtered.Revenue.astype("float"),
                             name="Revenue"), row=2, col=1)
    # 6. Update layout and show figure
    fig.update_xaxes(title_text="Date", rangeslider_visible=True)
    fig.update_yaxes(title_text="Price ($US)", row=1, col=1)
    fig.update_yaxes(title_text="Revenue ($US Millions)", row=2, col=1)
    fig.update_layout(showlegend=False,
                      height=600,
                      title_text=stock_name)
    fig.show()
print("make_graph function defined with date filtering and AttributeError fix!")

make_graph function defined with date filtering and AttributeError fix!


In [24]:
# Question 1: Use yfinance to Extract Stock Data (Tesla)
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max")
tesla_data.reset_index(inplace=True) # Ensures 'Date' is a column

# *Crucial Check for 'Date' column after reset_index*
tesla_data['Date'] = pd.to_datetime(tesla_data['Date'], errors='coerce')
tesla_data.dropna(subset=['Date'], inplace=True) # Remove rows if date conversion failed

print("First five rows of Tesla Stock Data:")
print(tesla_data.head())

First five rows of Tesla Stock Data:
                       Date      Open      High       Low     Close  \
0 2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667   
1 2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667   
2 2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000   
3 2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   
4 2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000   

      Volume  Dividends  Stock Splits  
0  281494500        0.0           0.0  
1  257806500        0.0           0.0  
2  123282000        0.0           0.0  
3   77097000        0.0           0.0  
4  103003500        0.0           0.0  


In [25]:
# Question 2: Use Webscraping to Extract Tesla Revenue Data
url_tesla_revenue = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/revenue.htm"
html_data_tesla = requests.get(url_tesla_revenue).text
soup_tesla = BeautifulSoup(html_data_tesla, 'html.parser')

read_html_pandas_data_tesla = pd.read_html(url_tesla_revenue)
tesla_revenue = read_html_pandas_data_tesla[1]

tesla_revenue.columns = ["Date", "Revenue"]

tesla_revenue["Revenue"] = tesla_revenue["Revenue"].str.replace(r'\$|,', "", regex=True)

# Convert to numeric and drop NaNs
tesla_revenue['Revenue'] = pd.to_numeric(tesla_revenue['Revenue'], errors='coerce')
tesla_revenue.dropna(subset=['Revenue'], inplace=True)

# *Crucial Check for 'Date' column conversion and sorting*
tesla_revenue['Date'] = pd.to_datetime(tesla_revenue['Date'], errors='coerce')
tesla_revenue.dropna(subset=['Date'], inplace=True)
tesla_revenue.sort_values(by='Date', inplace=True)
tesla_revenue.reset_index(drop=True, inplace=True) # Ensures index is clean after sorting

print("\nLast five rows of Cleaned Tesla Revenue Data:")
print(tesla_revenue.tail())


Last five rows of Cleaned Tesla Revenue Data:
         Date  Revenue
48 2021-09-30  13757.0
49 2021-12-31  17719.0
50 2022-03-31  18756.0
51 2022-06-30  16934.0
52 2022-09-30  21454.0


In [26]:
# Question 3: Use yfinance to Extract Stock Data (GameStop)
gamestop = yf.Ticker("GME")
gme_data = gamestop.history(period="max")
gme_data.reset_index(inplace=True) # Ensures 'Date' is a column

# *Crucial Check for 'Date' column after reset_index*
gme_data['Date'] = pd.to_datetime(gme_data['Date'], errors='coerce')
gme_data.dropna(subset=['Date'], inplace=True) # Remove rows if date conversion failed

print("\nFirst five rows of GameStop Stock Data:")
print(gme_data.head())


First five rows of GameStop Stock Data:
                       Date      Open      High       Low     Close    Volume  \
0 2002-02-13 00:00:00-05:00  1.620129  1.693350  1.603296  1.691667  76216000   
1 2002-02-14 00:00:00-05:00  1.712707  1.716074  1.670626  1.683250  11021600   
2 2002-02-15 00:00:00-05:00  1.683250  1.687458  1.658001  1.674834   8389600   
3 2002-02-19 00:00:00-05:00  1.666418  1.666418  1.578048  1.607504   7410400   
4 2002-02-20 00:00:00-05:00  1.615921  1.662210  1.603296  1.662210   6892800   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


In [30]:
# Question 4: Use Webscraping to Extract GME Revenue Data
url_gme_revenue = " https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/labs/project/stock.html."

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

try:
    html_data_gme = requests.get(url_gme_revenue, headers=headers).text
    soup_gme = BeautifulSoup(html_data_gme, 'html.parser')

    read_html_pandas_data_gme = pd.read_html(html_data_gme) # Use html_data_gme here, not URL
    gme_revenue = read_html_pandas_data_gme[1] # Assuming it's the second table

    # Rename columns (Macrotrends tables often have numbered columns initially)
    if gme_revenue.shape[1] >= 2:
        gme_revenue.columns = ["Date", "Revenue"]
    else:
        print("Warning: Table might not have expected 'Date' and 'Revenue' columns after read_html.")
        print(gme_revenue.head())

    gme_revenue["Revenue"] = gme_revenue["Revenue"].str.replace(r'\$|,', "", regex=True)

    gme_revenue['Revenue'] = pd.to_numeric(gme_revenue['Revenue'], errors='coerce')
    gme_revenue.dropna(subset=['Revenue'], inplace=True)

    # *Crucial Check for 'Date' column conversion and sorting*
    gme_revenue['Date'] = pd.to_datetime(gme_revenue['Date'], errors='coerce')
    gme_revenue.dropna(subset=['Date'], inplace=True)
    gme_revenue.sort_values(by='Date', inplace=True)
    gme_revenue.reset_index(drop=True, inplace=True)

    print("\nLast five rows of Cleaned GameStop Revenue Data:")
    print(gme_revenue.tail())
    print("\nInfo on GameStop Revenue Data:")
    print(gme_revenue.info()) # Check data types and columns

except requests.exceptions.RequestException as e:
    print(f"Error fetching data from {url_gme_revenue}: {e}")
    print("Please check the URL or contact your instructor for an alternative data source.")
    gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])
except IndexError:
    print("Error: Could not find the expected table structure using pd.read_html[1].")
    print("Please inspect the webpage HTML at the URL to find the correct table index or parsing method.")
    gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])
except Exception as e:
    print(f"An unexpected error occurred during GME revenue data processing: {e}")
    gme_revenue = pd.DataFrame(columns=["Date", "Revenue"])

An unexpected error occurred during GME revenue data processing: No tables found



It looks like you're using an HTML parser to parse an XML document.

Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.






It looks like you're using an HTML parser to parse an XML document.

Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.







In [28]:
# Question 5: Plot Tesla Stock Graph
print("\nGenerating graph for Tesla Stock and Revenue...")
if not tesla_data.empty and not tesla_revenue.empty:
    make_graph(tesla_data, tesla_revenue, 'Tesla Stock and Revenue History')
else:
    print("Tesla data is not available for plotting. Please check previous steps.")


Generating graph for Tesla Stock and Revenue...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice

In [29]:
# Question 6: Plot GameStop Stock Graph
print("\nGenerating graph for GameStop Stock and Revenue...")
# Check if gme_data and gme_revenue are not empty before plotting
if not gme_data.empty and not gme_revenue.empty:
    make_graph(gme_data, gme_revenue, 'GameStop Stock and Revenue History')
else:
    print("GameStop data is not available for plotting. Please check previous steps (especially Q4).")


Generating graph for GameStop Stock and Revenue...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice