# Peer‑graded Assignment: Analyzing Historical Stock/Revenue Data and Building a Dashboard

This notebook follows the required steps:

- **Q1**: Extract Tesla stock data using `yfinance`, reset index, and show `head()`  
- **Q2**: Web‑scrape Tesla revenue data and show `tail()`  
- **Q3**: Extract GameStop stock data using `yfinance`, reset index, and show `head()`  
- **Q4**: Web‑scrape GameStop revenue data and show `tail()`  
- **Q5**: Plot Tesla stock vs revenue using `make_graph()`  
- **Q6**: Plot GameStop stock vs revenue using `make_graph()`  

> Tip: If running on a fresh environment, install dependencies in the next cell.

In [1]:
# If needed, uncomment the next line to install packages
# !pip -q install yfinance pandas requests beautifulsoup4 lxml matplotlib

import pandas as pd
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 10)
pd.set_option('display.width', 120)


ModuleNotFoundError: No module named 'yfinance'

In [2]:
def get_revenue_from_macrotrends(url: str, table_keyword: str):
    """
    Fetch a revenue table from Macrotrends and return a clean DataFrame with columns:
      ['Date', 'Revenue']
    The 'Revenue' column is cleaned to numeric (float) by removing $ and commas.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36'
    }
    html = requests.get(url, headers=headers, timeout=30).text
    soup = BeautifulSoup(html, 'html.parser')
    candidate_tables = soup.find_all('table')
    target = None
    for tbl in candidate_tables:
        if table_keyword.lower() in tbl.get_text(' ').lower():
            target = tbl
            break
    if target is None:
        target = candidate_tables[0] if candidate_tables else None
    if target is None:
        raise ValueError('No tables found on the page.')
    df = pd.read_html(str(target))[0]
    df = df.rename(columns={df.columns[0]: 'Date', df.columns[1]: 'Revenue'})
    df = df[df['Date'].notna() & df['Revenue'].notna()].copy()
    df['Revenue'] = (df['Revenue'].astype(str)
                     .str.replace('$', '', regex=False)
                     .str.replace(',', '', regex=False))
    df['Revenue'] = pd.to_numeric(df['Revenue'], errors='coerce')
    df = df.dropna(subset=['Revenue'])
    df['Date'] = df['Date'].astype(str)
    df = df.reset_index(drop=True)
    return df


## Question 1 — Use `yfinance` to Extract Tesla Stock Data

In [3]:
tesla = yf.Ticker('TSLA')
tesla_data = tesla.history(period='max')
tesla_data.reset_index(inplace=True)
tesla_data.head()


NameError: name 'yf' is not defined

## Question 2 — Use Webscraping to Extract Tesla Revenue Data

In [4]:
tsla_url = 'https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue'
tesla_revenue = get_revenue_from_macrotrends(tsla_url, table_keyword='Tesla Quarterly Revenue')
tesla_revenue.tail()


NameError: name 'requests' is not defined

## Question 3 — Use `yfinance` to Extract GameStop Stock Data

In [5]:
gme = yf.Ticker('GME')
gme_data = gme.history(period='max')
gme_data.reset_index(inplace=True)
gme_data.head()


NameError: name 'yf' is not defined

## Question 4 — Use Webscraping to Extract GameStop Revenue Data

In [6]:
gme_url = 'https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue'
gme_revenue = get_revenue_from_macrotrends(gme_url, table_keyword='GameStop Quarterly Revenue')
gme_revenue.tail()


NameError: name 'requests' is not defined

## Questions 5 & 6 — Plot Stock vs Revenue with `make_graph()`

In [7]:
from datetime import datetime

def make_graph(stock_data, revenue_data, stock_ticker):
    """
    Draw a two-panel chart: top = stock closing price; bottom = quarterly revenue.
    Expects:
      stock_data: DataFrame with 'Date' (datetime64) and 'Close' (float)
      revenue_data: DataFrame with 'Date' (str like 'YYYY-Q#' or 'YYYY-MM-DD') and 'Revenue' (float)
    """
    rev = revenue_data.copy()
    if rev['Date'].dtype == object and ('Q' in str(rev['Date'].iloc[0])):
        mapping = {'Q1':'-03-31','Q2':'-06-30','Q3':'-09-30','Q4':'-12-31'}
        rev['Date'] = (rev['Date'].str.replace('Q1', mapping['Q1'])
                                  .str.replace('Q2', mapping['Q2'])
                                  .str.replace('Q3', mapping['Q3'])
                                  .str.replace('Q4', mapping['Q4']))
    rev['Date'] = pd.to_datetime(rev['Date'], errors='coerce')
    rev = rev.dropna(subset=['Date', 'Revenue'])

    st = stock_data.copy()
    st['Date'] = pd.to_datetime(st['Date'], errors='coerce')
    st = st.dropna(subset=['Date', 'Close'])

    fig, axes = plt.subplots(2, 1, figsize=(12, 7), dpi=120)
    axes[0].plot(st['Date'], st['Close'])
    axes[0].set_title(f"{stock_ticker} Stock Price")
    axes[0].set_ylabel('Close')
    axes[1].bar(rev['Date'], rev['Revenue'])
    axes[1].set_title(f"{stock_ticker} Quarterly Revenue (USD)")
    axes[1].set_ylabel('Revenue')
    axes[1].set_xlabel('Date')
    fig.tight_layout()
    return fig

# Plot Tesla
fig_tsla = make_graph(tesla_data[['Date','Close']], tesla_revenue[['Date','Revenue']], 'TSLA')
plt.show()

# Plot GME
fig_gme = make_graph(gme_data[['Date','Close']], gme_revenue[['Date','Revenue']], 'GME')
plt.show()


NameError: name 'tesla_data' is not defined

### (Optional) Save cleaned datasets to CSV

In [8]:
tesla_data.to_csv('tesla_stock_data.csv', index=False)
tesla_revenue.to_csv('tesla_revenue_data.csv', index=False)
gme_data.to_csv('gme_stock_data.csv', index=False)
gme_revenue.to_csv('gme_revenue_data.csv', index=False)
print('Saved CSV files to working directory.')


NameError: name 'tesla_data' is not defined