<a href="https://colab.research.google.com/github/Tanvir284/CSE-122-Fall-2022-49-10-/blob/main/Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# ==============================================================================
# Step 1: Install and Import Necessary Libraries
# ==============================================================================
# This command installs all the required Python packages.
!pip install yfinance pandas requests beautifulsoup4 plotly

# Importing the libraries into our script.
import os
from io import StringIO

import pandas as pd
import plotly.graph_objects as go
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from plotly.subplots import make_subplots

# ==============================================================================
# Question 1: Extracting Tesla Stock Data Using yfinance
# ==============================================================================
print("--- Extracting Tesla (TSLA) Stock Data ---")
# Ticker handle through which all Tesla market data is requested.
tesla = yf.Ticker("TSLA")

# Fetch the longest history yfinance offers and, in the same step, turn the
# index into an ordinary column so 'Date' can be referenced by name later.
tesla_data = tesla.history(period="max").reset_index()

# Quick sanity check: preview the top of the frame.
print("Tesla Stock Data (first 5 rows):")
print(tesla_data.head())
print("\n" + "=" * 50 + "\n")


# ==============================================================================
# Question 2: Extracting Tesla Revenue Data Using Web Scraping
# ==============================================================================
print("--- Extracting Tesla (TSLA) Revenue Data ---")
# The URL for Tesla's revenue data on Macrotrends.
url_tesla = "https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue"

# Send a browser-like User-Agent. NOTE(review): the site appears to reject the
# default python-requests client (the table was not found in earlier runs) --
# confirm that this header is sufficient for the current site.
tesla_request_headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
}

# Fetch the page. A timeout keeps the cell from hanging forever, and an HTTP
# error is reported instead of silently parsing an error page.
try:
    tesla_response = requests.get(url_tesla, headers=tesla_request_headers, timeout=30)
    tesla_response.raise_for_status()
    html_data_tesla = tesla_response.text
except requests.exceptions.RequestException as exc:
    print(f"Request for the Tesla revenue page failed: {exc}")
    html_data_tesla = ""

# Parse the HTML content using BeautifulSoup.
soup_tesla = BeautifulSoup(html_data_tesla, "html.parser")

# Find all candidate tables on the page.
tables_tesla = soup_tesla.find_all('table', class_='historical_data_table')
tesla_revenue = pd.DataFrame()  # Stays empty when no matching table is found.

# Loop through tables to find the one with quarterly revenue data.
for table in tables_tesla:
    header_cell = table.find('th')
    # A table without any <th> cannot be the one we want; skip it rather than
    # failing on None.text.
    if header_cell is not None and "Tesla Quarterly Revenue" in header_cell.text:
        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas releases.
        tesla_revenue = pd.read_html(StringIO(str(table)))[0]
        break

# Check if the DataFrame is still empty after the loop
if tesla_revenue.empty:
    print("Could not find the Tesla Quarterly Revenue table.")
else:
    # Clean and format the revenue data.
    tesla_revenue.columns = ["Date", "Revenue"]
    tesla_revenue.dropna(inplace=True)
    # regex=False makes '$' a literal character; interpreted as a regular
    # expression it is the end-of-string anchor and the symbol would survive.
    tesla_revenue["Revenue"] = (
        tesla_revenue["Revenue"]
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )
    tesla_revenue = tesla_revenue[tesla_revenue['Revenue'] != ""]

    # Display the last five rows of the cleaned data.
    print("Tesla Revenue Data (last 5 rows):")
    print(tesla_revenue.tail())
    print("\n" + "="*50 + "\n")


# ==============================================================================
# Question 3: Extracting GameStop Stock Data Using yfinance
# ==============================================================================
print("--- Extracting GameStop (GME) Stock Data ---")
# Ticker handle through which all GameStop market data is requested.
gme = yf.Ticker("GME")

# Fetch the longest history yfinance offers and, in the same step, turn the
# index into an ordinary column so 'Date' can be referenced by name later.
gme_data = gme.history(period="max").reset_index()

# Quick sanity check: preview the top of the frame.
print("GameStop Stock Data (first 5 rows):")
print(gme_data.head())
print("\n" + "=" * 50 + "\n")


# ==============================================================================
# Question 4: Extracting GameStop Revenue Data Using Web Scraping
# ==============================================================================
print("--- Extracting GameStop (GME) Revenue Data ---")
# The URL for GameStop's revenue data.
url_gme = "https://www.macrotrends.net/stocks/charts/GME/gamestop/revenue"

# Send a browser-like User-Agent. NOTE(review): the site appears to reject the
# default python-requests client -- confirm that this header is sufficient
# for the current site.
gme_request_headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
}

# Fetch the page. A timeout keeps the cell from hanging forever, and an HTTP
# error is reported instead of silently parsing an error page.
try:
    gme_response = requests.get(url_gme, headers=gme_request_headers, timeout=30)
    gme_response.raise_for_status()
    html_data_gme = gme_response.text
except requests.exceptions.RequestException as exc:
    print(f"Request for the GameStop revenue page failed: {exc}")
    html_data_gme = ""

# Parse the HTML from the response.
soup_gme = BeautifulSoup(html_data_gme, "html.parser")

# Find the correct table containing the revenue data.
tables_gme = soup_gme.find_all('table', class_='historical_data_table')
gme_revenue = pd.DataFrame()  # Stays empty when no matching table is found.

for table in tables_gme:
    header_cell = table.find('th')
    # A table without any <th> cannot be the one we want; skip it rather than
    # failing on None.text.
    if header_cell is not None and "GameStop Quarterly Revenue" in header_cell.text:
        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas releases.
        gme_revenue = pd.read_html(StringIO(str(table)))[0]
        break

# Check if the DataFrame is still empty after the loop
if gme_revenue.empty:
    print("Could not find the GameStop Quarterly Revenue table.")
else:
    # Clean and format the GameStop revenue data.
    gme_revenue.columns = ["Date", "Revenue"]
    gme_revenue.dropna(inplace=True)
    # regex=False makes '$' a literal character; interpreted as a regular
    # expression it is the end-of-string anchor and the symbol would survive.
    gme_revenue["Revenue"] = (
        gme_revenue["Revenue"]
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )
    gme_revenue = gme_revenue[gme_revenue['Revenue'] != ""]

    # Display the last five rows of the cleaned data.
    print("GameStop Revenue Data (last 5 rows):")
    print(gme_revenue.tail())
    print("\n" + "="*50 + "\n")


# ==============================================================================
# Questions 5 & 6: Plotting Dashboards
# ==============================================================================
# Define a function to create the plots to avoid repeating code.
def make_graph(stock_data, revenue_data, stock_name):
    """
    Creates and displays an interactive dual-axis plot of a stock's
    closing price and its quarterly revenue.

    Args:
        stock_data: DataFrame with 'Date' and 'Close' columns (plotted on the
            primary y-axis).
        revenue_data: DataFrame with 'Date' and 'Revenue' columns, where
            'Revenue' is convertible to float (plotted on the secondary
            y-axis). May be None or empty, in which case only the stock
            price is drawn.
        stock_name (str): Label used in the figure title.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Create a figure with a secondary y-axis.
    fig = make_subplots(rows=1, cols=1, shared_xaxes=True,
                        specs=[[{"secondary_y": True}]])

    # Add the stock price trace to the primary y-axis.
    fig.add_trace(go.Scatter(x=stock_data['Date'], y=stock_data['Close'],
                             name='Stock Price'), secondary_y=False)

    # A failed revenue extraction leaves behind an empty, column-less frame;
    # indexing it would raise KeyError, so fall back to a price-only chart.
    has_revenue = (
        revenue_data is not None
        and not revenue_data.empty
        and {'Date', 'Revenue'}.issubset(revenue_data.columns)
    )
    if has_revenue:
        # Add the revenue trace to the secondary y-axis.
        fig.add_trace(go.Scatter(x=revenue_data['Date'],
                                 y=revenue_data['Revenue'].astype(float),
                                 name='Revenue'), secondary_y=True)
    else:
        print(f"No revenue data available for {stock_name}; plotting stock price only.")

    # Add figure title and axis labels.
    fig.update_layout(title_text=f"{stock_name} Stock Price vs. Revenue",
                      xaxis_title="Date")
    fig.update_yaxes(title_text="Stock Price ($)", secondary_y=False)
    fig.update_yaxes(title_text="Revenue", secondary_y=True)

    # Render the figure, as the docstring promises.
    fig.show()

--- Extracting Tesla (TSLA) Stock Data ---
Tesla Stock Data (first 5 rows):
                       Date      Open      High       Low     Close  \
0 2010-06-29 00:00:00-04:00  1.266667  1.666667  1.169333  1.592667   
1 2010-06-30 00:00:00-04:00  1.719333  2.028000  1.553333  1.588667   
2 2010-07-01 00:00:00-04:00  1.666667  1.728000  1.351333  1.464000   
3 2010-07-02 00:00:00-04:00  1.533333  1.540000  1.247333  1.280000   
4 2010-07-06 00:00:00-04:00  1.333333  1.333333  1.055333  1.074000   

      Volume  Dividends  Stock Splits  
0  281494500        0.0           0.0  
1  257806500        0.0           0.0  
2  123282000        0.0           0.0  
3   77097000        0.0           0.0  
4  103003500        0.0           0.0  


--- Extracting Tesla (TSLA) Revenue Data ---
Could not find the Tesla Quarterly Revenue table.
--- Extracting GameStop (GME) Stock Data ---
GameStop Stock Data (first 5 rows):
                       Date      Open      High       Low     Close    Volume  

# Task
Explain the error in the selected code, which is failing to plot Tesla and GameStop revenue data. If possible, fix the error by finding alternative data sources for historical quarterly revenue data for Tesla and GameStop, extracting and cleaning this data, and integrating it into the existing code to plot the stock price and revenue data for both companies. If fixing is not possible, diagnose the error.

## Find alternative revenue data sources

### Subtask:
Search for alternative websites or APIs that provide historical quarterly revenue data for Tesla and GameStop.


**Reasoning**:
The previous attempt to scrape revenue data from Macrotrends failed: no table with the expected quarterly-revenue header was found in the returned page. I need to find alternative data sources for Tesla and GameStop quarterly revenue data. I will draft search queries for financial data providers, APIs, or reliable web sources.



In [9]:
# Draft the web-search queries for alternative revenue data sources.
print("Searching for alternative data sources for Tesla and GameStop revenue...")

# Both companies use the same three query phrasings; only the name differs.
_query_templates = (
    "{company} quarterly revenue data API",
    "{company} historical financial data",
    "{company} revenue data alternative source",
)

# Queries for Tesla
tesla_search_terms = [template.format(company="Tesla") for template in _query_templates]

# Queries for GameStop
gme_search_terms = [template.format(company="GameStop") for template in _query_templates]

print("Potential search terms:")
print("Tesla:", tesla_search_terms)
print("GameStop:", gme_search_terms)

# Note: nothing here performs a web search; the queries are only listed.
# The following cells assume such searches surface providers like Alpha Vantage
# or Financial Modeling Prep, which then have to be checked for quarterly
# revenue availability and accessibility.

Searching for alternative data sources for Tesla and GameStop revenue...
Potential search terms:
Tesla: ['Tesla quarterly revenue data API', 'Tesla historical financial data', 'Tesla revenue data alternative source']
GameStop: ['GameStop quarterly revenue data API', 'GameStop historical financial data', 'GameStop revenue data alternative source']


**Reasoning**:
Based on the conceptual search, I will identify potential alternative data sources that are commonly used for financial data and check if they provide historical quarterly revenue data for publicly traded companies like Tesla and GameStop. Common sources include financial data APIs like Alpha Vantage, Financial Modeling Prep, or potentially scraping from reputable financial websites if API access is not feasible or free. I will then note down the identified sources.



In [13]:
# Candidate alternative sources, drawn from commonly used financial data providers:
#   1. Alpha Vantage API - serves financial data, including income statements.
#   2. Financial Modeling Prep API - another widely used financial data API.
#   3. Scraping sites such as Yahoo Finance or Google Finance - possible, but
#      APIs are preferred because they return structured data.
#
# Only the API options are recorded below, since they are the easiest to use
# programmatically. Each value is a URL template with a literal "{symbol}"
# placeholder; nothing is substituted in this cell.

# Potential API Source 1: Alpha Vantage
alpha_vantage_financials_endpoint = (
    "https://www.alphavantage.co/query"
    "?function=INCOME_STATEMENT&symbol={symbol}&apikey=YOUR_API_KEY"
)
print("Potential API Source 1 (Alpha Vantage Financials Endpoint):",
      alpha_vantage_financials_endpoint)

# Potential API Source 2: Financial Modeling Prep
fmp_financials_endpoint = (
    "https://financialmodelingprep.com/api/v3/income-statement/{symbol}"
    "?period=quarter&apikey=YOUR_API_KEY"
)
print("Potential API Source 2 (Financial Modeling Prep Financials Endpoint):",
      fmp_financials_endpoint)

# Note: both providers require an API key, which may mean registering and
# living with usage limits. Later steps assume one of them is used.

Potential API Source 1 (Alpha Vantage Financials Endpoint): https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={symbol}&apikey=YOUR_API_KEY
Potential API Source 2 (Financial Modeling Prep Financials Endpoint): https://financialmodelingprep.com/api/v3/income-statement/{symbol}?period=quarter&apikey=YOUR_API_KEY


## Extract revenue data

### Subtask:
Write code to extract the revenue data from the new source(s).


**Reasoning**:
Based on the previous step, Financial Modeling Prep is a suitable alternative data source for quarterly revenue data. I will use its API to retrieve the income statements for Tesla and GameStop, extract the quarterly revenue, and store it in DataFrames.



In [14]:
# Read the Financial Modeling Prep API key from the FMP_API_KEY environment
# variable so the secret never has to be typed into (and committed with) the
# notebook. When the variable is unset the old placeholder is used, and the
# API will answer with 401 Unauthorized until a real key is supplied.
fmp_api_key = os.environ.get("FMP_API_KEY", "YOUR_API_KEY")

def get_quarterly_revenue(symbol, api_key):
    """
    Retrieves quarterly revenue data for a given stock symbol using the Financial Modeling Prep API.

    The function never raises: every failure is reported with ``print`` and
    signalled to the caller by an empty DataFrame.

    Args:
        symbol (str): The stock ticker symbol (e.g., 'TSLA', 'GME').
        api_key (str): Your Financial Modeling Prep API key.

    Returns:
        pandas.DataFrame: A DataFrame with 'Date' (datetime) and 'Revenue' columns sorted by
        date, or an empty DataFrame if data retrieval fails or no usable entries are returned.
    """
    url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}?period=quarter&apikey={api_key}"
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status() # Raise an exception for bad status codes
        data = response.json()

        if not data:
            print(f"No quarterly income statement data found for {symbol}.")
            return pd.DataFrame()

        # Anything other than a list of per-quarter records (for example a
        # JSON object carrying an error message) cannot be iterated as
        # quarters below.
        if not isinstance(data, list):
            print(f"Unexpected response format for {symbol}; expected a list of quarterly statements.")
            return pd.DataFrame()

        # Extract date and revenue for each quarter, skipping incomplete records.
        revenue_list = [
            {'Date': quarter_data.get('date'), 'Revenue': quarter_data.get('revenue')}
            for quarter_data in data
            if isinstance(quarter_data, dict)
            and quarter_data.get('date')
            and quarter_data.get('revenue') is not None
        ]

        # With no usable rows the frame would have no 'Date' column at all,
        # so return early instead of failing on the column lookup.
        if not revenue_list:
            print(f"No usable revenue entries found for {symbol}.")
            return pd.DataFrame()

        revenue_df = pd.DataFrame(revenue_list)
        # Ensure 'Date' is datetime and sort
        revenue_df['Date'] = pd.to_datetime(revenue_df['Date'])
        revenue_df.sort_values(by='Date', inplace=True)
        return revenue_df

    except requests.exceptions.RequestException as e:
        # The error text embeds the request URL, which contains the API key;
        # mask it so the key does not end up in saved notebook output.
        error_text = str(e)
        if api_key:
            error_text = error_text.replace(str(api_key), "***")
        print(f"Error fetching data for {symbol}: {error_text}")
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary: keep the "never raises" contract for callers.
        print(f"An unexpected error occurred for {symbol}: {e}")
        return pd.DataFrame()


print("--- Extracting Tesla (TSLA) Revenue Data from Financial Modeling Prep ---")
tesla_revenue_fmp = get_quarterly_revenue("TSLA", fmp_api_key)

# An empty frame is the helper's failure signal.
if tesla_revenue_fmp.empty:
    print("Failed to extract Tesla revenue data from FMP.")
else:
    # Preview the most recent quarters.
    print("Tesla Revenue Data (last 5 rows) from FMP:")
    display(tesla_revenue_fmp.tail())

print("\n" + "=" * 50 + "\n")

print("--- Extracting GameStop (GME) Revenue Data from Financial Modeling Prep ---")
gme_revenue_fmp = get_quarterly_revenue("GME", fmp_api_key)

# An empty frame is the helper's failure signal.
if gme_revenue_fmp.empty:
    print("Failed to extract GameStop revenue data from FMP.")
else:
    # Preview the most recent quarters.
    print("GameStop Revenue Data (last 5 rows) from FMP:")
    display(gme_revenue_fmp.tail())

print("\n" + "=" * 50 + "\n")

# Results live in tesla_revenue_fmp and gme_revenue_fmp;
# the plotting step that follows consumes these frames.

--- Extracting Tesla (TSLA) Revenue Data from Financial Modeling Prep ---
Error fetching data for TSLA: 401 Client Error: Unauthorized for url: https://financialmodelingprep.com/api/v3/income-statement/TSLA?period=quarter&apikey=YOUR_API_KEY
Failed to extract Tesla revenue data from FMP.


--- Extracting GameStop (GME) Revenue Data from Financial Modeling Prep ---
Error fetching data for GME: 401 Client Error: Unauthorized for url: https://financialmodelingprep.com/api/v3/income-statement/GME?period=quarter&apikey=YOUR_API_KEY
Failed to extract GameStop revenue data from FMP.




## Extract revenue data (retry)

### Subtask:
Retry extracting revenue data from the Financial Modeling Prep API for Tesla and GameStop, ensuring a valid API key is used.


**Reasoning**:
Re-attempt to extract revenue data using a valid API key and check if the dataframes are populated.



In [12]:
# Read the Financial Modeling Prep API key from the FMP_API_KEY environment
# variable so the secret never has to be typed into (and committed with) the
# notebook. Obtaining the key from Financial Modeling Prep is up to the user.
# When the variable is unset the old placeholder is used, so this attempt is
# expected to fail with 401 Unauthorized until a real key is supplied.
fmp_api_key = os.environ.get("FMP_API_KEY", "YOUR_API_KEY")

def get_quarterly_revenue(symbol, api_key):
    """
    Retrieves quarterly revenue data for a given stock symbol using the Financial Modeling Prep API.

    The function never raises: every failure is reported with ``print`` and
    signalled to the caller by an empty DataFrame.

    Args:
        symbol (str): The stock ticker symbol (e.g., 'TSLA', 'GME').
        api_key (str): Your Financial Modeling Prep API key.

    Returns:
        pandas.DataFrame: A DataFrame with 'Date' (datetime) and 'Revenue' (float) columns,
        sorted by date with one row per date and a fresh 0..n-1 index, or an empty DataFrame
        if data retrieval fails or no usable entries are returned.
    """
    url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}?period=quarter&apikey={api_key}"
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status() # Raise an exception for bad status codes
        data = response.json()

        if not data:
            print(f"No quarterly income statement data found for {symbol}.")
            return pd.DataFrame()

        # Anything other than a list of per-quarter records (for example a
        # JSON object carrying an error message) cannot be iterated as
        # quarters below.
        if not isinstance(data, list):
            print(f"Unexpected response format for {symbol}; expected a list of quarterly statements.")
            return pd.DataFrame()

        # Extract date and revenue for each quarter
        revenue_list = []
        for quarter_data in data:
            if not isinstance(quarter_data, dict):
                continue
            date = quarter_data.get('date')
            revenue = quarter_data.get('revenue')
            if not date or revenue is None:
                continue
            # Convert revenue to numeric. float() raises TypeError (not
            # ValueError) for non-scalar values, so catch both; one bad
            # record must not discard every other quarter.
            try:
                revenue_list.append({'Date': date, 'Revenue': float(revenue)})
            except (TypeError, ValueError):
                print(f"Could not convert revenue '{revenue}' to float for date '{date}'. Skipping.")

        revenue_df = pd.DataFrame(revenue_list)
        if not revenue_df.empty:
            # Ensure 'Date' is datetime and sort
            revenue_df['Date'] = pd.to_datetime(revenue_df['Date'])
            revenue_df.sort_values(by='Date', inplace=True)
            # Check for duplicate dates and keep the last one if necessary
            revenue_df.drop_duplicates(subset=['Date'], keep='last', inplace=True)
            # Reset index after sorting and dropping duplicates
            revenue_df.reset_index(drop=True, inplace=True)
        return revenue_df

    except requests.exceptions.RequestException as e:
        # The error text embeds the request URL, which contains the API key;
        # mask it so the key does not end up in saved notebook output.
        error_text = str(e)
        if api_key:
            error_text = error_text.replace(str(api_key), "***")
        print(f"Error fetching data for {symbol}: {error_text}")
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary: keep the "never raises" contract for callers.
        print(f"An unexpected error occurred for {symbol}: {e}")
        return pd.DataFrame()


print("--- Retrying Extracting Tesla (TSLA) Revenue Data from Financial Modeling Prep ---")
# Second attempt for Tesla.
tesla_revenue_fmp = get_quarterly_revenue("TSLA", fmp_api_key)

# An empty frame is the helper's failure signal.
if tesla_revenue_fmp.empty:
    print("Failed to extract Tesla revenue data from FMP. Please check your API key and limits.")
else:
    print("Tesla Revenue Data (last 5 rows) from FMP:")
    display(tesla_revenue_fmp.tail())

print("\n" + "=" * 50 + "\n")

print("--- Retrying Extracting GameStop (GME) Revenue Data from Financial Modeling Prep ---")
# Second attempt for GameStop.
gme_revenue_fmp = get_quarterly_revenue("GME", fmp_api_key)

# An empty frame is the helper's failure signal.
if gme_revenue_fmp.empty:
    print("Failed to extract GameStop revenue data from FMP. Please check your API key and limits.")
else:
    print("GameStop Revenue Data (last 5 rows) from FMP:")
    display(gme_revenue_fmp.tail())

print("\n" + "=" * 50 + "\n")

# Results live in tesla_revenue_fmp and gme_revenue_fmp;
# the plotting step that follows consumes them when they are populated.

--- Retrying Extracting Tesla (TSLA) Revenue Data from Financial Modeling Prep ---
Error fetching data for TSLA: 401 Client Error: Unauthorized for url: https://financialmodelingprep.com/api/v3/income-statement/TSLA?period=quarter&apikey=YOUR_API_KEY
Failed to extract Tesla revenue data from FMP. Please check your API key and limits.


--- Retrying Extracting GameStop (GME) Revenue Data from Financial Modeling Prep ---
Error fetching data for GME: 401 Client Error: Unauthorized for url: https://financialmodelingprep.com/api/v3/income-statement/GME?period=quarter&apikey=YOUR_API_KEY
Failed to extract GameStop revenue data from FMP. Please check your API key and limits.




## Summary:

### Data Analysis Key Findings

*   The initial attempt to obtain revenue data from Macrotrends was unsuccessful.
*   Alternative data sources, specifically financial data APIs like Alpha Vantage and Financial Modeling Prep, were identified as potential sources for quarterly revenue data.
*   Attempts to extract revenue data using the Financial Modeling Prep API failed due to the absence of a valid API key, resulting in a `401 Client Error: Unauthorized`.
*   The data extraction step could not be completed because a valid API key was required and not provided.

### Insights or Next Steps

*   To proceed with plotting the revenue data, a valid API key for a financial data provider like Financial Modeling Prep or Alpha Vantage is required to successfully extract the necessary quarterly revenue information for Tesla and GameStop.
*   Once the revenue data is successfully extracted, it needs to be integrated with the stock price data based on their respective dates for plotting and analysis.
