## EXAMPLE WITHOUT GRADIO APP

In [11]:
# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
from IPython.display import HTML

# Function to fetch and display tables from a URL
def fetch_and_display_tables(url, timeout=30):
    """
    Fetch every HTML <table> found at a URL and render each one in the notebook.

    Args:
    url (str): The URL to fetch tables from.
    timeout (float): Seconds to wait for the server before giving up (default 30).

    Returns:
    None: Tables are rendered with IPython's display(); problems are reported with print().
    """
    # `display` is used below but was never imported by this cell; importing it
    # here makes the dependency explicit instead of relying on a notebook builtin.
    from IPython.display import display

    try:
        # Without a timeout, requests can wait indefinitely on a silent server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Covers malformed URLs, DNS failures, refused connections and timeouts.
        print(f"Failed to retrieve the content from the URL. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve the content from the URL. Status code: {response.status_code}")
        return

    # Parse the HTML content and collect every <table> element
    soup = BeautifulSoup(response.content, 'html.parser')
    tables = soup.find_all('table')

    if not tables:
        print("No tables found in the provided URL.")
        return

    for index, table in enumerate(tables):
        try:
            # read_html expects a file-like object for literal HTML, hence StringIO
            df = pd.read_html(StringIO(str(table)))[0]
        except ValueError:
            # pandas raises ValueError when a <table> holds nothing it can parse;
            # skip that table rather than aborting the remaining ones.
            print(f"Table {index + 1} could not be parsed and was skipped.")
            continue

        # Heading tag is now properly closed (the original emitted "<h2>Table N")
        display(HTML(f"<h2>Table {index + 1}</h2>"))
        display(df)


In [12]:

# Demo run: render every table found on the BBC Paris 2024 medals page
url = 'https://www.bbc.com/sport/olympics/paris-2024/medals'
fetch_and_display_tables(url=url)


Unnamed: 0,Rank,Country,GGold,SSilver,BBronze,TotalTotal
0,1,JPNJapan,7,2,4,13
1,2,CHNChina,6,6,2,14
2,3,AUSAustralia,6,4,1,11
3,4,FRAFrance,5,9,4,18
4,5,KORSouth Korea,5,3,3,11
...,...,...,...,...,...,...
200,44,BVIBritish Virgin Islands,0,0,0,0
201,44,AVIAmerican Virgin Islands,0,0,0,0
202,44,YEMYemen,0,0,0,0
203,44,ZAMZambia,0,0,0,0


## EXAMPLE WITH GRADIO APP

In [35]:
# Import necessary libraries
import requests  # To make HTTP requests
from bs4 import BeautifulSoup  # To parse HTML content
import pandas as pd  # To handle data in table format
from io import StringIO  # To handle string input/output as file
import gradio as gr  # To create the interactive web interface
import sys  # To interact with the interpreter
import os  # To interact with the operating system
import contextlib  # To manage contexts, useful for suppressing output

# Global variable to store dataframes.
# Starts as None; fetch_and_display_tables() rebinds it to a list of
# ("Table N", DataFrame) tuples so the parsed tables stay reachable
# from other cells after the Gradio callback has returned.
dfs = None

# Function to fetch and display tables from a URL
def fetch_and_display_tables(url, timeout=30, max_rows=10):
    """
    Fetch HTML tables from a given URL and return an HTML preview of each one.

    Every successfully parsed table is also stored in the module-level ``dfs``
    list as a ``(name, DataFrame)`` tuple. ``dfs`` is reset at the start of each
    call, so after a failed fetch it is empty rather than holding the tables of
    a previously fetched URL.

    Args:
    url (str): The URL to fetch tables from.
    timeout (float): Seconds to wait for the server before giving up (default 30).
    max_rows (int): Maximum number of rows previewed per table (default 10).

    Returns:
    str: HTML string containing the table previews, or a plain message when the
    page could not be retrieved or contains no usable tables.
    """
    from html import escape  # stdlib; used to neutralise markup in error text

    global dfs  # Use the global variable to store dataframes

    # Reset first so stale results never outlive a failed request.
    dfs = []

    try:
        # Without a timeout, requests can wait indefinitely on a silent server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Covers malformed URLs, DNS failures, refused connections and timeouts.
        # The message is shown in an HTML output, so escape the exception text.
        return f"Failed to retrieve the content from the URL. Error: {escape(str(exc))}"

    # Check if the request was successful
    if response.status_code != 200:
        return f"Failed to retrieve the content from the URL. Status code: {response.status_code}"

    # Parse the HTML content and collect every <table> element
    soup = BeautifulSoup(response.content, 'html.parser')
    tables = soup.find_all('table')

    # If no tables are found, return a message
    if not tables:
        return "No tables found in the provided URL."

    for index, table in enumerate(tables):
        try:
            # read_html expects a file-like object for literal HTML, hence StringIO
            df = pd.read_html(StringIO(str(table)))[0]
        except ValueError:
            # pandas raises ValueError when a <table> holds nothing it can parse;
            # skip it (its number stays reserved) instead of failing the request.
            continue
        dfs.append((f"Table {index + 1}", df))

    # Every <table> on the page was empty/unparseable
    if not dfs:
        return "No tables found in the provided URL."

    # Collect fragments and join once instead of repeated string concatenation
    fragments = []
    for name, df in dfs:
        if len(df) <= max_rows:
            fragments.append(f"<h3>{name}</h3>")
        else:
            fragments.append(
                f"<h3>{name} - Truncated to {max_rows} rows ({len(df)} total rows)</h3>"
            )
        # Only the first max_rows rows are rendered; the full frame stays in dfs
        fragments.append(df.head(max_rows).to_html(index=False))

    # Indicate that all tables are displayed
    fragments.append("<h3>No more tables to display</h3>")
    return "".join(fragments)

# Function to be used in Gradio app
def gradio_table_fetcher(url):
    """
    Gradio callback that forwards the submitted URL to fetch_and_display_tables.

    Args:
    url (str): The URL entered in the interface's text input.

    Returns:
    str: Whatever fetch_and_display_tables produces for that URL
    (an HTML string with the tables, or a status message).
    """
    rendered = fetch_and_display_tables(url)
    return rendered

# Build the Gradio interface: one text input in, rendered HTML out
iface = gr.Interface(
    # Presentation
    title="Table Fetcher",  # Title of the interface
    description="Enter a URL and fetch tables found at the URL",  # Description of the interface
    # Wiring
    fn=gradio_table_fetcher,  # Callback invoked with the submitted text
    inputs="text",  # Input component type
    outputs="html",  # Output component type
    # Behaviour
    allow_flagging="never",  # Disable flagging feature
)

# Context manager to suppress stdout and stderr
@contextlib.contextmanager
def suppress_stdout_stderr():
    """
    Temporarily discard everything written to sys.stdout and sys.stderr.

    While the managed block runs, both streams point at the same handle opened
    on os.devnull; the previous stream objects are put back on exit, including
    when the block raises. Useful to keep Gradio's launch messages out of the
    notebook output.
    """
    with open(os.devnull, 'w') as sink, \
            contextlib.redirect_stdout(sink), \
            contextlib.redirect_stderr(sink):
        yield  # Run the caller's block with both streams silenced

# Launch the Gradio app without displaying the output message.
# Anything written to sys.stdout / sys.stderr while launch() runs is discarded,
# and both streams are restored as soon as the block exits.
with suppress_stdout_stderr():
    iface.launch()  # Launch the Gradio interface


In [29]:
# # Import necessary libraries
# import requests
# from bs4 import BeautifulSoup
# import pandas as pd
# from io import StringIO
# import gradio as gr

# # Global variable to store dataframes and table options
# dfs = []
# selected_table = None
# table_options = []

# # Function to fetch and display tables from a URL
# def fetch_and_display_tables(url):
#     global dfs, table_options
#     # Fetch the content from the URL
#     response = requests.get(url)
    
#     if response.status_code != 200:
#         return f"Failed to retrieve the content from the URL. Status code: {response.status_code}"

#     # Parse the HTML content
#     soup = BeautifulSoup(response.content, 'html.parser')
    
#     # Find all tables in the HTML
#     tables = soup.find_all('table')
    
#     if not tables:
#         return "No tables found in the provided URL."
    
#     # List to store all dataframes and reset table options
#     dfs = []
#     table_options = []
    
#     # Iterate through each table and store it
#     for index, table in enumerate(tables):
#         # Wrap the HTML table string in a StringIO object
#         table_html = StringIO(str(table))
        
#         # Read the HTML table into a DataFrame
#         df = pd.read_html(table_html)[0]
        
#         # Append the dataframe to the list
#         dfs.append(df)
#         table_options.append(f"Table {index + 1}")
    
#     # Create a string representation of each dataframe
#     tables_html = ""
#     for index, df in enumerate(dfs):
#         tables_html += f"<h3>Table {index + 1}</h3>"
#         tables_html += df.head(10).to_html(index=False)
    
#     return tables_html

# # Function to select a table
# def select_table(table_name):
#     global selected_table
#     table_index = int(table_name.split()[-1]) - 1
#     selected_table = dfs[table_index]
#     return f"Selected {table_name} is available for further analysis."

# # Function to be used in Gradio app
# def gradio_table_fetcher(url):
#     return fetch_and_display_tables(url)

# # Create the Gradio interface for fetching tables
# fetch_iface = gr.Interface(
#     fn=gradio_table_fetcher,
#     inputs="text",
#     outputs="html",
#     title="Table Fetcher",
#     description="Enter a URL and fetch tables found at the URL"
# )

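# # Create the Gradio interface for selecting a table using a dropdown
# # NOTE(review): `choices=table_options` is read once here, while the list is still
# # empty, and fetch_and_display_tables() later rebinds the name to a new list,
# # so the dropdown would not show fetched tables. Confirm before re-enabling this cell.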
# select_iface = gr.Interface(
#     fn=select_table,
#     inputs=gr.Dropdown(choices=table_options, label="Select Table"),
#     outputs="text",
#     title="Select Table",
#     description="Select a table from the dropdown",
#     allow_flagging="never"
# )

# # Launch both Gradio apps
# fetch_iface.launch(share=False)
# select_iface.launch(share=False)
