In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_wikipedia_headers(url, timeout=10):
    """Collect the text of every heading (h1-h6) found on a web page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with a single 'Headers' column holding one heading per
        row in document order, or None when the page could not be retrieved.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection errors, DNS failures, timeouts, ... follow the same
        # "print and return None" convention as a non-200 response.
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Strip the surrounding whitespace/newlines carried over from the markup.
    headers = [
        header.get_text().strip()
        for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    ]

    return pd.DataFrame({'Headers': headers})

# Example usage: download the English Wikipedia main page and list its headings.
# scrape_wikipedia_headers returns None for a non-200 response, in which case
# the print below shows "None".
wikipedia_url = 'https://en.wikipedia.org/wiki/Main_Page'
headers_dataframe = scrape_wikipedia_headers(wikipedia_url)
print(headers_dataframe)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_former_presidents_india(url, timeout=10):
    """Scrape names and terms of office from a 'tablepress' table on a page.

    Args:
        url: Address of the page listing the former presidents.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Name' and 'Term of Office' columns (one row per
        table row that has at least two cells), or None when the page could
        not be retrieved or the table is not present.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when nothing matches (e.g. after a site redesign);
    # report that instead of failing with an AttributeError below.
    presidents_table = soup.find('table', {'class': 'tablepress'})
    if presidents_table is None:
        print("Could not find the presidents table on the page.")
        return None

    records = []
    # Row 0 is skipped (presumably the header row).
    for row in presidents_table.find_all('tr')[1:]:
        columns = row.find_all('td')
        if len(columns) < 2:
            # Separator/header-only rows carry no name/term pair.
            continue
        records.append({
            'Name': columns[0].get_text(strip=True),
            'Term of Office': columns[1].get_text(strip=True),
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(records, columns=['Name', 'Term of Office'])

# Example usage: scrape the former-presidents page of presidentofindia.nic.in.
# scrape_former_presidents_india returns None for a non-200 response, in which
# case the print below shows "None".
presidents_url = 'https://presidentofindia.nic.in/former-presidents.htm'
presidents_dataframe = scrape_former_presidents_india(presidents_url)
print(presidents_dataframe)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_mens_odi_teams(url, top_n=10, timeout=10):
    """Scrape the leading teams from a men's ODI team-rankings page.

    Args:
        url: Address of the rankings page.
        top_n: Maximum number of teams to return (default 10). Pass None to
            return every well-formed row of the table.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Team', 'Matches', 'Points' and 'Rating' columns
        (values kept as the stripped strings shown on the page), or None
        when the page could not be retrieved or the rankings table is
        missing.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when nothing matches (e.g. the table is rendered
    # client-side or the markup changed); report it instead of crashing.
    table = soup.find('table', {'class': 'table rankings-table'})
    if table is None:
        print("Could not find the rankings table on the page.")
        return None

    records = []
    # Row 0 is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        if top_n is not None and len(records) >= top_n:
            break
        columns = row.find_all('td')
        if len(columns) < 5:
            # Malformed/separator row: indices 1-4 would not all exist.
            continue
        records.append({
            'Team': columns[1].text.strip(),
            'Matches': columns[2].text.strip(),
            'Points': columns[3].text.strip(),
            'Rating': columns[4].text.strip(),
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(records, columns=['Team', 'Matches', 'Points', 'Rating'])

# Example usage: scrape the ICC men's ODI team rankings page.
# scrape_mens_odi_teams returns None for a non-200 response, in which case
# the print below shows "None".
icc_mens_url = 'https://www.icc-cricket.com/rankings/mens/team-rankings/odi'
mens_odi_teams_df = scrape_mens_odi_teams(icc_mens_url)
print(mens_odi_teams_df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_womens_odi_teams(url, top_n=10, timeout=10):
    """Scrape the leading teams from a women's ODI team-rankings page.

    Args:
        url: Address of the rankings page.
        top_n: Maximum number of teams to return (default 10). Pass None to
            return every well-formed row of the table.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Team', 'Matches', 'Points' and 'Rating' columns
        (values kept as the stripped strings shown on the page), or None
        when the page could not be retrieved or the rankings table is
        missing.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when nothing matches (e.g. the table is rendered
    # client-side or the markup changed); report it instead of crashing.
    table = soup.find('table', {'class': 'table rankings-table'})
    if table is None:
        print("Could not find the rankings table on the page.")
        return None

    records = []
    # Row 0 is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        if top_n is not None and len(records) >= top_n:
            break
        columns = row.find_all('td')
        if len(columns) < 5:
            # Malformed/separator row: indices 1-4 would not all exist.
            continue
        records.append({
            'Team': columns[1].text.strip(),
            'Matches': columns[2].text.strip(),
            'Points': columns[3].text.strip(),
            'Rating': columns[4].text.strip(),
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(records, columns=['Team', 'Matches', 'Points', 'Rating'])

# Example usage: scrape the ICC women's ODI team rankings page.
# scrape_womens_odi_teams returns None for a non-200 response, in which case
# the print below shows "None".
icc_womens_url = 'https://www.icc-cricket.com/rankings/womens/team-rankings/odi'
womens_odi_teams_df = scrape_womens_odi_teams(icc_womens_url)
print(womens_odi_teams_df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_cnbc_news(url, timeout=10):
    """Scrape headline, time and link for the articles in a news card.

    Args:
        url: Address of the news page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Headline', 'Time' and 'News Link' columns, where a
        missing time or link is recorded as 'N/A'; or None when the page
        could not be retrieved or no card container was found.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): find() yields only the FIRST div with class 'Card', so
    # only titles nested inside that one element are collected. If every
    # card on the page is wanted, confirm and iterate over find_all().
    news_container = soup.find('div', {'class': 'Card'})
    if news_container is None:
        print("Could not find any news cards on the page.")
        return None

    records = []
    for article in news_container.find_all('div', {'class': 'Card-title'}):
        # find_next() and find() return None when nothing matches, so each
        # lookup is guarded rather than dereferenced blindly.
        time_tag = article.find_next('div', {'class': 'Card-time'})
        link_tag = article.find('a')
        records.append({
            'Headline': article.text.strip(),
            'Time': time_tag.text.strip() if time_tag is not None else 'N/A',
            # .get() also covers an anchor that has no href attribute.
            'News Link': link_tag.get('href', 'N/A') if link_tag is not None else 'N/A',
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(records, columns=['Headline', 'Time', 'News Link'])

# Example usage: scrape the CNBC world news page.
# scrape_cnbc_news returns None for a non-200 response, in which case the
# print below shows "None".
cnbc_url = 'https://www.cnbc.com/world/?region=world'
cnbc_news_df = scrape_cnbc_news(cnbc_url)
print(cnbc_news_df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_ai_articles(url, timeout=10):
    """Scrape title, authors, date and link of the most-downloaded articles.

    Args:
        url: Address of the journal's most-downloaded-articles page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Paper Title', 'Authors', 'Published Date' and
        'Paper URL' columns, where a missing field is recorded as 'N/A'; or
        None when the page could not be retrieved or the article container
        was not found.
    """
    # Local import: urljoin is only needed here and the cell's import block
    # does not provide it.
    from urllib.parse import urljoin

    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when nothing matches; report it instead of
    # failing with an AttributeError below.
    articles_container = soup.find('div', {'id': 'most-downloaded-articles'})
    if articles_container is None:
        print("Could not find the most-downloaded articles on the page.")
        return None

    def text_or_na(tag):
        """Return the tag's stripped text, or 'N/A' when the tag is missing."""
        return tag.text.strip() if tag is not None else 'N/A'

    records = []
    for article in articles_container.find_all('li'):
        title_link = article.find('a', {'class': 'article-content-title'})
        if title_link is None:
            # A list item without a title link is not an article entry.
            continue

        # Resolve the link against the final page URL so that both
        # site-relative and already-absolute hrefs produce a valid address
        # (plain string prefixing mangled absolute hrefs).
        href = title_link.get('href')
        paper_url = urljoin(response.url, href) if href else 'N/A'

        records.append({
            'Paper Title': title_link.text.strip(),
            'Authors': text_or_na(article.find('div', {'class': 'authors'})),
            'Published Date': text_or_na(article.find('div', {'class': 'published-date'})),
            'Paper URL': paper_url,
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(
        records,
        columns=['Paper Title', 'Authors', 'Published Date', 'Paper URL'],
    )

# Example usage: scrape the most-downloaded articles of the Artificial
# Intelligence journal. scrape_ai_articles returns None for a non-200
# response, in which case the print below shows "None".
ai_articles_url = 'https://www.journals.elsevier.com/artificial-intelligence/most-downloaded-articles'
ai_articles_df = scrape_ai_articles(ai_articles_url)
print(ai_articles_df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_dineout_restaurants(url, timeout=10):
    """Scrape name, cuisine, location, rating and image of restaurant cards.

    Args:
        url: Address of the restaurant listing page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with 'Restaurant Name', 'Cuisine', 'Location', 'Ratings'
        and 'Image URL' columns, one row per 'restnt-card' element, where
        any field missing from a card is recorded as 'N/A'; or None when the
        page could not be retrieved.
    """
    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to retrieve the web page: {error}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve the web page.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or_na(tag):
        """Return the tag's stripped text, or 'N/A' when the tag is missing."""
        return tag.text.strip() if tag is not None else 'N/A'

    records = []
    for restaurant in soup.find_all('div', {'class': 'restnt-card'}):
        # The image wrapper may exist without an <img>, and the <img> may
        # lack a src attribute; both cases fall back to 'N/A' like the
        # other fields instead of raising.
        image = restaurant.find('div', {'class': 'restnt-img'})
        img_tag = image.find('img') if image is not None else None
        image_url = img_tag.get('src', 'N/A') if img_tag is not None else 'N/A'

        records.append({
            'Restaurant Name': text_or_na(restaurant.find('div', {'class': 'restnt-card-index'})),
            'Cuisine': text_or_na(restaurant.find('div', {'class': 'restnt-cuisine-text'})),
            'Location': text_or_na(restaurant.find('div', {'class': 'restnt-loc-text'})),
            'Ratings': text_or_na(restaurant.find('span', {'class': 'restnt-rating'})),
            'Image URL': image_url,
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(
        records,
        columns=['Restaurant Name', 'Cuisine', 'Location', 'Ratings', 'Image URL'],
    )

# Example usage: scrape the Dineout listing of Bangalore restaurants.
# scrape_dineout_restaurants returns None for a non-200 response, in which
# case the print below shows "None".
dineout_url = 'https://www.dineout.co.in/bangalore-restaurants'
dineout_restaurants_df = scrape_dineout_restaurants(dineout_url)
print(dineout_restaurants_df)
