In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the Wikipedia main page and tabulate the text of every heading
# element (h1 through h6) found in it.
url = "https://en.wikipedia.org/wiki/Main_Page"

# Without a timeout, requests waits indefinitely on an unresponsive server and
# the cell never finishes; with one, a requests exception is raised instead.
# NOTE(review): Wikimedia asks clients to send a descriptive User-Agent header;
# if this request comes back non-200, consider passing headers={...} -- confirm.
response = requests.get(url, timeout=10)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, "html.parser")

    # Passing a list to find_all matches any of the listed tag names.
    header_tags = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])

    # Strip surrounding whitespace so the column holds clean labels, matching
    # the .text.strip() convention used by the other scraping cells.
    header_text = [tag.text.strip() for tag in header_tags]

    df = pd.DataFrame(header_text, columns=["Header Text"])

    print(df)
else:
    print("Failed to retrieve the webpage. Status code:", response.status_code)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the first <table> on the former-presidents page into a DataFrame
# with one row per table row: first cell -> "Name", second -> "Term of Office".
url = "https://presidentofindia.nic.in/former-presidents.htm"

# A timeout stops the cell from hanging forever on an unresponsive server.
response = requests.get(url, timeout=10)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, "html.parser")

    # soup.find returns None when nothing matches; guard it so a changed page
    # layout yields an empty frame and a message instead of an AttributeError.
    table = soup.find("table")
    if table is None:
        print("No <table> element found on the page.")

    names = []
    terms_of_office = []

    # [1:] drops the first row (presumably the header row -- confirm against
    # the live page).
    rows = table.find_all("tr")[1:] if table is not None else []
    for row in rows:
        columns = row.find_all("td")
        # Rows with fewer than two <td> cells (spacers, <th>-only rows) cannot
        # supply both fields; skip them rather than raise IndexError.
        if len(columns) < 2:
            continue
        name = columns[0].text.strip()
        term_of_office = columns[1].text.strip()
        names.append(name)
        terms_of_office.append(term_of_office)

    data = {"Name": names, "Term of Office": terms_of_office}
    df = pd.DataFrame(data)

    print(df)
else:
    print("Failed to retrieve the webpage. Status code:", response.status_code)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_and_create_dataframe(url, timeout=10):
    """Scrape news cards from a page and return them as a DataFrame.

    Every ``<div class="Card">`` on the page is inspected for a headline
    (``h3.Card-title``), a timestamp (``time.Card-time``) and a link
    (``a.Card-hed``).

    Note: another cell in this notebook defines a function with the same
    name for a different site; whichever cell was executed last is the one
    in effect.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        A DataFrame with columns ``Headline``, ``Time`` and ``News Link``
        (possibly empty), or ``None`` if the response status is not 200.
        Cards without a headline are skipped; a missing timestamp or link
        is stored as a missing value.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    records = []
    for item in soup.find_all('div', class_='Card'):
        # find() returns None when a sub-element is absent, so check before
        # dereferencing; one incomplete card must not abort the whole scrape.
        title_tag = item.find('h3', class_='Card-title')
        if title_tag is None:
            continue

        time_tag = item.find('time', class_='Card-time')
        link_tag = item.find('a', class_='Card-hed')

        records.append({
            'Headline': title_tag.text.strip(),
            'Time': time_tag.text.strip() if time_tag is not None else None,
            'News Link': link_tag.get('href') if link_tag is not None else None,
        })

    # Explicit columns keep the schema stable even when no card matched.
    return pd.DataFrame(records, columns=['Headline', 'Time', 'News Link'])

# Run the scraper against the CNBC world-news page and show the result.
cnbc_world_url = 'https://www.cnbc.com/world/?region=world'
cnbc_world_df = scrape_and_create_dataframe(cnbc_world_url)

# The scraper returns None on a failed fetch, in which case nothing is shown.
if cnbc_world_df is not None:
    print("CNBC World News:", cnbc_world_df, sep="\n")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_and_create_dataframe(url, timeout=10):
    """Scrape article listings from a page and return them as a DataFrame.

    Every ``<div class="article-content">`` on the page is inspected for a
    title link (``a.anchor-text``), an author list
    (``span.js-article-authors``) and a date (``span.js-article-date``).

    Note: an earlier cell in this notebook defines a function with the same
    name for a different site; whichever cell was executed last is the one
    in effect.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        A DataFrame with columns ``Paper Title``, ``Authors``,
        ``Published Date`` and ``Paper URL`` (possibly empty), or ``None``
        if the response status is not 200. Items without a title link are
        skipped; a missing author list or date is stored as a missing value.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    records = []
    for item in soup.find_all('div', class_='article-content'):
        # The same anchor carries both the title text and the href, so look
        # it up once. find() returns None when it is absent; skip such items
        # instead of failing with AttributeError.
        title_link = item.find('a', class_='anchor-text')
        if title_link is None:
            continue

        authors_tag = item.find('span', class_='js-article-authors')
        date_tag = item.find('span', class_='js-article-date')

        # Distinct local names avoid overwriting the `url` parameter.
        records.append({
            'Paper Title': title_link.text.strip(),
            'Authors': authors_tag.text.strip() if authors_tag is not None else None,
            'Published Date': date_tag.text.strip() if date_tag is not None else None,
            'Paper URL': title_link.get('href'),
        })

    # Explicit columns keep the schema stable even when no item matched.
    return pd.DataFrame(
        records,
        columns=['Paper Title', 'Authors', 'Published Date', 'Paper URL'],
    )

# Run the scraper against the journal's most-downloaded-articles page and
# show the result.
elsevier_url = 'https://www.journals.elsevier.com/artificial-intelligence/most-downloaded-articles'
elsevier_df = scrape_and_create_dataframe(elsevier_url)

# The scraper returns None on a failed fetch, in which case nothing is shown.
if elsevier_df is not None:
    print("Most Downloaded Articles in Artificial Intelligence:", elsevier_df, sep="\n")
