In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### 1) Write a python program to display all the header tags from wikipedia.org and make data frame.

In [3]:
def scrape_wikipedia_headers(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    headers = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    headers_text = [header.text for header in headers]
    df = pd.DataFrame({'Header': headers_text})
    return df

df = scrape_wikipedia_headers('https://en.wikipedia.org/wiki/Main_Page')

print(df)


                          Header
0                      Main Page
1           Welcome to Wikipedia
2  From today's featured article
3               Did you know ...
4                    In the news
5                    On this day
6       Today's featured picture
7       Other areas of Wikipedia
8    Wikipedia's sister projects
9            Wikipedia languages


### 2) Write s python program to display list of respected former presidents of India(i.e. Name , Term ofoffice)
from https://presidentofindia.nic.in/former-presidents.htm and make data frame

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_former_presidents(url):
    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the div elements with class "presidentListing"
    divs = soup.find_all('div', class_='presidentListing')

    # Initialize empty lists to store the data
    names = []
    terms = []

    # Iterate over the div elements
    for div in divs:
        # Extract the name and term of office
        name = div.find('h3').text.strip()
        term = div.find('p').text.strip()

        # Append the data to the respective lists
        names.append(name)
        terms.append(term)

    # Create a pandas DataFrame with the extracted data
    df = pd.DataFrame({'Name': names, 'Term of Office': terms})

    return df


In [5]:
url = 'https://presidentofindia.nic.in/former-presidents.htm'
presidents_df = scrape_former_presidents(url)

if presidents_df is not None:
    print(presidents_df)


                                           Name  \
0           Shri Ram Nath Kovind (birth - 1945)   
1             Shri Pranab Mukherjee (1935-2020)   
2   Smt Pratibha Devisingh Patil (birth - 1934)   
3            DR. A.P.J. Abdul Kalam (1931-2015)   
4            Shri K. R. Narayanan (1920 - 2005)   
5           Dr Shankar Dayal Sharma (1918-1999)   
6               Shri R Venkataraman (1910-2009)   
7                  Giani Zail Singh (1916-1994)   
8         Shri Neelam Sanjiva Reddy (1913-1996)   
9          Dr. Fakhruddin Ali Ahmed (1905-1977)   
10     Shri Varahagiri Venkata Giri (1894-1980)   
11                 Dr. Zakir Husain (1897-1969)   
12     Dr. Sarvepalli Radhakrishnan (1888-1975)   
13              Dr. Rajendra Prasad (1884-1963)   

                                       Term of Office  
0      Term of Office: 25 July, 2017 to 25 July, 2022  
1      Term of Office: 25 July, 2012 to 25 July, 2017  
2      Term of Office: 25 July, 2007 to 25 July, 2012  
3      Ter

### 3) Write a python program to scrape cricket rankings from icc-cricket.com. You have to scrape and make data frame
#### **a) Top 10 ODI teams in men’s cricket along with the records for matches, points and rating.**
#### b) Top 10 ODI Batsmen along with the records of their team andrating.

#### c) Top 10 ODI bowlers along with the records of their team andrating.

In [6]:
# a) Top 10 ODI teams in men’s cricket along with the records for matches, points and rating.

def scrape_odi_teams(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    teams = []
    matches = []
    points = []
    ratings = []

    for row in rows[1:]:
        columns = row.find_all('td')
        team = columns[1].find('span', class_='u-hide-phablet').text.strip()
        match = columns[2].text.strip()
        point = columns[3].text.strip().replace(',', '')
        rating = columns[4].text.strip()

        teams.append(team)
        matches.append(match)
        points.append(point)
        ratings.append(rating)

    df = pd.DataFrame({
        'Team': teams,
        'Matches': matches,
        'Points': points,
        'Rating': ratings
    })

    return df

def scrape_odi_players(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    players = []
    teams = []
    ratings = []

    for row in rows[1:]:
        columns = row.find_all('td')
        player_element = columns[1].find('span', class_='u-hide-phablet')
        player = player_element.text.strip() if player_element else ''
        team_element = columns[2].find('span', class_='u-hide-phablet')
        team = team_element.text.strip() if team_element else ''
        rating = columns[4].text.strip()

        players.append(player)
        teams.append(team)
        ratings.append(rating)

    df = pd.DataFrame({
        'Player': players,
        'Team': teams,
        'Rating': ratings
    })

    return df

teams_url = 'https://www.icc-cricket.com/rankings/mens/team-rankings/odi'
odi_teams_df = scrape_odi_teams(teams_url)


print("Top 10 ODI Teams in Men's Cricket:")
print(odi_teams_df.head(10))
print()



Top 10 ODI Teams in Men's Cricket:
           Team Matches Points Rating
0     Australia      23   2714    118
1      Pakistan      20   2316    116
2         India      33   3807    115
3   New Zealand      27   2806    104
4       England      24   2426    101
5  South Africa      19   1910    101
6    Bangladesh      25   2451     98
7     Sri Lanka      28   2378     85
8   Afghanistan      13   1067     82
9   West Indies      32   2201     69



In [7]:
# b) Top 10 ODI Batsmen along with the records of their team andrating.

def scrape_odi_players(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    players = []
    teams = []
    ratings = []

    for row in rows[1:]:
        columns = row.find_all('td')
        player_element = columns[1].find('a')
        player = player_element.text.strip() if player_element else ''
        team_element = columns[2].find('span', class_='table-body__logo-text')
        team = team_element.text.strip() if team_element else ''
        rating = columns[3].text.strip()

        players.append(player)
        teams.append(team)
        ratings.append(rating)

    df = pd.DataFrame({
        'Player': players,
        'Team': teams,
        'Rating': ratings
    })

    return df

batsmen_url = 'https://www.icc-cricket.com/rankings/mens/player-rankings/odi/batting'
odi_batsmen_df = scrape_odi_players(batsmen_url)

print("Top 10 Men's ODI Batsmen:")
print(odi_batsmen_df.head(10))
print()


Top 10 ODI Batsmen:
                  Player Team Rating
0                                886
1  Rassie van der Dussen   SA    777
2           Fakhar Zaman  PAK    755
3            Imam-ul-Haq  PAK    745
4           Shubman Gill  IND    738
5           Harry Tector  IRE    726
6           David Warner  AUS    726
7            Virat Kohli  IND    719
8        Quinton de Kock   SA    718
9           Rohit Sharma  IND    707



In [13]:
#c) Top 10 ODI bowlers along with the records of their team andrating.

def scrape_odi_players(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    players = []
    teams = []
    ratings = []

    for row in rows[1:]:
        columns = row.find_all('td')
        player_element = columns[1].find('a')
        player = player_element.text.strip() if player_element else ''
        team_element = columns[2].find('span', class_='table-body__logo-text')
        team = team_element.text.strip() if team_element else ''
        rating = columns[3].text.strip()

        players.append(player)
        teams.append(team)
        ratings.append(rating)

    df = pd.DataFrame({
        'Player': players,
        'Team': teams,
        'Rating': ratings
    })

    return df

bowlers_url = 'https://www.icc-cricket.com/rankings/mens/player-rankings/odi/bowling'
odi_bowlers_df = scrape_odi_players(bowlers_url)


print("Top 10 Men's ODI Bowlers:")
print(odi_bowlers_df.head(10))
print()

Top 10 Men's ODI Bowlers:
             Player Team Rating
0                           705
1    Mohammed Siraj  IND    691
2    Mitchell Starc  AUS    686
3        Matt Henry   NZ    667
4       Trent Boult   NZ    660
5        Adam Zampa  AUS    652
6       Rashid Khan  AFG    640
7    Shaheen Afridi  PAK    630
8  Mujeeb Ur Rahman  AFG    630
9     Mohammad Nabi  AFG    626



### 4) Write a python program to scrape cricket rankings from icc-cricket.com. You have to scrape and make data frame
#### a) Top 10 ODI teams in women’s cricket along with the records for matches, points and rating.
#### b) Top 10 women’s ODI Batting players along with the records of their team and rating.
#### c) Top 10 women’s ODI all-rounder along with the records of their team and rating.

In [14]:
def scrape_odi_teams_women(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    teams = []
    matches = []
    points = []
    ratings = []

    for row in rows[1:11]:  # Limit to the top 10 teams
        columns = row.find_all('td')
        team_element = columns[1].find('span', class_='u-hide-phablet')
        team = team_element.text.strip() if team_element else ''
        match = columns[2].text.strip()
        point = columns[3].text.strip().replace(',', '')
        rating = columns[4].text.strip()

        teams.append(team)
        matches.append(match)
        points.append(point)
        ratings.append(rating)

    df = pd.DataFrame({
        'Team': teams,
        'Matches': matches,
        'Points': points,
        'Rating': ratings
    })

    return df

def scrape_odi_batting_players_women(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    players = []
    teams = []
    ratings = []

    for row in rows[1:11]:  # Limit to the top 10 players
        columns = row.find_all('td')
        player_element = columns[1].find('a')
        player = player_element.text.strip() if player_element else ''
        team_element = columns[2].find('span', class_='table-body__logo-text')
        team = team_element.text.strip() if team_element else ''
        rating = columns[3].text.strip()

        players.append(player)
        teams.append(team)
        ratings.append(rating)

    df = pd.DataFrame({
        'Player': players,
        'Team': teams,
        'Rating': ratings
    })

    return df

def scrape_odi_allrounders_women(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')

    players = []
    teams = []
    ratings = []

    for row in rows[1:11]:  # Limit to the top 10 players
        columns = row.find_all('td')
        player_element = columns[1].find('a')
        player = player_element.text.strip() if player_element else ''
        team_element = columns[2].find('span', class_='table-body__logo-text')
        team = team_element.text.strip() if team_element else ''
        rating = columns[3].text.strip()

        players.append(player)
        teams.append(team)
        ratings.append(rating)

    df = pd.DataFrame({
        'Player': players,
        'Team': teams,
        'Rating': ratings
    })

    return df

teams_url = 'https://www.icc-cricket.com/rankings/womens/team-rankings/odi'
odi_teams_women_df = scrape_odi_teams_women(teams_url)

batting_players_url = 'https://www.icc-cricket.com/rankings/womens/player-rankings/odi/batting'
odi_batting_players_women_df = scrape_odi_batting_players_women(batting_players_url)

allrounders_url = 'https://www.icc-cricket.com/rankings/womens/player-rankings/odi/all-rounder'
odi_allrounders_women_df = scrape_odi_allrounders_women(allrounders_url)

print("Top 10 ODI Teams in Women's Cricket:")
print(odi_teams_women_df)
print()

print("Top 10 Women's ODI Batting Players:")
print(odi_batting_players_women_df)
print()

print("Top 10 Women's ODI All-rounders:")
print(odi_allrounders_women_df)
print()


Top 10 ODI Teams in Women's Cricket:
           Team Matches Points Rating
0     Australia      21   3603    172
1       England      28   3342    119
2  South Africa      26   3098    119
3         India      27   2820    104
4   New Zealand      28   2688     96
5   West Indies      29   2743     95
6    Bangladesh      14    977     70
7     Sri Lanka      12    820     68
8      Thailand      12    806     67
9      Pakistan      27   1678     62

Top 10 Women's ODI Batting Players:
             Player Team Rating
0                           758
1       Beth Mooney  AUS    754
2   Laura Wolvaardt   SA    732
3    Natalie Sciver  ENG    731
4       Meg Lanning  AUS    717
5  Harmanpreet Kaur  IND    716
6   Smriti Mandhana  IND    714
7      Ellyse Perry  AUS    626
8   Stafanie Taylor   WI    618
9    Tammy Beaumont  ENG    595

Top 10 Women's ODI All-rounders:
             Player Team Rating
0                           382
1    Natalie Sciver  ENG    371
2      Ellyse Perry  AUS  

### 5) Write a python program to scrape mentioned news details from https://www.cnbc.com/world/?region=world and make data frame
##### i) Headline
##### ii) Time
##### iii) News Link

In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_news_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    articles = soup.find_all('div', class_='Card-titleContainer')

    headlines = []
    times = []
    links = []

    for article in articles:
        headline_element = article.find('a')
        if headline_element:
            headline = headline_element.text.strip()
        else:
            headline = ''

        time_element = article.find('time', class_='Card-time')
        if time_element:
            time = time_element.text.strip()
        else:
            time = ''

        link_element = article.find('a')
        if link_element:
            link = 'https://www.cnbc.com' + link_element['href']
        else:
            link = ''

        headlines.append(headline)
        times.append(time)
        links.append(link)

    df = pd.DataFrame({
        'Headline': headlines,
        'Time': times,
        'News Link': links
    })

    return df

url = 'https://www.cnbc.com/world/?region=world'
news_df = scrape_news_details(url)

print("News Details:")
for _, row in news_df.iterrows():
    print(f"Headline: {row['Headline']}")
    print(f"Time: {row['Time']}")
    print(f"News Link: {row['News Link']}")
    print()


News Details:
Headline: Stocks tumble on Friday, notching weekly losses, as traders' rate hike fears return: Live updates
Time: 
News Link: https://www.cnbc.comhttps://www.cnbc.com/2023/07/06/stock-market-today-live-updates.html

Headline: Ukraine reports advances near eastern city of Bakhmut; Germany opposes sending cluster bombs to Kyiv
Time: 
News Link: https://www.cnbc.comhttps://www.cnbc.com/2023/07/07/ukraine-war-live-updates-latest-news-on-russia-and-the-war-in-ukraine.html

Headline: Wagner leader Prigozhin in St. Petersburg, Belarus leader says; Zelenskyy calls for more weapons
Time: 
News Link: https://www.cnbc.comhttps://www.cnbc.com/2023/07/06/ukraine-war-live-updates-latest-news-on-russia-and-the-war-in-ukraine.html

Headline: Zelenskyy warns of provocations at nuclear plant; Xi reportedly warned Putin against nuclear attack
Time: 
News Link: https://www.cnbc.comhttps://www.cnbc.com/2023/07/05/ukraine-war-live-updates-latest-news-on-russia-and-the-war-in-ukraine.html

Head

### 6) Write a python program to scrape the details of most downloaded articles from AI in last 90 days.https://www.journals.elsevier.com/artificial-intelligence/most-downloaded-articles
#### Scrape below mentioned details and make data frame
#### i) Paper Title
#### ii) Authors
#### iii) Published Date
#### iv) Paper URL

In [70]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_most_downloaded_articles(time_period):
    """Scrape the details of most downloaded articles from AI in the last `time_period` days."""

    url = "https://www.journals.elsevier.com/artificial-intelligence/most-downloaded-articles"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    articles = []
    for li in soup.find_all("li", class_="sc-9zxyh7-1"):
        article_details = {}
        article = li.find("article")
        

        title_element = article.find("h2", class_="sc-1qrq3sd-1")
        article_details["paper_title"] = title_element.text.strip() if title_element else ""
        article_details["paper_url"] = title_element.get("href")



        authors_element = article.find("span", class_="sc-1w3fpd7-0")
        article_details["authors"] = authors_element.text.strip().split(", ") if authors_element else []
        date_element = article.find("span", class_="sc-1thf9ly-2")
        article_details["published_date"] = date_element.text.strip() if date_element else ""

        articles.append(article_details)

    return articles

if __name__ == "__main__":
    articles = scrape_most_downloaded_articles(90)
    df = pd.DataFrame(articles, columns=["paper_title", "authors", "published_date", "paper_url"])

    pd.set_option("display.max_colwidth", 60)  # Adjust column width
    print("Most Downloaded Articles in the Last 90 Days:")
    print(df.to_string(index=False, justify="left", max_colwidth=40, na_rep=""))


Most Downloaded Articles in the Last 90 Days:
paper_title                              authors                                  published_date paper_url
                        Reward is enough [David Silver, Satinder Singh, Doina ...   October 2021 None     
Explanation in artificial intelligenc...                             [Tim Miller]  February 2019 None     
  Creativity and artificial intelligence                      [Margaret A. Boden]    August 1998 None     
Conflict-based search for optimal mul... [Guni Sharon, Roni Stern, Ariel Felne...  February 2015 None     
Knowledge graphs as tools for explain...         [Ilaria Tiddi, Stefan Schlobach]   January 2022 None     
Law and logic: A review from an argum...         [Henry Prakken, Giovanni Sartor]   October 2015 None     
Between MDPs and semi-MDPs: A framewo... [Richard S. Sutton, Doina Precup, Sat...    August 1999 None     
Explaining individual predictions whe... [Kjersti Aas, Martin Jullum, Anders L... September 2021 N

### 7) Write a python program to scrape mentioned details from dineout.co.inand make data frame
#### i) Restaurant name
#### ii) Cuisine
#### iii) Location
#### iv) Ratings
#### v) Image URL

In [79]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_dineout_restaurants():
    url = "https://www.dineout.co.in/bangalore-restaurants"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    restaurants = []
    for card in soup.find_all("div", class_="restnt-card"):
        restaurant_details = {}

        name_element = card.find("div", class_="restnt-info")
        restaurant_details["Restaurant Name"] = name_element.find("a").text.strip()


        cuisine_element = card.find("span", class_="double-line-ellipsis")
        restaurant_details["Cuisine"] = cuisine_element.text.strip() if cuisine_element else ""


        location_element = card.find("div", class_="restnt-loc ellipsis")
        restaurant_details["Location"] = location_element.text.strip() if location_element else ""


        ratings_element = card.find("span", class_="ratings-val")
        restaurant_details["Ratings"] = ratings_element.text.strip() if ratings_element else ""


        image_element = card.find("div", class_="restnt-img")
        restaurant_details["Image URL"] = image_element.find("img")["data-src"] if image_element else ""

        restaurants.append(restaurant_details)

    return restaurants

if __name__ == "__main__":
    restaurants = scrape_dineout_restaurants()
    df = pd.DataFrame(restaurants, columns=["Restaurant Name", "Cuisine", "Location", "Ratings", "Image URL"])
    print("Restaurant Details:")
    print(df)


Restaurant Details:
                         Restaurant Name  \
0     The Bier Library Brewery & Kitchen   
1                     The Bangalore Cafe   
2                         Hard Rock Cafe   
3                          Spice Terrace   
4                   Skydeck By Sherlocks   
5                                 Mirage   
6                               Badmaash   
7                             JW Kitchen   
8                             Farzi Cafe   
9                           Uru Brewpark   
10                               Magique   
11                                  Zest   
12                              Hammered   
13                               Toscano   
14  Salt - Indian Restaurant Bar & Grill   
15                        The Biere Club   
16                                 Raahi   
17                                 Shiro   
18                         Chutney Chang   
19                               Sanchez   
20                           Cafe Azzure   

           