# Blog Code

### Import packages

In [1]:
import requests
from bs4 import BeautifulSoup 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## Web Scraping:
### HIV Data

In [6]:
HIV_data = []

url = 'https://en.wikipedia.org/wiki/List_of_countries_by_HIV/AIDS_adult_prevalence_rate'

# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table with the class 'wikitable sortable'
table = soup.find('table', {'class': 'wikitable sortable static-row-numbers'})

# Extract data from the table
rows = table.find_all('tr')  # Find all rows in the table

for row in rows[1:]:
    # Extract data from each cell in the row
    cells = row.find_all(['th', 'td'])
    row_data = [cell.get_text(strip=True) for cell in cells]
    
    # Append row data to HIV_data
    HIV_data.append(row_data)


# Extract headers from the first row
headers = [header.get_text(strip=True) for header in rows[0].find_all('th')]

# Convert HIV_data into a pandas DataFrame
HIV_df = pd.DataFrame(HIV_data, columns=headers)

# rename some columns
HIV_df.rename(columns={'Country/Region': 'Country'}, inplace=True)
HIV_df.rename(columns={'Adult prevalenceof HIV/AIDS[1]': 'Adult prevalenceof HIV/AIDS'}, inplace=True)
HIV_df.rename(columns={'Annual deathsfrom HIV/AIDS[3]': 'Annual deathsfrom HIV/AIDS'}, inplace=True)
HIV_df.rename(columns={'Year of estimate': 'Year'}, inplace=True)



display(HIV_df)

Unnamed: 0,Country,Adult prevalenceof HIV/AIDS,Number of peoplewith HIV/AIDS,Annual deathsfrom HIV/AIDS,Year
0,Eswatini,28.30%,240000,3000,2024
1,Lesotho,24.10%,403000,5380,2024
2,Botswana,22.60%,398500,5240,2024
3,Zimbabwe,22.10%,1660000,25600,2024
4,South Africa,14%,9230000,77000,2024
...,...,...,...,...,...
181,Mongolia,-,2500,-,2024
182,British Virgin Islands,-,48,-,2024
183,American Samoa,-,110,-,2023
184,Nauru,-,270,-,2024


### Population Data

In [12]:
Pop_data = []

url = 'https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population'

# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table with the class 'wikitable sortable'
table = soup.find('table',
                  {'class': 'wikitable sortable sticky-header sort-under mw-datatable col2left col6left'})

# Extract data from the table
rows = table.find_all('tr')  # Find all rows in the table



for row in rows[1:]:
    # Extract data from each cell in the row
    cells = row.find_all(['th', 'td'])
    row_data = [cell.get_text(strip=True) for cell in cells]
    
    # Append row data to HIV_data
    Pop_data.append(row_data)



# Extract headers from the first row
headers = [header.get_text(strip=True) for header in rows[0].find_all('th')]

# Convert HIV_data into a pandas DataFrame
Pop_df = pd.DataFrame(Pop_data, columns=headers)

# remove unwanted columns
Pop_df = Pop_df.drop(columns=['Date' , '' , '% ofworld', 'Source (official or fromtheUnited Nations)'])

# rename the country column
Pop_df.rename(columns={'Location': 'Country'}, inplace=True)

display(Pop_df)

Unnamed: 0,Country,Population
0,World,8097611000
1,China,1409670000
2,1392329000,17.2%
3,United States,335893238
4,Indonesia,279118866
...,...,...
236,Niue,1689
237,Tokelau(NZ),1647
238,Vatican City,764
239,Cocos (Keeling) Islands(Australia),593
