# Economic Analysis: Web Scraping Wikipedia Data

In [None]:

# Import necessary libraries
from io import StringIO

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

# Notebook-wide plotting defaults: seaborn's white-grid look and a wide figure.
sns.set(style="whitegrid")
# Applied after the seaborn call so this explicit size is the last value written.
plt.rcParams.update({"figure.figsize": (12, 6)})


## 1. Web Scraping Data

In [None]:

# Scrape the Wikipedia table of countries by GDP (nominal) into a DataFrame.
url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"

# Identify the client explicitly (Wikimedia's User-Agent policy asks for this;
# add contact details before heavy use) and bound the wait so the cell cannot
# hang forever on a stalled connection.
REQUEST_HEADERS = {
    "User-Agent": "economic-analysis-notebook/1.0 (educational web-scraping example)"
}
REQUEST_TIMEOUT_SECONDS = 30

response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the article.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find the first table carrying the "wikitable" class (not simply the first <table>).
table = soup.find("table", {"class": "wikitable"})
if table is None:
    raise ValueError(
        f"No table with class 'wikitable' found at {url}; the page layout may have changed."
    )

# Read the table into a pandas DataFrame. The markup is wrapped in StringIO
# because passing literal HTML strings to read_html is deprecated.
dfs = pd.read_html(StringIO(str(table)))
gdp_data = dfs[0]

# Show first few rows
gdp_data.head()


## 2. Data Preprocessing

In [None]:

# Clean the scraped table: name the columns, keep country + GDP, and coerce
# GDP to float. Every step builds a new frame, so there is no assignment into
# a slice (which triggers SettingWithCopyWarning).
EXPECTED_COLUMNS = ["Rank", "Country", "GDP_USD_Millions", "Year"]
KEPT_COLUMNS = ["Country", "GDP_USD_Millions"]

# Skip the rename when this cell is re-run on an already-cleaned frame;
# otherwise a second execution would fail with a column-count mismatch.
if list(gdp_data.columns) != KEPT_COLUMNS:
    if gdp_data.shape[1] != len(EXPECTED_COLUMNS):
        raise ValueError(
            f"Expected {len(EXPECTED_COLUMNS)} columns {EXPECTED_COLUMNS}, but the "
            f"scraped table has {gdp_data.shape[1]}: {list(gdp_data.columns)}. "
            "The page layout may have changed; update EXPECTED_COLUMNS."
        )
    # Rename on a copy so the frame produced by the scraping cell is not mutated.
    gdp_data = gdp_data.copy()
    gdp_data.columns = EXPECTED_COLUMNS

# Strip dollar signs and thousands separators, then convert to float.
# errors="coerce" turns any remaining non-numeric cell into NaN (removed by the
# dropna below) instead of aborting the whole conversion.
gdp_numeric = pd.to_numeric(
    gdp_data["GDP_USD_Millions"].replace(r"[\$,]", "", regex=True),
    errors="coerce",
).astype(float)

# Keep only the needed columns, swap in the numeric GDP, and drop incomplete rows.
gdp_data = (
    gdp_data[KEPT_COLUMNS]
    .assign(GDP_USD_Millions=gdp_numeric)
    .dropna()
)

# Show cleaned data
gdp_data.head()


## 3. Analysis

In [None]:

# Rank every row from largest to smallest GDP, then keep the ten leading rows.
# NOTE(review): if the scraped table includes aggregate rows (e.g. a world
# total), they would rank first here — confirm against the cleaned data.
ranked_by_gdp = gdp_data.sort_values("GDP_USD_Millions", ascending=False)
top10 = ranked_by_gdp.iloc[:10]
top10


## 4. Visualization

In [None]:

# Horizontal bar chart of the ten largest economies, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=top10,
    x="GDP_USD_Millions",
    y="Country",
    palette="viridis",
    ax=ax,
)
ax.set_title("Top 10 Countries by GDP (Nominal)")
ax.set_xlabel("GDP (in USD Millions)")
ax.set_ylabel("Country")
plt.show()


## 5. Insights and Conclusions


**Insights:**
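- The United States has the highest nominal GDP among all countries.
- China and Japan follow as the 2nd and 3rd largest economies, respectively.
- European countries such as Germany and the United Kingdom also appear in the top 10.
- These observations reflect the table at the time of writing; re-check them against the Top 10 output in Section 3, because the source page is revised as new estimates are published.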

**Conclusions:**
- Nominal GDP is only one indicator of economic strength; it does not account for population size or differences in price levels.
- Further study could include GDP per capita or purchasing power parity (PPP) for deeper insight.
