In [10]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Wikipedia API endpoint
url = "https://en.wikipedia.org/w/api.php"

# Seconds to wait for each HTTP response. Without an explicit timeout,
# requests waits indefinitely, which can hang the whole notebook run.
REQUEST_TIMEOUT = 30

# Sections we want
sections_to_extract = [
    "Bread", "Common food", "Traditional desserts",
    "Common beverages", "See also", "References"
]

# Add a User-Agent header to mimic a browser
# NOTE(review): Wikimedia's User-Agent policy asks for a descriptive client
# string with contact details -- consider replacing this generic value.
headers = {
    'User-Agent': 'Mozilla/5.0'
}


def _checked_json(http_response):
    """Decode an API response as JSON, failing loudly instead of with a KeyError.

    Args:
        http_response: the ``requests.Response`` returned by the API call.

    Returns:
        The decoded JSON payload (a dict).

    Raises:
        requests.HTTPError: if the HTTP status is 4xx/5xx.
        RuntimeError: if the payload carries a top-level ``"error"`` entry
            (e.g. unknown page or section) instead of a ``"parse"`` result.
    """
    http_response.raise_for_status()
    payload = http_response.json()
    if "error" in payload:
        raise RuntimeError(f"Wikipedia API error: {payload['error']}")
    return payload


# One session for all calls so the connection to the API host is reused.
with requests.Session() as session:
    # Step 1: Get page content in JSON
    # Only the "sections" part of this reply is read below; "text" is kept in
    # the request so `data` has the same shape as before.
    params = {
        "action": "parse",
        "page": "Uttar_Pradesh_cuisine",
        "format": "json",
        "prop": "sections|text"
    }
    response = session.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
    data = _checked_json(response)

    # Step 2: Get section numbers for desired sections
    sections_info = data['parse']['sections']
    section_map = {
        sec['line']: sec['index']
        for sec in sections_info
        if sec['line'] in sections_to_extract
    }

    # Surface requested sections that the page does not have, rather than
    # silently producing a CSV without them.
    missing_sections = [name for name in sections_to_extract if name not in section_map]
    if missing_sections:
        print(f"Warning: sections not found on the page: {missing_sections}")

    # Step 3: Extract content for each section
    rows = []
    for title, index in section_map.items():
        params = {
            "action": "parse",
            "page": "Uttar_Pradesh_cuisine",
            "format": "json",
            "prop": "text",
            "section": index
        }
        resp = session.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
        section_html = _checked_json(resp)['parse']['text']['*']

        # Use BeautifulSoup to parse section HTML
        soup = BeautifulSoup(section_html, "html.parser")

        # Extract all list items first, then all paragraphs; blank entries are
        # skipped for both so the CSV contains no empty "Item" cells.
        for tag_name in ("li", "p"):
            for node in soup.find_all(tag_name):
                text = node.get_text(strip=True)
                if text:
                    rows.append({"Section": title, "Item": text})

# Step 4: Save as CSV
# Explicit columns keep the CSV header intact even when nothing was scraped.
df = pd.DataFrame(rows, columns=["Section", "Item"])
print("Example of resulting CSV data:")
print(df.head(20))  # preview first 20 rows
df.to_csv("uttar_pradesh_cuisine.csv", index=False, encoding="utf-8")
print("\nData scraped and saved to uttar_pradesh_cuisine.csv")

Example of resulting CSV data:
        Section                                               Item
0         Bread  As wheat is the staple food of the state, brea...
1   Common food                                            Biryani
2   Common food                                             Boondi
3   Common food                                              Chaat
4   Common food  Dumbhindi(fried whole okra stuffed with spiced...
5   Common food                                          Egg curry
6   Common food                                              Kofta
7   Common food                                              Korma
8   Common food                                             Lotpot
9   Common food                                       Litti chokha
10  Common food                                     Mutton biryani
11  Common food                                             Nihari
12  Common food                                             Pakora
13  Common food                

In [11]:
# Offer the generated CSV as a browser download when running in Google Colab.
csv_path = 'uttar_pradesh_cuisine.csv'

try:
    from google.colab import files
except ImportError:
    # google.colab is only importable inside a Colab runtime; elsewhere there is
    # no browser-download helper, so skip instead of failing the notebook run.
    print(f"Not running in Google Colab; skipping browser download of {csv_path}")
else:
    files.download(csv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>