## Getting Poets

In [1]:
import os

import pandas as pd
from IPython.display import display, FileLink

In [2]:
import re
import requests
from bs4 import BeautifulSoup

In [41]:
url = "https://en.wikipedia.org/wiki/List_of_Urdu_poets"

# Download the list page. `requests` applies no timeout by default, so an
# explicit one keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)

In [42]:
# Fail loudly on a bad download: silently skipping the parsing would leave
# `filtered_poets` undefined (or stale from an earlier run) for later cells.
if response.status_code != 200:
    raise RuntimeError(f"Could not fetch {response.url}: HTTP {response.status_code}")

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find the element with id "content"
content = soup.find(id="content")
if content is None:
    raise RuntimeError('The downloaded page has no element with id "content"')

# Find all hyperlinks within the "content" element
hyperlinks = content.find_all("a")

# Keep only hrefs of the form "/wiki/<title>" whose title contains neither
# "." nor ":" (this drops external links and "Namespace:Page" style links)
pattern = r"^\/wiki\/[^.:]+$"

# Create a list of tuples containing poet names and their URLs
poet_names_and_urls = [
    (link.text, link["href"])
    for link in hyperlinks
    if link.has_attr("href") and re.match(pattern, link["href"])
]

# Extract poet names from the list of tuples
poet_names = [name for name, _href in poet_names_and_urls]

# The poets are the links between these two anchor names (inclusive); links
# before and after them are not part of the selection.
try:
    min_index = poet_names.index("Amir Khusro")
    max_index = poet_names.index("Fuzail Ahmad Nasiri")
except ValueError as exc:
    raise ValueError(
        'Anchor links "Amir Khusro" / "Fuzail Ahmad Nasiri" were not found on the page'
    ) from exc

# Filter the list of poet names to include only those within the desired range
filtered_poets = poet_names[min_index:max_index + 1]

270


In [70]:
def fetch_wikitext(title):
    """Return the wikitext of an English Wikipedia page.

    Queries the MediaWiki API with ``prop=revisions`` / ``rvprop=content`` and
    returns the content of the first revision entry in the response (with no
    further ``rv*`` parameters the API returns the most recent revision).

    Parameters
    ----------
    title : str
        Page title to look up.

    Returns
    -------
    str
        The page's wikitext, or an empty string when the response holds no
        revision content (missing page, empty title, empty revision list).

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # Base URL for Wikipedia API
    base_url = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "prop": "revisions",
        "rvprop": "content",
        "titles": title,
        "format": "json"
    }

    # A timeout avoids hanging forever on one of the many sequential requests
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()

    data = response.json()

    # The response has no "query" key at all when there is nothing to look up
    # (e.g. an empty title), so every level is read defensively.
    pages = data.get('query', {}).get('pages', {})
    if not pages:
        return ''

    # Only one title is requested, so the first page entry is the answer
    first_page = next(iter(pages.values()))
    revisions = first_page.get('revisions') or [{}]

    return revisions[0].get('*', '')

In [96]:
all_data = []  # To store the extracted data for all poets
base_url = "https://en.wikipedia.org"  # Base URL for Wikipedia

# Redirect stubs start with "#REDIRECT [[Target]]". The keyword is matched
# case-insensitively with optional whitespace, and the captured title stops
# before any "#section" or "|label" suffix, which is not part of the title.
redirect_pattern = re.compile(r'#REDIRECT\s*\[\[([^\]|#]+)', re.IGNORECASE)


def _extract_year(field, wikitext):
    """Return the first four-digit number on the ``field = ...`` line, or None.

    Only spaces/tabs are allowed around "=", and "." never matches a newline,
    so an empty field cannot pick up a year from the following infobox line.
    """
    match = re.search(re.escape(field) + r'[ \t]*=[ \t]*.*?(\d{4})', wikitext)
    return match.group(1) if match else None


def _extract_place(field, wikitext):
    """Return the target of the wikilink that opens the ``field = ...`` value, or None.

    For a piped link ``[[Target|label]]`` only ``Target`` is returned. Values
    that do not begin with a wikilink yield None.
    """
    match = re.search(re.escape(field) + r'\s*=\s*\[\[([^\]|]+)', wikitext)
    return match.group(1).strip() if match else None


for poet in filtered_poets:  # Loop through each filtered poet

    wikitext = fetch_wikitext(poet)  # Fetch wikitext for the poet's page

    # Follow a redirect stub once, if the page is one
    redirect_match = redirect_pattern.match(wikitext)
    if redirect_match:
        new_title = redirect_match.group(1).strip().replace(' ', '_')
        wikitext = fetch_wikitext(new_title)

    # Append the extracted data to the 'all_data' list as a dictionary
    all_data.append({
        'Name': poet,
        'Birthplace': _extract_place('birth_place', wikitext),
        'Death Place': _extract_place('death_place', wikitext),
        'Birth Year': _extract_year('birth_date', wikitext),
        'Death Year': _extract_year('death_date', wikitext)
    })

In [103]:
# Build a table from the scraped records: one row per dictionary in `all_data`
df = pd.DataFrame(all_data)

In [11]:
# Remove the row limit so displayed DataFrames are not truncated
pd.set_option('display.max_rows', None)

In [113]:
# Save the scraped table to poets.csv without writing the index column
df.to_csv('poets.csv', index=False)

In [107]:
# Copy of the table without the rows that contain any missing value
# NOTE(review): `df_cleaned` is not referenced again in the visible cells — confirm it is still needed
df_cleaned = df.dropna()

In [117]:
# Show a link to the saved poets.csv in the cell output
display(FileLink("poets.csv"))

Manual processing of incomplete data fields.

## Location Data

In [5]:
# Load Poets-fix.csv from the current user's Downloads folder. "~" is expanded
# at run time so the path is not tied to one particular user account.
df = pd.read_csv(os.path.expanduser("~/Downloads/Poets-fix.csv"))

In [7]:
# Google Geocoding API key. Read from the GOOGLE_MAPS_API_KEY environment
# variable so the secret never has to be pasted into the notebook; falls back
# to an empty string when the variable is not set.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "")

In [8]:
def get_location_info(place):
    """Geocode a place name with the Google Geocoding API.

    Parameters
    ----------
    place : str
        Free-text address or place name. Missing values (None / NaN) and
        blank strings are not sent to the API.

    Returns
    -------
    tuple
        ``(lat, lng, state)`` taken from the first result, where ``state`` is
        the ``long_name`` of the address component typed
        ``administrative_area_level_1``, or None when the result has no such
        component. ``(None, None, None)`` when the place is missing or the
        response status is not ``'OK'``.
    """
    # A NaN from an empty CSV cell would otherwise be serialised as the
    # address "nan" and looked up as if it were a real place name.
    if pd.isna(place) or not str(place).strip():
        return None, None, None

    # Base URL for Google Geocoding API
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"

    # Send a GET request to the API with the specified place and API key
    response = requests.get(base_url, params={"address": place, "key": api_key}, timeout=30)

    # Parse the JSON response
    res = response.json()

    if res['status'] != 'OK':
        return None, None, None  # Nothing usable in the response

    # Only the first (best-ranked) result is used
    top_result = res['results'][0]
    location = top_result['geometry']['location']

    # First address component typed as a state/province, if there is one
    state = next(
        (
            component['long_name']
            for component in top_result['address_components']
            if 'administrative_area_level_1' in component['types']
        ),
        None,
    )

    return location['lat'], location['lng'], state

In [9]:
def _geocode_places(places):
    """Geocode a column of place names, querying each distinct place only once.

    Returns an iterator over three tuples (latitudes, longitudes, states), each
    aligned row by row with ``places``. Rows with a missing place get None in
    all three without a request being made.
    """
    lookup = {place: get_location_info(place) for place in places.dropna().unique()}
    not_found = (None, None, None)
    # zip(*...) transposes the per-row (lat, lng, state) triples into three columns
    return zip(*(lookup.get(place, not_found) for place in places))


df['B_Lat'], df['B_Lon'], df['B_State'] = _geocode_places(df['Birthplace'])
df['D_Lat'], df['D_Lon'], df['D_State'] = _geocode_places(df['Death Place'])

In [13]:
# Save the table, now including the geocoded columns, without the index column
df.to_csv('Poets-Clean.csv', index=False)

In [14]:
# Show a link to the saved Poets-Clean.csv in the cell output
display(FileLink("Poets-Clean.csv"))

## Plotting Data

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import Image, display
import cv2
import os
from plotly.subplots import make_subplots

In [None]:
# Load Poets-Clean.csv from the current user's Downloads folder. "~" is
# expanded at run time so the path is not tied to one particular user account.
df = pd.read_csv(os.path.expanduser("~/Downloads/Poets-Clean.csv"))

In [None]:
# Early period: one frame per century, titled "1200s" ... "1600s"
century_starts = range(1200, 1700, 100)
years = [[start, start + 100] for start in century_starts]
titles = [f"{start}s" for start in century_starts]

# From 1700 to 1990: one frame per decade. Each window runs from 15 years
# before the decade start to 20 years after it, so neighbouring windows overlap.
decade_starts = range(1700, 2000, 10)
years += [[decade - 15, decade + 20] for decade in decade_starts]
titles += [str(decade) for decade in decade_starts]

In [None]:
# One caption per frame, in frame order: five century entries (1200s-1600s)
# followed by one entry per decade starting at 1700. An empty string means the
# frame has no caption. Every entry must end with a comma: adjacent string
# literals are concatenated by Python, which silently drops an entry and
# shifts all later captions one frame earlier.
captions = [
    "Proto-Urdu emerges; Amir Khusro blends Persian, Turkic, and Indian linguistic elements.",
    "Khusro popularizes qawwali and ghazal styles, laying foundations for Urdu's poetic traditions.",
    "The Deccan region starts embracing early forms of Urdu called Dakkani.",
    "Mughal influence promotes Persian, but local interactions birth Urdu's precursors. Dakkani poetry thrives in the south.",
    "Urdu crystallizes in North India. Delhi becomes a hub, and Wali Deccani bridges southern and northern Urdu traditions.",
    "",
    "",
    "",
    "Decline of the Mughal Empire & Nadir Shah's invasion (1739).", #1730s
    "",
    "",
    "",
    "Shift of Urdu cultural center from Delhi to Lucknow.", #1770s
    "",
    "",
    "",
    "British colonial expansion.", #1810
    "",
    "English becomes the language of administration, and Urdu emerges as a lingua franca for North Indians.", #1830
    "",
    "British annexation of Awadh (1856).", #1850
    "Decline of Lucknow as centre of Urdu", #1860
    "Establishment of Aligarh Muslim University (1875).",
    "Aligarh's rise as a beacon of Urdu literature and thought.",
    "",
    "Lahore's burgeoning cultural activities.",
    "Lahore joins the ranks as a significant hub for Urdu literature and thought.", #1910
    "",
    "",
    "Partition of India: Hindi is made the Official Language of India while Urdu becomes the Pakistani Lingua Franca.",
    "With the creation of Pakistan, Lahore and Karachi become central to Urdu's cultural and literary life.",
    "",
    "",
    "",
    "Global migrations.",
    "Urdu diaspora grows in the West, infusing traditional poetry with contemporary experiences.",
]

In [None]:
# Map each frame title to its caption; zip() pairs the two lists by position
# and stops at the end of the shorter one
merged_dict = dict(zip(titles, captions))

In [None]:
# Coerce both year columns to numbers (unparseable entries become NaN) so the
# year-range filters in the plotting loop compare numerically for births and
# deaths alike.
for year_column in ('Birth Year', 'Death Year'):
    df[year_column] = pd.to_numeric(df[year_column], errors='coerce')

In [None]:
# Mapbox access token used for the map tiles. Read from the
# MAPBOX_ACCESS_TOKEN environment variable so the secret is never stored in
# the notebook; an empty string is used when the variable is not set.
token = os.environ.get("MAPBOX_ACCESS_TOKEN", "")

In [None]:
def _density_trace(frame, year_column, lat_column, lon_column, first_year, last_year):
    """Build a density-map trace of row counts per coordinate for one year window.

    Rows whose ``year_column`` lies in ``[first_year, last_year]`` (both ends
    inclusive) are counted per (lat, lon) pair; the count is used as the
    heat-map weight.
    """
    in_window = frame[(frame[year_column] >= first_year) & (frame[year_column] <= last_year)]
    counts = in_window.groupby([lat_column, lon_column]).size().reset_index(name='Frequency')

    # Only the trace of this throwaway figure is reused; its layout (including
    # any map style) is discarded, so the style is set on the combined figure.
    return px.density_mapbox(counts,
                             lat=lat_column,
                             lon=lon_column,
                             z='Frequency',
                             radius=10).data[0]


# Render one PNG per year window: births on the left map, deaths on the right
for i, gap in enumerate(years):

    fig = make_subplots(rows=1, cols=2,
                        subplot_titles=('Births', 'Deaths'),
                        specs=[[{'type': 'densitymapbox'}, {'type': 'densitymapbox'}]])

    first_year, last_year = gap[0], gap[1]

    fig.add_trace(_density_trace(df, 'Birth Year', 'B_Lat', 'B_Lon', first_year, last_year),
                  row=1, col=1)
    fig.add_trace(_density_trace(df, 'Death Year', 'D_Lat', 'D_Lon', first_year, last_year),
                  row=1, col=2)

    # Frame title above the maps
    year = titles[i]
    fig.add_annotation(text=year,
                       xref="paper", yref="paper",
                       x=0.5, y=1.15,  # Position of the year
                       showarrow=False,
                       font=dict(size=23))

    # Historical caption below the maps
    fig.add_annotation(text=merged_dict[year],
                       xref="paper", yref="paper",
                       x=0.5, y=-0.1,  # Position of the caption
                       showarrow=False,
                       font=dict(size=10),
                       bgcolor="rgba(255, 255, 255, 0.7)",
                       bordercolor="black",
                       borderwidth=1,
                       borderpad=4)

    # Same view for both maps. The "light" style has to be set here on the
    # layout: it does not travel with the traces taken from the px figures.
    map_layout = dict(accesstoken=token, style="light", center=dict(lat=26, lon=79), zoom=2.75)

    fig.update_layout(coloraxis_showscale=False)
    fig.update_layout(mapbox1=map_layout, mapbox2=map_layout)

    image_filename = f"map_{year}.png"
    fig.write_image(image_filename)

In [None]:
# Collect the rendered frames from the current directory. Plain lexicographic
# sorting gives chronological order because every generated file name is
# "map_" followed by a title that starts with a four-digit year.
img_files = sorted(img for img in os.listdir() if img.startswith("map_") and img.endswith(".png"))
if not img_files:
    raise FileNotFoundError("No map_*.png frames found in the current directory")

# The writer needs the frame size when it is created, so the size is read
# from the first frame before the writer is opened.
frame = cv2.imread(img_files[0])
h, w, layers = frame.shape
size = (w, h)

# NOTE(review): `downloads_path` is not defined in the visible cells — it must
# hold the output video path before this cell runs.
# 0.5 frames per second: each map stays on screen for two seconds.
out = cv2.VideoWriter(downloads_path, cv2.VideoWriter_fourcc(*'mp4v'), 0.5, size)

try:
    for img_file in img_files:
        img = cv2.imread(img_file)
        out.write(img)
finally:
    # Always finalise the file, even if reading or writing a frame fails
    out.release()