# Part 0: Building the network



In [11]:
import urllib.request
import re
import networkx as nx
import pandas as pd
import os

# Fetch the names from the current Wikipedia category pages for West Coast and East Coast hip hop musicians
def fetch_current_names():
    """Scrape article names from the West/East Coast hip hop category pages.

    Downloads the hard-coded Wikipedia category listing pages (two for the
    West Coast category, three for the East Coast category), extracts the
    target of every ``/wiki/...`` link on each page and drops a fixed number
    of leading links per page.

    Side effects:
        Writes three files into the current working directory, one name
        per line: ``rappers.txt`` (west names followed by east names),
        ``rappersEast.txt`` and ``rappersWest.txt``.

    Returns:
        tuple: ``(names, eastNames, westNames)`` where ``names`` is
        ``eastNames + westNames``. Each element is a list of strings exactly
        as they appear in the page's href attributes.
    """
    west_urls = [
        'https://en.wikipedia.org/wiki/Category:West_Coast_hip_hop_musicians',
        'https://en.wikipedia.org/w/index.php?title=Category:West_Coast_hip_hop_musicians&pagefrom=McFedries%2C+Trevor%0ATrevor+McFedries#mw-pages',
    ]
    east_urls = [
        'https://en.wikipedia.org/wiki/Category:East_Coast_hip_hop_musicians',
        'https://en.wikipedia.org/w/index.php?title=Category:East_Coast_hip_hop_musicians&pagefrom=Junglepussy%0AJunglepussy#mw-pages',
        'https://en.wikipedia.org/w/index.php?title=Category:East_Coast_hip_hop_musicians&pagefrom=Stezo#mw-pages',
    ]

    # NOTE(review): the first 17 (west) / 15 (east) matches on every page are
    # skipped; presumably these are links that precede the member listing.
    # The counts and the "pagefrom" continuation URLs are tied to the page
    # layout at the time of writing - verify before re-running.
    westNames = _collect_category_names(west_urls, 17)
    eastNames = _collect_category_names(east_urls, 15)

    names = eastNames + westNames

    # The combined file lists west first, then east (unlike `names`).
    _write_names('rappers.txt', westNames + eastNames)
    _write_names('rappersEast.txt', eastNames)
    _write_names('rappersWest.txt', westNames)

    return names, eastNames, westNames


def _fetch_wiki_links(url):
    """Return every ``/wiki/`` href target (without ':', '#' or '"') found at *url*."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')
    return re.findall('href="/wiki/([^:#"]+)"', html)


def _collect_category_names(urls, skip):
    """Concatenate the links of each page in *urls*, dropping the first *skip* per page."""
    collected = []
    for url in urls:
        collected.extend(_fetch_wiki_links(url)[skip:])
    return collected


def _write_names(path, items):
    """Write *items* to *path*, one per line, as UTF-8."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines("%s\n" % item for item in items)

import json

def fetch_wiki_data(rappers, coast):
    """Download the plain-text extract of each rapper's Wikipedia article.

    For every name in *rappers* one request is sent to the MediaWiki API
    (``action=query&prop=extracts&explaintext``). Each non-empty extract in
    the response is written to ``<directory>/<rapper>.txt`` (UTF-8), where
    the directory is ``wikisEast`` or ``wikisWest``. Pages without an
    extract are skipped silently.

    Args:
        rappers: Iterable of article names. Names may be given as they
            appear in ``/wiki/`` hrefs (underscores, percent-encoded
            characters) or with plain spaces.
        coast: Either ``"east"`` or ``"west"``; selects the output directory.

    Raises:
        ValueError: If *coast* is neither ``"east"`` nor ``"west"``.
    """
    # Imported locally so the function does not rely on `urllib.request`
    # happening to load `urllib.parse` as a side effect.
    from urllib.parse import unquote, urlencode

    # Base URL for the Wikipedia API
    base_url = 'https://en.wikipedia.org/w/api.php'

    # Determine the directory based on the coast
    if coast == "east":
        directory = 'wikisEast'
    elif coast == "west":
        directory = 'wikisWest'
    else:
        raise ValueError("Invalid coast value. Use either 'east' or 'west'.")

    # Ensure the directory exists (no error if it is already there)
    os.makedirs(directory, exist_ok=True)

    for rapper in rappers:
        # Decode any percent-escapes already present in the name and let
        # urlencode() apply the single, final round of URL encoding.
        # Encoding an already-encoded name again would send a literal
        # "%xx" sequence as part of the title.
        title = unquote(rapper).replace(" ", "_")
        params = {
            "action": "query",
            "prop": "extracts",
            "exlimit": "1",
            "explaintext": "1",
            "format": "json",
            "titles": title,
        }

        # Construct the full URL
        api_url = base_url + "?" + urlencode(params)

        # Make the request; the context manager closes the connection
        with urllib.request.urlopen(api_url) as response:
            data = json.loads(response.read().decode('utf-8'))

        # Parse the JSON data to get the 'extract' key of every returned page
        pages = data.get("query", {}).get("pages", {})
        for page_data in pages.values():
            content = page_data.get("extract", "")
            if content:
                # The file name keeps the name exactly as passed in, so
                # files stay addressable by the entries of `rappers`.
                with open(f'{directory}/{rapper}.txt', 'w', encoding="utf-8") as f:
                    f.write(content)

# Example usage: scrape the current category listings, then download the
# article text for each coast into its own directory.
names, eastNames, westNames = fetch_current_names()
fetch_wiki_data(rappers=eastNames, coast="east")
fetch_wiki_data(rappers=westNames, coast="west")


