In [1]:
import numpy as np 
import pandas as pd
import requests
import json
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt

In [2]:
def get_NYC_data():
    """
    Download the New York neighborhood dataset and return it as a DataFrame.

    The JSON document at https://cocl.us/new_york_dataset is fetched and every
    entry of its ``features`` list is flattened into one row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Borough``, ``Neighborhood``, ``Latitude`` and ``Longitude``,
        one row per feature, in the order the features appear in the document.
        If the document has no features the frame is empty but still has
        these four columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the document (or one of its features) lacks an expected key.
    """
    NY_DATASET = "https://cocl.us/new_york_dataset"
    column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(NY_DATASET, timeout=30)
    response.raise_for_status()
    features = response.json()['features']

    # Collect plain records and build the frame once: growing a DataFrame row
    # by row is quadratic, and DataFrame.append no longer exists in pandas 2.x.
    records = [
        {
            'Borough': feature['properties']['borough'],
            'Neighborhood': feature['properties']['name'],
            # The coordinate pair is read as [longitude, latitude].
            'Latitude': feature['geometry']['coordinates'][1],
            'Longitude': feature['geometry']['coordinates'][0],
        }
        for feature in features
    ]
    return pd.DataFrame(records, columns=column_names)

In [3]:
# Fetch the neighborhood coordinates; this performs a network request.
NYC_geo = get_NYC_data()

In [4]:
# Preview the first rows of the geographic table.
NYC_geo.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [5]:
def _borough_from_cell(text):
    """Return the borough name at the start of a table cell's text."""
    words = text.replace('\xa0', ' ').split()
    name_parts = []
    for word in words:
        # NOTE(review): assumes cells read like "<Borough> CB <n>" -- confirm
        # against the live page. Stopping at the designator instead of taking
        # only the first word keeps multi-word names such as "Staten Island".
        if word.upper() == 'CB' or any(ch.isdigit() for ch in word):
            break
        name_parts.append(word)
    if name_parts:
        return ' '.join(name_parts)
    return words[0] if words else ''


def _infobox_population_values(rows):
    """Return the integers found in infobox rows that directly follow a 'population' header row."""
    values = []
    take_next_row = False
    for row in rows:
        head = row.find('th')
        if head is not None and 'population' in head.text.lower():
            take_next_row = True
            continue
        if take_next_row:
            take_next_row = False
            body = row.find('td')
            if body is None:
                continue
            try:
                values.append(int(body.text.replace(',', '')))
            except ValueError:
                # Best effort: a non-numeric cell is skipped, not fatal.
                pass
    return values


def get_population_neighborhood(read_from_csv = False):
    """
    Build a per-neighborhood population table for New York City.

    With ``read_from_csv`` false, the Wikipedia page listing NYC neighborhoods
    is scraped: for each row of every ``table.wikitable`` the borough is taken
    from the first cell and every link in the fifth cell is followed; on the
    linked page, the value of the infobox row that follows a "population"
    header is recorded for that neighborhood. The scraped table is cached as
    ``population.csv`` in the current working directory.

    With ``read_from_csv`` true, that cache file is loaded instead and no
    network request is made.

    Parameters
    ----------
    read_from_csv : bool, default False
        Load the cached ``population.csv`` instead of scraping.

    Returns
    -------
    pandas.DataFrame
        Columns ``Borough``, ``Neighborhood`` and ``Population``, sorted by
        borough, with one row per neighborhood name (the last occurrence after
        sorting is kept). Both code paths return the same columns.

    Raises
    ------
    requests.HTTPError
        If the neighborhood index page answers with an error status.
    FileNotFoundError
        If ``read_from_csv`` is true and ``population.csv`` does not exist.
    """
    if not read_from_csv:
        WIKI_LINK = "https://en.wikipedia.org/wiki/Neighborhoods_in_New_York_City"
        ROOT_WIKI_LINK = "https://en.wikipedia.org"
        REQUEST_TIMEOUT = 30  # seconds; avoids hanging on a stalled connection

        index_page = requests.get(WIKI_LINK, timeout=REQUEST_TIMEOUT)
        index_page.raise_for_status()
        index_soup = BeautifulSoup(index_page.text, 'html.parser')

        population_list = []
        for table_row in index_soup.select("table.wikitable tr"):
            cells = table_row.find_all('td')
            # Header rows have no <td>; rows with fewer than five cells have
            # no neighborhood column to read.
            if len(cells) < 5:
                continue
            borough = _borough_from_cell(cells[0].text)
            for link in cells[4].find_all('a'):
                href = link.get('href')
                # Anchors without a target, or pointing at a footnote on the
                # same page, do not lead to a neighborhood article.
                if not href or href.startswith('#'):
                    continue
                neighborhood = link.text
                neighbourhood_page = requests.get(
                    ROOT_WIKI_LINK + href, timeout=REQUEST_TIMEOUT)
                if not neighbourhood_page.ok:
                    # Missing or broken article: nothing to record for it.
                    continue
                neighbourhood_soup = BeautifulSoup(
                    neighbourhood_page.text, 'html.parser')
                infobox_rows = neighbourhood_soup.select("table.infobox tr")
                for value in _infobox_population_values(infobox_rows):
                    population_list.append([borough, neighborhood, value])

        df = pd.DataFrame(population_list, columns=[
                          "Borough", "Neighborhood", "Population"])
        df.to_csv('population.csv')
    else:
        # The cache is written with its index as the first column; read it
        # back as the index so no stray "Unnamed: 0" column appears.
        df = pd.read_csv('population.csv', index_col=0)
    # A stable sort keeps the original order inside each borough, which makes
    # the row kept by drop_duplicates(keep='last') deterministic.
    df = df.sort_values(by=['Borough'], kind='mergesort')
    df = df.drop_duplicates(subset='Neighborhood', keep='last')
    return df

In [6]:
# Live scrape: requests the Wikipedia index page plus one page per linked neighborhood.
# Passing read_from_csv=True loads the cached population.csv instead.
nyc_population_df = get_population_neighborhood()


In [7]:
# Preview the first rows of the population table.
nyc_population_df.head()

Unnamed: 0,Borough,Neighborhood,Population
0,Bronx,Melrose,24913
25,Bronx,Bruckner,38557
26,Bronx,Castle Hill,38557
27,Bronx,Clason Point,9136
28,Bronx,Harding Park,9136


In [8]:
# Combine NYC geo data with population data.
# The inner join keeps only (Borough, Neighborhood) pairs present in both frames.
# merge() matches on the named columns directly, so neither frame needs an index set.
nyc_df = pd.merge(NYC_geo, nyc_population_df, how="inner", on=["Borough", "Neighborhood"])
nyc_df.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Population
0,Bronx,Wakefield,40.894705,-73.847201,29158
1,Bronx,Co-op City,40.874294,-73.829939,43752
2,Bronx,Fieldston,40.895437,-73.905643,3292
3,Bronx,Riverdale,40.890834,-73.912585,48049
4,Bronx,Kingsbridge,40.881687,-73.902818,10669


In [9]:
# Persist the merged table to nyc_data.csv in the current working directory.
# The row index is written as the first (unnamed) column.
nyc_df.to_csv('nyc_data.csv')