In [1]:
import re

import pandas as pd
import numpy as np

# Render floats with two fixed decimal places. Without this, pandas sometimes
# shows large values in a scaled form (presumably scientific notation) that
# makes them look as if they had been divided.
pd.set_option('display.float_format', '{:.2f}'.format)

In [2]:
# Maps two-letter postal abbreviations to full names for the 50 US states
# plus the District of Columbia. Used to turn the state code taken from a
# place link into a readable "state" column.
US_STATES = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District of Columbia",  # was "District Columbia" (missing "of")
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}

In [3]:
def remove_special_character_school_rating(school_rating):
    """Return the school rating with every stray 'Â' character removed.

    The 'Â' is presumably an encoding artifact left by the scrape (TODO
    confirm against the raw data); all other characters are kept as-is.
    """
    stray_character = 'Â'
    # Splitting on the character and re-joining drops every occurrence.
    return "".join(school_rating.split(stray_character))


def remove_special_character_nightlife_rating(nightlife_rating):
    """Normalise a scraped nightlife grade such as 'gradeÂ A minus' to 'A-'.

    Applies three substitutions in order: drop the 'gradeÂ' prefix text,
    turn the word "minus" into "-", then remove all spaces.
    """
    substitutions = (
        ('gradeÂ', ''),
        ("minus", "-"),
        (" ", ""),
    )
    cleaned = nightlife_rating
    for old_text, new_text in substitutions:
        cleaned = cleaned.replace(old_text, new_text)
    return cleaned


def remove_special_character_families_rating(families_rating):
    """Normalise a scraped families grade such as 'gradeÂ A minus' to 'A-'.

    The 'gradeÂ' prefix text is removed, the word "minus" becomes "-",
    and finally every space is stripped.
    """
    without_prefix = families_rating.replace('gradeÂ', '')
    with_sign = without_prefix.replace("minus", "-")
    compact = with_sign.replace(" ", "")
    return compact


def create_state_from_link(link):
    """Extract the upper-cased two-letter state code from a place link.

    The state is taken from the last two characters of the final path
    segment, e.g. ".../places-to-live/ardmore-montgomery-pa/" -> "PA".

    Trailing slashes are ignored, so the link works with or without one.
    (Previously a link without a trailing slash silently returned the tail
    of the *preceding* segment, e.g. "VE" from "places-to-live".)

    Args:
        link: URL string of the place page.

    Returns:
        The last two characters of the final path segment, upper-cased.
        An empty link yields an empty string.
    """
    last_segment = link.rstrip("/").rsplit("/", 1)[-1]
    return last_segment[-2:].upper()


def number_to_int(number: str):
    """Convert a formatted number such as "$1,234" to an int.

    Commas and dollar signs are stripped before conversion.

    Args:
        number: The scraped text. Non-string values (e.g. NaN from pandas)
            are tolerated and treated as missing.

    Returns:
        The parsed integer, or the sentinel string "No data available" when
        the value cannot be converted.
    """
    try:
        return int(number.replace(",", "").replace("$", ""))
    # Only conversion problems mean "missing data": AttributeError for
    # non-strings without .replace, ValueError/TypeError from int(). A bare
    # except would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, ValueError, TypeError):
        return "No data available"


def add_missing_type_of_place(link):
    """Rebuild a "type of place" label from the place link.

    The link is split on "/"; the fifth segment (index 4) is either the
    marker "n" (neighborhood pages, whose slug follows in the next segment)
    or the slug itself (treated as a suburb). The slug is split on "-" and
    its last two parts are used as the locality and state.

    Note: only the single word before the state is used, so a multi-word
    locality such as "santa-monica" is rendered as "Monica".

    Args:
        link: URL string of the place page.

    Returns:
        "Neighborhood in <Locality>, <ST>" or "Suburb of <Locality>, <ST>",
        or "no data" when the link is too short or the slug has fewer than
        two dash-separated parts. (The original "no data" branch was
        unreachable and such links raised IndexError instead.)
    """
    segments = link.split("/")
    if len(segments) < 5:
        return "no data"

    is_neighborhood = segments[4] == "n"
    if is_neighborhood:
        if len(segments) < 6:
            return "no data"
        slug = segments[5]
    else:
        slug = segments[4]

    slug_parts = slug.split("-")
    if len(slug_parts) < 2:
        return "no data"

    label = "Neighborhood in" if is_neighborhood else "Suburb of"
    return f"{label} {slug_parts[-2].capitalize()}, {slug_parts[-1].upper()}"
    
    
def change_state_abbreviation_to_name(abbreviation: str):
    """Look up the full state name for a two-letter abbreviation.

    Raises:
        KeyError: if the abbreviation is not a key of US_STATES.
    """
    full_name = US_STATES[abbreviation]
    return full_name

def remove_niche_prefix(link):
    """Strip the niche.com places-to-live prefix and the final character.

    Links under ".../places-to-live/n/" lose that longer prefix; every other
    link loses len(".../places-to-live/") leading characters, whether or not
    it actually starts with that prefix. The last character (expected to be
    the trailing "/") is always dropped.
    """
    base_prefix = "https://www.niche.com/places-to-live/"
    neighborhood_prefix = "https://www.niche.com/places-to-live/n/"

    # The neighborhood prefix extends the base one, so test it first.
    if link.startswith(neighborhood_prefix):
        cut = len(neighborhood_prefix)
    else:
        cut = len(base_prefix)
    return link[cut:-1]
    
def create_rent_to_sell_value_ratio(places_dataframe):
    """Return a copy of the frame with numeric rent/value columns and their ratio.

    "median_home_value" and "median_rent" are coerced to numbers (values that
    cannot be parsed become NaN), and a new "rent_sell_value_ratio" column
    holds median_rent / median_home_value. The input frame is not modified.
    """
    home_value = pd.to_numeric(places_dataframe["median_home_value"], errors="coerce")
    rent = pd.to_numeric(places_dataframe["median_rent"], errors="coerce")

    # assign() works on a copy, so the caller's frame keeps its raw values.
    return places_dataframe.assign(
        median_home_value=home_value,
        median_rent=rent,
        rent_sell_value_ratio=rent / home_value,
    )

def fill_nan_values_median_rent(rent_to_sell_value):
    """Unimplemented placeholder: ignores its argument and returns None."""
    return None

def add_name_with_state(link):
    """Build a "Place, City-or-county, ST" label from a place link.

    The slug left after remove_niche_prefix is split on "-" and interpreted
    by its number of parts:
      * 3 parts: place / city-or-county / state
      * 4 parts: two-word place / city-or-county / state
      * 5 parts: two-word place / two-word city-or-county / state
    Any other count returns "<First>, <SECOND>" built from the first two
    parts only.
    """
    words = remove_niche_prefix(link).split("-")
    word_count = len(words)

    if word_count not in (3, 4, 5):
        return f"{words[0].capitalize()}, {words[1].upper()}"

    # In all three structured cases the state is the last part.
    state = words[-1].upper()
    if word_count == 3:
        place = words[0].capitalize()
        city_or_county = words[1].capitalize()
    else:
        place = f"{words[0].capitalize()} {words[1].capitalize()}"
        if word_count == 4:
            city_or_county = words[2].capitalize()
        else:
            city_or_county = f"{words[2].capitalize()} {words[3].capitalize()}"
    return f"{place}, {city_or_county}, {state}"

In [4]:
# Load the raw scraped dataset.
places = pd.read_csv("niche_scraped_data_raw.csv")

# Drop rows whose school_rating is the literal "le" (presumably a scraping
# artifact - TODO confirm) and renumber the index from 0.
places = places[places["school_rating"] != "le"].reset_index(drop=True)

# Clean the rating strings and convert the "$1,234"-style text columns to
# ints (unparseable values become the string "No data available").
places["school_rating"] = places["school_rating"].map(remove_special_character_school_rating)
places["nightlife_rating"] = places["nightlife_rating"].map(remove_special_character_nightlife_rating)
places["families_rating"] = places["families_rating"].map(remove_special_character_families_rating)
places["population"] = places["population"].map(number_to_int)
places["median_home_value"] = places["median_home_value"].map(number_to_int)
places["median_rent"] = places["median_rent"].map(number_to_int)
places["median_household_income"] = places["median_household_income"].map(number_to_int)

# Rows whose type_of_place still contains the "gradeÂ" marker did not get a
# real place type from the scrape; rebuild it from the link for those rows only.
filtered_rows = places[places['type_of_place'].str.contains("gradeÂ")]
places.loc[filtered_rows.index, "type_of_place"] = filtered_rows["link"].map(add_missing_type_of_place)

# Create new columns. Order matters: name_with_state must be built while
# "state" still holds the two-letter abbreviation, before it is replaced by
# the full state name. create_rent_to_sell_value_ratio also turns
# median_home_value / median_rent into numeric columns (bad values -> NaN).
places["state"] = places["link"].map(create_state_from_link)
places["name_with_state"] = places.apply(lambda row: f'{row["name"]}, {row["state"].upper()}', axis=1)
places["state"] = places["state"].map(change_state_abbreviation_to_name)
places = create_rent_to_sell_value_ratio(places)

# Fill missing median_rent and median_home_value from each other using the
# dataset-wide mean rent/value ratio:
#   missing rent       -> mean_ratio * home value
#   missing home value -> rent / mean_ratio (uses rent after the fill above)
# A row missing both values stays NaN, since NaN times a number is NaN.
places["median_rent"] = places["median_rent"].fillna(places["rent_sell_value_ratio"].mean() * places["median_home_value"])
places["median_home_value"] = places["median_home_value"].fillna((1/places["rent_sell_value_ratio"].mean()) * places["median_rent"])
places = places.drop("rent_sell_value_ratio", axis=1) # helper column is not needed anymore

In [6]:
# Count places by the first word of type_of_place (e.g. Suburb, Neighborhood, Town, City).
places["type_of_place"].str.split(" ", expand=True)[0].value_counts()

Suburb          4294
Neighborhood    2584
Town            2395
City             160
Name: 0, dtype: int64

In [7]:
# Print each column's dtype and non-null count for the cleaned frame.
places.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9433 entries, 0 to 9432
Data columns (total 30 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   name                      9433 non-null   object 
 1   link                      9433 non-null   object 
 2   school_rating             9433 non-null   object 
 3   nightlife_rating          9433 non-null   object 
 4   families_rating           9433 non-null   object 
 5   type_of_place             9433 non-null   object 
 6   rented_percentage         9433 non-null   int64  
 7   owned_percentage          9433 non-null   int64  
 8   population                9433 non-null   int64  
 9   median_home_value         9433 non-null   float64
 10  median_rent               9433 non-null   float64
 11  median_household_income   9433 non-null   int64  
 12  area_feel                 9433 non-null   object 
 13  Assault                   9433 non-null   object 
 14  Murder  

In [8]:
# Frequency of each area_feel category, including the "No data available" placeholder.
places["area_feel"].value_counts()

Sparse Suburban       2147
Rural                 2009
Dense Suburban        1958
Urban Suburban Mix    1313
Suburban Rural Mix     929
Urban                  467
Dense Urban            339
No data available      136
Sparse Urban           135
Name: area_feel, dtype: int64

In [10]:
# Show name / Assault / Rape for the rows whose Murder value is the "no data" placeholder.
places.loc[places["Murder"] == "no data", ["name", "Assault", "Rape"]]

Unnamed: 0,name,Assault,Rape
0,Chesterbrook,no data,no data
1,Colonial Village,no data,no data
2,Ardmore,no data,no data
3,Hyde Park - Spanishtown Creek,no data,no data
4,Penn Wynne,no data,no data
...,...,...,...
9418,Meadowood,no data,no data
9420,Horse Shoe,no data,no data
9423,Fabius Township,no data,no data
9428,Garfield Park,no data,no data


In [85]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# National average figure for each crime type. fill_missing_crime_values
# multiplies these by the ratios scraped from areavibes.com to estimate the
# missing per-place values.
# NOTE(review): units are presumably incidents per 100,000 residents, to match
# the existing crime columns - confirm the source and year of these numbers.
US_AVERAGE_ASSAULT = 282.7
US_AVERAGE_MURDER = 6.1
US_AVERAGE_RAPE = 40.7
US_AVERAGE_ROBBERY = 135.5
US_AVERAGE_BURGLARY = 500.1
US_AVERAGE_THEFT = 2042.8
US_AVERAGE_MOTOR_VEHICLE_THEFT = 284

#places = pd.read_csv("niche_scraped_data_raw.csv")

def _read_crime_percentage(soup, fact_class):
    """Return the integer shown in the "circle-text" span of one fact box.

    Raises AttributeError when the fact <div> or its span is missing, and
    ValueError when the span text is not a number.
    """
    fact = soup.find("div", class_=fact_class)
    circle = fact.find("span", class_="circle-text") if fact is not None else None
    if circle is None:
        # AttributeError is what a missing <div> already produced implicitly
        # (None.find) and is what fill_missing_crime_values catches to retry.
        raise AttributeError(f"no crime figure found for div class {fact_class!r}")
    return int(circle.get_text(strip=True).replace("%", "").replace(",", ""))


def scrape_missing_crime_data(url_crime, timeout=30):
    """Scrape the violent and property crime percentages from a crime page.

    The violent figure is read from the <div class="sfs__fact b"> box and the
    property figure from <div class="sfs__fact c">; in both the number is the
    text of the <span class="circle-text"> element with "%" and "," removed.

    Args:
        url_crime: URL of the crime page to download.
        timeout: Seconds to wait for the server before giving up, so that one
            unresponsive page cannot stall the caller indefinitely.

    Returns:
        (violent, property) as fractions (percentage / 100). If either
        percentage is above 100 the pair (0, 0) is returned instead.

    Raises:
        AttributeError: if an expected element is missing from the page.
            Previously a missing span set the value to "no data", which then
            failed in int() with a ValueError the caller does not catch.
        ValueError: if a figure is present but not numeric.
        requests.RequestException: on network errors or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }

    request_crime = requests.get(url_crime, headers=headers, timeout=timeout)
    soup = BeautifulSoup(request_crime.content, "html.parser")

    violent_crime_percentage = _read_crime_percentage(soup, "sfs__fact b")
    property_crime_percentage = _read_crime_percentage(soup, "sfs__fact c")

    # Figures above 100% are treated as unusable and reported as zero.
    if violent_crime_percentage > 100 or property_crime_percentage > 100:
        return 0, 0
    return violent_crime_percentage / 100, property_crime_percentage / 100


def create_link_for_crime_data_first_attempt(type_of_place, name, name_with_state):
    """Build the areavibes.com crime-page URL for a place (first strategy).

    The URL shape depends on the first word of ``type_of_place``:
      * "Neighborhood": ``<City>-<ST>/<Name>``, with city and state parsed
        from the text after "Neighborhood in ".
      * "Suburb": ``<Name>-<ST>``, state taken from ``type_of_place``.
      * "City" / "Town": ``<Name>-<ST>``, state taken from ``name_with_state``.

    Spaces become "+", as before. Other URL-special characters in the names
    (e.g. "#", "/") are now percent-encoded; previously a "#" turned the rest
    of the URL into a fragment and a "/" split the path.

    Args:
        type_of_place: Label such as "Suburb of Philadelphia, PA".
        name: Place name.
        name_with_state: "<name>, <ST>" string.

    Returns:
        The URL string, or the sentinel "Couldn't work-out the link." when
        the place type is not recognised.
    """
    from urllib.parse import quote_plus  # local import keeps the cell self-contained

    link_beginning = r"https://www.areavibes.com/"
    kind = type_of_place.split(" ")[0]

    if kind not in ("Neighborhood", "Suburb", "City", "Town"):
        return "Couldn't work-out the link."

    place_slug = quote_plus(name)
    if kind == "Neighborhood":
        # Skip the leading "Neighborhood in " to get "<City>, <ST>".
        place_split = type_of_place[len("Neighborhood in "):].split(", ")
        city_slug = quote_plus(place_split[0])
        link_end = f"{city_slug}-{place_split[-1]}/{place_slug}"
    elif kind == "Suburb":
        state = type_of_place.split(", ")[-1]
        link_end = f"{place_slug}-{state}"
    else:  # City or Town
        state = name_with_state.split(", ")[-1]
        link_end = f"{place_slug}-{state}"
    return f"{link_beginning}{link_end}/crime"
        

def create_link_for_crime_data_second_attempt(type_of_place, name, name_with_state):
    """Build the areavibes.com crime-page URL for a place (fallback strategy).

    The URL shape depends on the first word of ``type_of_place``:
      * "Neighborhood": ``<City>-<ST>/<Name>``, with city and state parsed
        from the text after "Neighborhood in ".
      * "Suburb": ``<Name>-<ST>``, state taken from ``name_with_state``
        (not from ``type_of_place``).
      * "City" / "Town": ``<Name>-<ST>``, state taken from ``name_with_state``.

    Spaces become "+", as before. Other URL-special characters in the names
    (e.g. "#", "/") are now percent-encoded; previously a "#" turned the rest
    of the URL into a fragment and a "/" split the path.

    Args:
        type_of_place: Label such as "Suburb of Philadelphia, PA".
        name: Place name.
        name_with_state: "<name>, <ST>" string.

    Returns:
        The URL string, or the sentinel "Couldn't work-out the link." when
        the place type is not recognised.
    """
    from urllib.parse import quote_plus  # local import keeps the cell self-contained

    link_beginning = r"https://www.areavibes.com/"
    kind = type_of_place.split(" ")[0]

    if kind not in ("Neighborhood", "Suburb", "City", "Town"):
        return "Couldn't work-out the link."

    place_slug = quote_plus(name)
    if kind == "Neighborhood":
        # Skip the leading "Neighborhood in " to get "<City>, <ST>".
        place_split = type_of_place[len("Neighborhood in "):].split(", ")
        city_slug = quote_plus(place_split[0])
        link_end = f"{city_slug}-{place_split[-1]}/{place_slug}"
    else:
        # Suburb, City and Town all take the state from name_with_state here.
        state = name_with_state.split(", ")[-1]
        link_end = f"{place_slug}-{state}"
    return f"{link_beginning}{link_end}/crime"
    
    
def _store_crime_estimates(places_df, index, violent_ratio, property_ratio):
    """Write the seven crime estimates for one row by scaling the US averages."""
    places_df.at[index, "Assault"] = violent_ratio * US_AVERAGE_ASSAULT
    places_df.at[index, "Murder"] = violent_ratio * US_AVERAGE_MURDER
    places_df.at[index, "Rape"] = violent_ratio * US_AVERAGE_RAPE
    places_df.at[index, "Robbery"] = violent_ratio * US_AVERAGE_ROBBERY
    places_df.at[index, "Burglary"] = property_ratio * US_AVERAGE_BURGLARY
    places_df.at[index, "Theft"] = property_ratio * US_AVERAGE_THEFT
    places_df.at[index, "Motor Vehicle Theft"] = property_ratio * US_AVERAGE_MOTOR_VEHICLE_THEFT


def fill_missing_crime_values(places_df):
    """Backfill crime columns for rows whose "Assault" value is "no data".

    For each such row the crime page is scraped, first with the URL from
    create_link_for_crime_data_first_attempt and, if that fails, with the URL
    from create_link_for_crime_data_second_attempt. On success the violent
    ratio scales the Assault/Murder/Rape/Robbery averages and the property
    ratio scales the Burglary/Theft/Motor Vehicle Theft averages. Rows for
    which both attempts fail are left unchanged.

    A failed attempt is one that raises AttributeError (page lacks the
    expected elements), ValueError (non-numeric figure or unusable URL) or
    requests.RequestException (network error). Previously only
    AttributeError was handled, so the other two aborted the whole run.

    Args:
        places_df: DataFrame with "Assault", "type_of_place", "name",
            "name_with_state" and the crime columns. Modified in place.

    Returns:
        The same DataFrame object, for convenience.
    """
    link_builders = (
        create_link_for_crime_data_first_attempt,
        create_link_for_crime_data_second_attempt,
    )
    recoverable_errors = (AttributeError, ValueError, requests.RequestException)

    for index, row in places_df.iterrows():
        if row["Assault"] != "no data":
            continue

        print(f"Processing {row['name_with_state']}")

        for attempt_number, build_link in enumerate(link_builders, start=1):
            try:
                url = build_link(row["type_of_place"], row["name"], row["name_with_state"])
                violent_ratio, property_ratio = scrape_missing_crime_data(url)
            except recoverable_errors:
                if attempt_number < len(link_builders):
                    print("Processing failed. Retrying...")
                else:
                    print("Processing failed.")
            else:
                _store_crime_estimates(places_df, index, violent_ratio, property_ratio)
                break

    return places_df

In [86]:
# Backfill the crime columns for rows still marked "no data". This makes up to
# two HTTP requests per such row and prints progress for each one.
places = fill_missing_crime_values(places)

Processing Hyde Park - Spanishtown Creek, FL
Processing failed. Retrying...
Processing failed.
Processing City Center, CA
Processing failed. Retrying...
Processing failed.
Processing North Quarter, FL
Processing failed. Retrying...
Processing failed.
Processing Mid-City, CA
Processing failed. Retrying...
Processing failed.
Processing The Port, MA
Processing failed. Retrying...
Processing failed.
Processing Ballston/Virginia Square, VA
Processing failed. Retrying...
Processing failed.
Processing Radnor/Fort Myer Heights, VA
Processing failed. Retrying...
Processing failed.
Processing Wilshire/Montana, CA
Processing failed. Retrying...
Processing failed.
Processing Ocean Park, CA
Processing failed. Retrying...
Processing failed.
Processing Peabody, MA
Processing failed. Retrying...
Processing failed.
Processing Oak Park, KS
Processing failed. Retrying...
Processing failed.
Processing Pinehurst Estates, KS
Processing failed. Retrying...
Processing failed.
Processing Agassiz, MA
Processing

Processing failed.
Processing Scarritt Peabody, TN
Processing failed. Retrying...
Processing failed.
Processing Bishop Ranch, CA
Processing failed. Retrying...
Processing failed.
Processing Stoneridge Park, CA
Processing failed. Retrying...
Processing failed.
Processing K Bar Ranch, FL
Processing failed. Retrying...
Processing failed.
Processing South Hollywood Hill, WA
Processing failed. Retrying...
Processing failed.
Processing Dougherty Hills, CA
Processing failed. Retrying...
Processing failed.
Processing Steele Creek, NC
Processing failed. Retrying...
Processing failed.
Processing North Claremont (Uplands), CA
Processing failed. Retrying...
Processing failed.
Processing Berkeley Hills, CA
Processing failed. Retrying...
Processing failed.
Processing Central Folsom, CA
Processing failed. Retrying...
Processing failed.
Processing Landmark, VA
Processing failed. Retrying...
Processing failed.
Processing East Lake Hills, WA
Processing failed. Retrying...
Processing failed.
Processing H

Processing failed.
Processing Downtown, VA
Processing failed. Retrying...
Processing failed.
Processing Pewaukee City, WI
Processing failed. Retrying...
Processing failed.
Processing Historic Downtown, NJ
Processing failed. Retrying...
Processing failed.
Processing Willows, WA
Processing failed. Retrying...
Processing failed.
Processing Superior Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Marble Estates, CA
Processing failed. Retrying...
Processing failed.
Processing Wendover/Sedgewood, NC
Processing failed. Retrying...
Processing failed.
Processing Stillwater Township, MN
Processing failed. Retrying...
Processing failed.
Processing Theater District, NY
Processing failed. Retrying...
Processing failed.
Processing Shearwater, CA
Processing failed. Retrying...
Processing failed.
Processing Parkside, CA
Processing failed. Retrying...
Processing failed.
Processing Downtown, TN
Processing failed. Retrying...
Processing failed.
Processing Devonshire, IN


Processing failed. Retrying...
Processing failed.
Processing Maple Ridge Township, MN
Processing failed. Retrying...
Processing failed.
Processing Vandenbroek, WI
Processing failed. Retrying...
Processing failed.
Processing Pleasant Township, OH
Processing failed. Retrying...
Processing failed.
Processing Highland Township, SD
Processing failed. Retrying...
Processing failed.
Processing Filley Township, NE
Processing failed. Retrying...
Processing failed.
Processing Delaware Township, PA
Processing failed. Retrying...
Processing failed.
Processing Wayne Township, IN
Processing failed. Retrying...
Processing failed.
Processing Sandy Creek Township, PA
Processing failed. Retrying...
Processing failed.
Processing Fayette Township, IA
Processing failed. Retrying...
Processing failed.
Processing Loup Township, NE
Processing failed. Retrying...
Processing failed.
Processing Pineola Township, NC
Processing failed. Retrying...
Processing failed.
Processing Bensalem Township, NC
Processing fail

Processing failed.
Processing Presque Isle Township, MI
Processing failed. Retrying...
Processing failed.
Processing Eau Galle, WI
Processing failed. Retrying...
Processing failed.
Processing Hickey Township, AR
Processing failed. Retrying...
Processing failed.
Processing Thorncreek Township, IN
Processing failed. Retrying...
Processing failed.
Processing Mohican Township, OH
Processing failed. Retrying...
Processing failed.
Processing Alango Township, MN
Processing failed. Retrying...
Processing failed.
Processing New Hope, WI
Processing failed. Retrying...
Processing failed.
Processing Lenoir Township, NC
Processing failed. Retrying...
Processing failed.
Processing Sugar Camp, WI
Processing failed. Retrying...
Processing failed.
Processing Rhinehart Township, MN
Processing failed. Retrying...
Processing failed.
Processing Durham Township, PA
Processing failed. Retrying...
Processing failed.
Processing Westland Township, OH
Processing failed. Retrying...
Processing failed.
Processing 

Processing failed.
Processing Franzen, WI
Processing failed. Retrying...
Processing failed.
Processing Warren Township, MN
Processing failed. Retrying...
Processing failed.
Processing Boyer Township, IA
Processing failed. Retrying...
Processing failed.
Processing Fox Township, IA
Processing failed. Retrying...
Processing failed.
Processing Logan Township, KS
Processing failed. Retrying...
Processing failed.
Processing Highland Town, WI
Processing failed. Retrying...
Processing failed.
Processing New Jasper Township, OH
Processing failed. Retrying...
Processing failed.
Processing Lake Township, IA
Processing failed. Retrying...
Processing failed.
Processing Buck Range Township, AR
Processing failed. Retrying...
Processing failed.
Processing New Denmark, WI
Processing failed. Retrying...
Processing failed.
Processing Rocky Mount Township, NC
Processing failed. Retrying...
Processing failed.
Processing Freedom Township, OH
Processing failed. Retrying...
Processing failed.
Processing Norri

Processing failed. Retrying...
Processing failed.
Processing Harwood Township, IL
Processing failed. Retrying...
Processing failed.
Processing Coral Township, IL
Processing failed. Retrying...
Processing failed.
Processing Hickory Grove Township, IA
Processing failed. Retrying...
Processing failed.
Processing Channahon Township, IL
Processing failed. Retrying...
Processing failed.
Processing 5 Township, AR
Processing failed. Retrying...
Processing failed.
Processing Washington Township, IN
Processing failed. Retrying...
Processing failed.
Processing Perham Township, MN
Processing failed. Retrying...
Processing failed.
Processing Glenrocks, VA
Processing failed. Retrying...
Processing failed.
Processing Beaver Township, PA
Processing failed. Retrying...
Processing failed.
Processing Saylor Township, IA
Processing failed. Retrying...
Processing failed.
Processing White River Township, AR
Processing failed. Retrying...
Processing failed.
Processing Chariton Township, MO
Processing failed.

Processing failed.
Processing Gilkey Township, NC
Processing failed. Retrying...
Processing failed.
Processing Waterman Township, IA
Processing failed. Retrying...
Processing failed.
Processing Otter Creek Township, IA
Processing failed. Retrying...
Processing failed.
Processing Jordan Township, MI
Processing failed. Retrying...
Processing failed.
Processing Watson Township, IL
Processing failed. Retrying...
Processing failed.
Processing Durand Township, MN
Processing failed. Retrying...
Processing failed.
Processing Stockton Springs, ME
Processing failed. Retrying...
Processing failed.
Processing Princetown, NY
Processing failed. Retrying...
Processing failed.
Processing Nekimi, WI
Processing failed. Retrying...
Processing failed.
Processing Holland, WI
Processing failed. Retrying...
Processing failed.
Processing Troy Township, OH
Processing failed. Retrying...
Processing failed.
Processing Jonathan Creek Township, IL
Processing failed. Retrying...
Processing failed.
Processing Healin

Processing failed. Retrying...
Processing failed.
Processing Little Italy, DE
Processing failed. Retrying...
Processing failed.
Processing Flagler Village, FL
Processing failed. Retrying...
Processing failed.
Processing Powdersville, SC
Processing failed. Retrying...
Processing New London Township, PA
Processing failed. Retrying...
Processing failed.
Processing Oakland Estates, TX
Processing failed. Retrying...
Processing failed.
Processing Ramblewood, NJ
Processing failed. Retrying...
Processing Park Forest III, TX
Processing failed. Retrying...
Processing failed.
Processing Liberty-Wells, UT
Processing failed. Retrying...
Processing failed.
Processing Lake Telemark, NJ
Processing failed. Retrying...
Processing Cherry Hill Mall, NJ
Processing failed. Retrying...
Processing Crow Canyon, CA
Processing failed. Retrying...
Processing failed.
Processing North Kenwood, FL
Processing failed. Retrying...
Processing failed.
Processing Brighton Township, MI
Processing failed. Retrying...
Proces

Processing failed. Retrying...
Processing failed.
Processing Buckhall, VA
Processing failed. Retrying...
Processing Milburn Neighborhood Group, KS
Processing failed. Retrying...
Processing failed.
Processing Elizabeth Lakes, VA
Processing failed. Retrying...
Processing failed.
Processing Harristown, NC
Processing failed. Retrying...
Processing failed.
Processing Orange Township, PA
Processing failed. Retrying...
Processing failed.
Processing Abell, MD
Processing failed. Retrying...
Processing failed.
Processing Annandale, VA
Processing failed. Retrying...
Processing Tucson Park West #1, AZ
Processing failed. Retrying...
Processing failed.
Processing Allisonville, IN
Processing failed. Retrying...
Processing failed.
Processing Village at Fisher's Creek, WA
Processing failed. Retrying...
Processing failed.
Processing Wallace Township, PA
Processing failed. Retrying...
Processing failed.
Processing Oregon District, OH
Processing failed. Retrying...
Processing failed.
Processing Olentangy 

Processing Park Street, FL
Processing failed. Retrying...
Processing failed.
Processing Crockery Township, MI
Processing failed. Retrying...
Processing failed.
Processing Saline Township, MI
Processing failed. Retrying...
Processing failed.
Processing Golden Triangle, NJ
Processing failed. Retrying...
Processing Hill Valley, IN
Processing failed. Retrying...
Processing failed.
Processing Murrells Inlet, SC
Processing failed. Retrying...
Processing Mills River, NC
Processing failed. Retrying...
Processing Sheffield Village, CA
Processing failed. Retrying...
Processing failed.
Processing Coldspring, MD
Processing failed. Retrying...
Processing failed.
Processing Central Park, NY
Processing failed. Retrying...
Processing failed.
Processing Denney Whitford, OR
Processing failed. Retrying...
Processing failed.
Processing Colonial Gardens, MN
Processing failed. Retrying...
Processing failed.
Processing Tenderloin, CA
Processing failed. Retrying...
Processing failed.
Processing Douglas Park, 

Processing failed.
Processing Bedford-Stuyvesant, NY
Processing failed. Retrying...
Processing failed.
Processing North Granby, CT
Processing failed. Retrying...
Processing Hebron, KY
Processing failed. Retrying...
Processing Pleasant Grove, CA
Processing failed. Retrying...
Processing failed.
Processing Layhill, MD
Processing failed. Retrying...
Processing West Rockhill Township, PA
Processing failed. Retrying...
Processing failed.
Processing Socastee, SC
Processing failed. Retrying...
Processing Westside, NM
Processing failed. Retrying...
Processing failed.
Processing Neabsco, VA
Processing failed. Retrying...
Processing Glen Iris, AL
Processing failed. Retrying...
Processing failed.
Processing Highland, OR
Processing failed. Retrying...
Processing failed.
Processing Ravensworth, VA
Processing failed. Retrying...
Processing Crestwood South, AL
Processing failed. Retrying...
Processing failed.
Processing Laurence Harbor, NJ
Processing failed. Retrying...
Processing Marin Wood, CA
Proc

Processing failed.
Processing Quail Glen, CA
Processing failed. Retrying...
Processing failed.
Processing Judson, SC
Processing failed. Retrying...
Processing Historic District, CA
Processing failed. Retrying...
Processing failed.
Processing Smith Ranch, CA
Processing failed. Retrying...
Processing failed.
Processing Bellefonte, DE
Processing failed. Retrying...
Processing North Middletown, NJ
Processing failed. Retrying...
Processing Pea Ridge, WV
Processing failed. Retrying...
Processing Tolland, CT
Processing failed. Retrying...
Processing failed.
Processing Wallace, FL
Processing failed. Retrying...
Processing Potomac Mills, VA
Processing failed. Retrying...
Processing Hollins Market, MD
Processing failed. Retrying...
Processing failed.
Processing Freemont Park, CA
Processing failed. Retrying...
Processing failed.
Processing Woodlawn Beach, FL
Processing failed. Retrying...
Processing Bowman Heights, FL
Processing failed. Retrying...
Processing failed.
Processing Marina Cove, VA
Pr

Processing failed. Retrying...
Processing Stanford Crossing, CA
Processing failed. Retrying...
Processing failed.
Processing Toro Canyon, CA
Processing failed. Retrying...
Processing Bagdad, FL
Processing failed. Retrying...
Processing Sunset Road, NC
Processing failed. Retrying...
Processing failed.
Processing Franklin Park, OH
Processing failed. Retrying...
Processing failed.
Processing Comstock Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Fredon Township, NJ
Processing failed. Retrying...
Processing failed.
Processing Crestline, AL
Processing failed. Retrying...
Processing failed.
Processing Cunningham Heights, KS
Processing failed. Retrying...
Processing failed.
Processing Sheridan Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Providencia Park, FL
Processing failed. Retrying...
Processing failed.
Processing Bachman, TX
Processing failed. Retrying...
Processing failed.
Processing Oronoco Township, MN
Processing

Processing failed. Retrying...
Processing failed.
Processing Glasgow, DE
Processing failed. Retrying...
Processing Etowah, NC
Processing failed. Retrying...
Processing Estudillo Estates/Glen, CA
Processing failed. Retrying...
Processing failed.
Processing Negaunee Township, MI
Processing failed. Retrying...
Processing failed.
Processing Westminster Heights, FL
Processing failed. Retrying...
Processing failed.
Processing Lake Shore, WA
Processing failed. Retrying...
Processing Conestoga, PA
Processing failed. Retrying...
Processing Duxbury, VT
Processing failed. Retrying...
Processing failed.
Processing Ridgewood, TX
Processing failed. Retrying...
Processing failed.
Processing Sewaren, NJ
Processing failed. Retrying...
Processing Near Westside, IN
Processing failed. Retrying...
Processing failed.
Processing Vistas, ID
Processing failed. Retrying...
Processing failed.
Processing Forrest Park, VA
Processing failed. Retrying...
Processing failed.
Processing West Pasco, WA
Processing failed

Processing failed. Retrying...
Processing failed.
Processing Mount Vista, WA
Processing failed. Retrying...
Processing Kaseberg Kingswood, CA
Processing failed. Retrying...
Processing failed.
Processing Mannington Township, NJ
Processing failed. Retrying...
Processing failed.
Processing Government Hill, TX
Processing failed. Retrying...
Processing failed.
Processing Love Grove/Riviera Manor, FL
Processing failed. Retrying...
Processing failed.
Processing Silver Bluff, FL
Processing failed. Retrying...
Processing failed.
Processing Little River, SC
Processing failed. Retrying...
Processing Shields, MI
Processing failed. Retrying...
Processing Glenwood, CA
Processing failed. Retrying...
Processing failed.
Processing Cherry Grove Township, MI
Processing failed. Retrying...
Processing failed.
Processing Montague Township, NJ
Processing failed. Retrying...
Processing failed.
Processing Arundel, ME
Processing failed. Retrying...
Processing failed.
Processing Clam Lake Township, MI
Processing

Processing failed. Retrying...
Processing failed.
Processing South Camella, VA
Processing failed. Retrying...
Processing failed.
Processing Jordan Park, FL
Processing failed. Retrying...
Processing failed.
Processing Sudley, VA
Processing failed. Retrying...
Processing Glenwood Park, VA
Processing failed. Retrying...
Processing failed.
Processing Fawn River Township, MI
Processing failed. Retrying...
Processing failed.
Processing Farmington, VA
Processing failed. Retrying...
Processing failed.
Processing Fort Gratiot Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Robinson Township, MI
Processing failed. Retrying...
Processing failed.
Processing Union Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Polkton Charter Township, MI
Processing failed. Retrying...
Processing failed.
Processing Madison Township, PA
Processing failed. Retrying...
Processing failed.
Processing Eastwood, MI
Processing failed. Retrying...
Processi

In [88]:
# Distribution of Assault values after the backfill, including how many rows are still "no data".
places["Assault"].value_counts()

no data               1533
0                      579
169.61999999999998      53
195.063                 50
166.79299999999998      49
192.23600000000002      49
197.89                  48
183.755                 46
175.274                 45
178.101                 45
220.506                 44
237.468                 44
172.447                 42
147.004                 41
132.869                 41
98.945                  41
161.13899999999998      41
189.409                 41
228.987                 40
203.54399999999998      40
124.38799999999999      39
223.333                 38
214.852                 38
217.679                 38
144.177                 38
152.65800000000002      37
209.19799999999998      37
240.295                 36
231.81399999999996      36
155.485                 36
158.312                 35
163.96599999999998      35
110.253                 34
135.696                 34
212.02499999999998      34
245.94899999999998      32
19.789                  32
2

In [91]:
# Preview the first 25 rows of the cleaned, partially backfilled frame.
places.head(25)

Unnamed: 0,name,link,school_rating,nightlife_rating,families_rating,type_of_place,rented_percentage,owned_percentage,population,median_home_value,median_rent,median_household_income,area_feel,Assault,Murder,Rape,Robbery,Burglary,Theft,Motor Vehicle Theft,under_ten,ten_to_seventeen,eighteen_to_twentyfour,twentyfive_to_thirtyfour,thirtyfive_to_fourtyfour,fourtyfive_to_fiftyfour,fiftyfive_to_sixtyfour,over_sixtyfive,state,name_with_state
0,Chesterbrook,https://www.niche.com/places-to-live/chesterbr...,A+,A-,A+,"Suburb of Philadelphia, PA",34,66,5457,350200.0,2323.0,126687,Dense Suburban,209.20,4.51,30.12,100.27,390.08,1593.38,221.52,13,9,5,16,17,10,13,17,Pennsylvania,"Chesterbrook, PA"
1,Colonial Village,https://www.niche.com/places-to-live/n/colonia...,A+,A+,A+,"Neighborhood in Arlington, VA",66,34,2895,341217.0,1937.0,101975,No data available,195.06,4.21,28.08,93.49,285.06,1164.40,161.88,9,37,13,38,21,10,29,6,Virginia,"Colonial Village, VA"
2,Ardmore,https://www.niche.com/places-to-live/ardmore-m...,A+,A,A+,"Suburb of Philadelphia, PA",36,64,14391,353900.0,1462.0,107087,Urban Suburban Mix,175.27,3.78,25.23,84.01,340.07,1389.10,193.12,12,10,6,15,18,11,10,19,Pennsylvania,"Ardmore, PA"
3,Hyde Park - Spanishtown Creek,https://www.niche.com/places-to-live/n/hyde-pa...,A,A+,A+,"Neighborhood in Tampa, FL",63,37,2473,469284.0,1673.0,132471,Urban,no data,no data,no data,no data,no data,no data,no data,17,36,13,40,21,17,29,6,Florida,"Hyde Park - Spanishtown Creek, FL"
4,Penn Wynne,https://www.niche.com/places-to-live/penn-wynn...,A+,B+,A+,"Suburb of Philadelphia, PA",12,88,6226,395300.0,1515.0,143520,Urban Suburban Mix,189.41,4.09,27.27,90.79,365.07,1491.24,207.32,13,10,4,8,13,12,16,25,Pennsylvania,"Penn Wynne, PA"
5,City Center,https://www.niche.com/places-to-live/n/city-ce...,A+,A+,A+,"Neighborhood in Santa Monica, CA",96,4,4103,2000001.0,2619.0,89765,Dense Urban,no data,no data,no data,no data,no data,no data,no data,6,25,13,42,21,20,29,8,California,"City Center, CA"
6,Cambridgeport,https://www.niche.com/places-to-live/n/cambrid...,A,A+,A+,"Neighborhood in Cambridge, MA",65,35,10634,876214.0,2208.0,114827,Dense Urban,96.12,2.07,13.84,46.07,130.03,531.13,73.84,17,52,13,30,21,7,29,7,Massachusetts,"Cambridgeport, MA"
7,North Quarter,https://www.niche.com/places-to-live/n/north-q...,us,A+,A,"Neighborhood in Orlando, FL",87,13,1445,171100.0,1423.0,65698,Urban,no data,no data,no data,no data,no data,no data,no data,2,31,13,45,21,13,29,11,Florida,"North Quarter, FL"
8,Devon,https://www.niche.com/places-to-live/devon-che...,A+,B+,A+,"Suburb of Philadelphia, PA",3,97,1981,663300.0,2177.0,197820,Rural,243.12,5.25,35.00,116.53,440.09,1797.66,249.92,7,20,7,8,8,23,17,10,Pennsylvania,"Devon, PA"
9,Great Neck Gardens,https://www.niche.com/places-to-live/great-nec...,A+,A,A+,"Suburb of New York City, NY",33,67,1258,941500.0,4440.6,218603,Sparse Suburban,245.95,5.31,35.41,117.89,450.09,1838.52,255.60,7,11,5,6,13,9,7,42,New York,"Great Neck Gardens, NY"


In [92]:
# Save the current state of the dataset; index=False leaves the row index out of the CSV.
places.to_csv("niche_updated_not_completed.csv", index=False)