In [1]:
import ipywidgets as ip
import geopandas as gpd
from ipywidgets import interact
from ipywidgets import widgets
import matplotlib.pyplot as plt
from shapely.geometry import Point
import numpy as np
import pandas as pd
from sklearn.externals import joblib


In [2]:
# Load the CSVs holding information and keys for users, weather descriptions,
# stations, boroughs, and neighborhoods. Paths are relative to the notebook's
# working directory.
user_total = pd.read_csv('User_Code.csv')
weather_total = pd.read_csv('Weather_Code.csv')
stations = pd.read_csv('Station_List_With_Info.csv')
borough_total = pd.read_csv('Borough_Code.csv')
neighborhood_total = pd.read_csv('Neighborhood_Code.csv')



In [3]:
# Load the trained machine-learning model from disk
# (presumably a random forest, judging by the file name).
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
test_tree = joblib.load('random_forest_bruce.pkl')

# Distinct station names and weather descriptions; these are the choices
# handed to the interactive widget further down.
station_names = set(stations['start station name'])
weather_slider = set(weather_total['0'])

In [4]:
#Helper Functions
#Retrieve list of station characteristics from the stations dataframe.
def station_values(station_name):
    """Return the attributes of the first station whose name matches.

    Looks up ``station_name`` in the 'start station name' column of the
    module-level ``stations`` frame and returns that row as a plain list
    with the first column dropped (presumably a saved index column --
    confirm against the CSV).

    Raises IndexError when no station has that name.
    """
    name_matches = stations['start station name'] == station_name
    first_match = stations[name_matches].iloc[0]
    return list(first_match)[1:]
#Convert usertype to numeric (0,1)
def user_find(user):
    """Return the numeric code stored for a user-type label.

    Finds the first row of the module-level ``user_total`` frame whose '0'
    column equals ``user`` and returns the value in that row's first column
    (the numeric key, assuming the CSV layout is ``<code>, <label>``).

    Raises IndexError when ``user`` is not present in the table.
    """
    matching_rows = user_total[user_total['0'] == user]
    if matching_rows.empty:
        raise IndexError('unknown user type: {!r}'.format(user))
    # The row is a Series labelled by column *names* (strings). Indexing it
    # with a bare integer relied on pandas' positional fallback, which is
    # deprecated; .iloc states the positional intent explicitly.
    return matching_rows.iloc[0].iloc[0]
#Convert weather description to numeric (according to csv)
def weather(weather_desc):
    """Return the numeric code stored for a weather description.

    Finds the first row of the module-level ``weather_total`` frame whose
    '0' column equals ``weather_desc`` and returns the value in that row's
    first column (the numeric key, assuming the CSV layout is
    ``<code>, <description>``).

    Raises IndexError when ``weather_desc`` is not present in the table.
    """
    matching_rows = weather_total[weather_total['0'] == weather_desc]
    if matching_rows.empty:
        raise IndexError('unknown weather description: {!r}'.format(weather_desc))
    # The row is a Series labelled by column *names* (strings). Indexing it
    # with a bare integer relied on pandas' positional fallback, which is
    # deprecated; .iloc states the positional intent explicitly.
    return matching_rows.iloc[0].iloc[0]
#Converting gender to numeric (0,1)
def gender_desc(gender):
    """Map a gender label to its numeric code.

    'Male' gives 0 and 'Female' gives 1; any other value gives None.
    """
    for label, code in (('Male', 0), ('Female', 1)):
        if gender == label:
            return code
    return None
#Convert neighborhood number to actual name (for maps)
def neighborhood_name(hood_number):
    """Return the neighborhood name stored at row position ``hood_number``.

    The lookup is positional: it reads the second column of the module-level
    ``neighborhood_total`` frame at that row, so it assumes the numeric
    codes coincide with row order -- TODO confirm against the CSV.
    """
    name_column = 1
    return neighborhood_total.iloc[hood_number, name_column]

#Plotting neighborhoods in NYC map.
def plot_hood(hood, bood, base_color = 'tan', hood_color = 'red',
              bood_color = 'blue', figsize = (20, 20), shapefile = "nynta.shp"):
    """Draw Queens, Brooklyn and Manhattan with two neighborhoods highlighted.

    Parameters
    ----------
    hood : str
        Neighborhood name (matched against the shapefile's 'NTAName'
        column) filled with ``hood_color``. The notebook passes the
        predicted end neighborhood here.
    bood : str
        Second neighborhood name, filled with ``bood_color`` and drawn last,
        so it is the visible colour when ``hood == bood``. The notebook
        passes the start neighborhood here.
    base_color : str
        Fill colour for every other neighborhood.
    hood_color, bood_color : str
        Highlight colours for ``hood`` and ``bood``.
    figsize : tuple
        Figure size in inches for the base map.
    shapefile : str
        Path to the Neighborhood Tabulation Area shapefile.

    A name that matches no neighborhood is skipped rather than plotted, so
    the base map is still shown. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    # Reproject to WGS84 lon/lat. The epsg keyword replaces the
    # {'init': 'epsg:4326'} dict form, which pyproj has deprecated.
    ntas = gpd.read_file(shapefile).to_crs(epsg=4326)

    # Keep only the boroughs of interest and expose the name column under
    # the label used below. Chaining avoids mutating a filtered frame in place.
    ntas = ntas.loc[ntas['BoroName'].isin(['Queens','Brooklyn', 'Manhattan'])]
    ntas = ntas.reset_index(drop=True).rename(columns={'NTAName': 'Neighborhood'})

    # Base map of the three boroughs with NTA neighborhood outlines.
    base = ntas.plot(figsize=figsize, color=base_color, edgecolor='black')

    # Overlay the highlights on the base axes (same draw order as before:
    # hood first, bood on top). The overlays draw onto the existing axes,
    # so they take no figsize of their own.
    for name, color in ((hood, hood_color), (bood, bood_color)):
        selected = ntas.loc[ntas['Neighborhood'] == name]
        if selected.empty:
            # Unknown name: nothing to highlight; avoid plotting an empty frame.
            continue
        selected.plot(ax=base, color=color, edgecolor='black')

    #plot it.
    plt.show()

In [5]:
#Predicting the End stations, by converting the entered data into the train format, then running it through the ML model.
#The model then outputs a neighborhood, which is then fed into a map for a visualization of where one will go.
def predict_end_station(start_station,usertype,birth_year,gender,time,humidity,temp,weatherdesc,windspeed,day):
    """Predict the destination neighborhood for one trip and map it.

    ``start_station``, ``usertype``, ``gender`` and ``weatherdesc`` are
    labels that are translated through ``station_values``, ``user_find``,
    ``gender_desc`` and ``weather``; the remaining arguments are passed to
    the model unchanged. The 15 features are assembled in a fixed order
    (presumably the column order the model was trained on -- confirm before
    reordering), sent through ``test_tree.predict`` as a single row, and the
    predicted neighborhood number is turned back into a name.

    Side effects: draws the map via ``plot_hood`` (start in blue, predicted
    end in red) and prints a one-line summary. Returns None.
    """
    station_list=station_values(start_station)
    # Position 1 of the station record is not used as a feature.
    station_id=station_list[0]
    station_latitude=station_list[2]
    station_longitude=station_list[3]
    station_neighborhood=station_list[4]
    station_income=station_list[5]
    station_borough=station_list[6]

    user=user_find(usertype)
    weather_desc=weather(weatherdesc)
    gender_code=gender_desc(gender)

    # Feature order is significant; keep it exactly as is.
    enter_list=[station_id,station_latitude,station_longitude,user,birth_year,gender_code,time
               ,humidity,temp,weather_desc,windspeed,day,station_neighborhood,
               station_borough,station_income]
    # One sample -> shape (1, n_features), as predict() expects a 2-D input.
    enter_np=np.array(enter_list).reshape(1,-1)

    prediction=test_tree.predict(enter_np)
    # predict() returns an array with one entry for the single row. Calling
    # int() directly on an array is deprecated in NumPy, so pull the lone
    # element out as a scalar first.
    prediction_int=int(np.asarray(prediction).item())

    start_neighborhood_string=neighborhood_name(int(station_neighborhood))
    end_neighborhood_string=neighborhood_name(prediction_int)
    plot_hood(end_neighborhood_string,start_neighborhood_string)
    print(start_neighborhood_string, ' (Blue) to',end_neighborhood_string, '(Red)')

In [6]:
#Create interactive Widget interface
# The station and weather choices are sets, whose iteration order can differ
# between kernel sessions; sorting them gives the dropdowns a stable order and
# a stable default selection. key=str keeps the sort safe if a value is not a
# string. The trailing semicolon suppresses the "<function ...>" repr that
# interact() would otherwise leave as the cell's output.
interact(
    predict_end_station,
    start_station=sorted(station_names, key=str),
    birth_year=widgets.IntSlider(min=1900, max=2010, step=1, value=1950),
    gender=['Male', 'Female'],
    usertype=['Customer', 'Subscriber'],
    time=widgets.IntSlider(min=0, max=2300, step=100, value=600),
    humidity=widgets.IntSlider(min=0, max=100, step=1, value=50),
    temp=widgets.IntSlider(min=-20, max=100, step=1, value=50),
    weatherdesc=sorted(weather_slider, key=str),
    windspeed=widgets.IntSlider(min=0, max=50, step=1, value=20),
    day=[0, 1, 2, 3, 4, 5, 6],
);

Widget Javascript not detected.  It may not be installed or enabled properly.


<function __main__.predict_end_station>