In [136]:
import pandas as pd
import datetime as dt
import re
import numpy as np
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', 100)

from matplotlib import pyplot as plt
%matplotlib inline 

import seaborn as sns
sns.set_style('darkgrid')

from sklearn.linear_model import Lasso, Ridge, ElasticNet, LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

import math

In [3]:
url = 'https://data.wprdc.org/dataset/d1eb0fcd-ba60-4407-9969-ceef464d0c00/resource/36c61dbc-9e23-4c77-9f09-ca544d3c6174/download/schedule_daily_detail.csv'
trip_detail = pd.read_csv(url)

In [4]:
def format_time(timestring):
    timestring = timestring.strip()
    end = re.findall('[A-Za-z]$', timestring)
    if len(end)==0:
        print(timestring + ' has no letters')
        try: 
            return dt.datetime.strptime(timestring, '%H%M')
        except:
            return pd.NaT
    elif end[0] in ['a', 'p']:
        timestring = timestring + 'm'
    elif end[0] == 'x':
        timestring = re.sub('x', 'am', timestring)
    else:
        print(timestring + ' has no letters')
        timestring = re.sub('[A-Za-z].*$', '', timestring)
    try: 
        return dt.datetime.strptime(timestring, '%I%M%p')
    except:
        return pd.NaT

In [5]:
starts = [format_time(t) for t in trip_detail['Trip.Start']]
trip_detail['start_dt'] = starts
ends = [format_time(t) for t in trip_detail['Trip.End']]
trip_detail['end_dt'] = ends
trip_detail['start_hour'] = trip_detail['start_dt'].dt.hour
trip_detail['end_hour'] = trip_detail['end_dt'].dt.hour

In [6]:
trip_detail.dtypes

Service.Context                    object
Schedule.Type                      object
Booking                             int64
Number.of.InSrv.Trip.s.             int64
Garage                              int64
Route                              object
Trip.Start                         object
Trip.End                           object
Trip.Block                         object
Trp.Off.Serv.Duration             float64
Trp.In.Serv.Duration              float64
Trp.Layover.Duration              float64
Trp.Total.Duration                float64
Trp.In.Serv.Dist.                 float64
Trp.Off.Serv.Dist.                float64
Trp.Dist.                         float64
Speed.Inbound                     float64
Speed.Outbound                    float64
Trip.Variant                       object
Trip.is.Pull.Out                  float64
Trip.is.Pull.In                   float64
Trip..                              int64
Day.Type                           object
start_dt                   datetim

In [7]:
def time_per(hour):
    peak_min1 = 7
    peak_max1 = 10
    peak_min2 = 16
    peak_max2 = 19
    if (peak_min1 <= hour < peak_max1):
        return "AM"
    elif (peak_min2 <= hour < peak_max2):
        return "PM"
    else:
        return "OFFPEAK"

def peak_or_not(hour, trip_variant):
    #print(trip_variant)
    peak_min1 = 7
    peak_max1 = 10
    peak_min2 = 16
    peak_max2 = 19
    try:
        trip_variant[0]
    except:
        return 0
    if (((peak_min1 <= hour < peak_max1) & (trip_variant[0]=="I"))
        |((peak_min2 <= hour < peak_max2)& (trip_variant[0]=="O"))):
        return 1
    else:
        return 0

trip_detail["TimePer"] = [time_per(h) for h in trip_detail['start_hour']]
trip_detail["Peak"] = [peak_or_not(trip_detail['start_hour'][i], trip_detail['Trip.Variant'][i]) for i in range(len(trip_detail))]

In [8]:
trip_detail.Route = [r.lstrip('0') for r in trip_detail.Route]

In [9]:
def get_peak_count(df):
    df = df[(df.Booking == 1909)&(trip_detail['Day.Type']=="Weekday")]
    df = df.groupby(['Route', 'Peak'])
    df = df.agg({"Trip..": "count", 'Trp.In.Serv.Duration': 'mean'}).reset_index()
    df.columns = ['Route', 'Peak', "TripCount", 'Trip_HRS_Service']
    return df

In [10]:
peak_counts = get_peak_count(trip_detail)
peak_counts.to_csv('../Outputs/trip_counts_peak.csv')

In [11]:
peak_counts.Route = [r.lstrip('0') for r in peak_counts.Route]
peak_counts

Unnamed: 0,Route,Peak,TripCount,Trip_HRS_Service
0,1,0,38,1.392474
1,1,1,10,1.489800
2,11,0,39,0.401538
3,11,1,11,0.377545
4,12,0,37,1.086405
...,...,...,...,...
191,Y46,1,13,1.139692
192,Y47,0,38,0.840447
193,Y47,1,10,0.860200
194,Y49,0,39,0.871410


In [12]:
int(False)

0

In [13]:
def get_from_peak_table(route, peak=True, trip_not_time=True):
    #print(route)
    try:
        p = peak_counts[((peak_counts['Route']==route)&(peak_counts['Peak']==int(peak)))]
    #   print(p)
    except:
        print(route + ' not found')
        return 0
    if trip_not_time:
        try:
            cnt = p["TripCount"].iloc[0]
        except:
    #        print(p + 'not indexed')
            return 0
    else:
        try:
            cnt = p["Trip_HRS_Service"].iloc[0]
        except:
    #        print(p + 'not indexed')
            return 0
    return cnt

# Modeling Ridership Estimate

In [14]:
path1 = "../Data/RouteMatrix.xlsx"
xls1 = pd.ExcelFile(path1)
sheet_list = xls1.sheet_names  # see all sheet names
m = pd.read_excel(xls1, 'Matrix',  header=0)

In [15]:
def to_float(num):
    try:
        float(num)
        return float(num)
    except ValueError:
        return np.nan

In [212]:
matrix = m.iloc[:,:-45]
conv_col = ['Weekday_Avg_Headway_Inbound_Peak', 'Weekday_Avg Ridership',
       'Weekday_Avg_Headway_Inbound_OffPeak',
       'Weekday_Avg_Headway_Outbound_Peak',
       'Weekday_Avg_Headway_Outbound_OffPeak', 'Weekday_Peak_No_Units',
       'Saturday_Avg Ridership', 'Saturday_Avg_Headway_Inbound',
       'Saturday_Avg_Headway_Outbound', 'Saturday_# of Units',
       'Sunday_Avg Ridership',
       'Sunday_Avg_Headway_Inbound', 'Sunday_Avg_Headway_Outbound',
       'Sunday_# of Units']
for c in conv_col:
    matrix[c] = [to_float(a) for a in matrix[c]]

In [213]:
matrix['Weekday_Trips'] = matrix[['Weekday_Trips_Outbound', 'Weekday_Trips_Inbound']].sum(axis=1)
matrix['Sat_Trips'] = matrix[['Saturday_Trips_Outbound','Saturday_Trips_Inbound']].sum(axis=1)
matrix['Sun_Trips'] = matrix[['Sunday_Trips_Outbound','Sunday_Trips_Inbound']].sum(axis=1)
matrix['Wkdy_Peak_Hdwy'] = matrix[['Weekday_Avg_Headway_Inbound_Peak','Weekday_Avg_Headway_Outbound_Peak']].dropna().min(axis=1)
matrix['Wkdy_Offpeak_Hdwy'] = matrix[['Weekday_Avg_Headway_Inbound_OffPeak','Weekday_Avg_Headway_Outbound_OffPeak']].dropna().max(axis=1)
matrix['Sat_Hdwy'] = matrix[['Saturday_Avg_Headway_Inbound','Saturday_Avg_Headway_Outbound']].dropna().mean(axis=1)
matrix['Sun_Hdwy'] = matrix[['Sunday_Avg_Headway_Inbound','Sunday_Avg_Headway_Outbound']].dropna().mean(axis=1)
matrix = matrix[['Route Number', 'Walkshed Population', 'Avg Equity Score', 'Public Transportation Commute', 
       'No Vehicle Households', 'Median Income', 'Total Ridership', 'Weekday_Avg Ridership',
                 'Saturday_Avg Ridership', 'Sunday_Avg Ridership', 'Weekday_Trips',
       'Sat_Trips',       'Sun_Trips',       'Wkdy_Peak_Hdwy',       'Wkdy_Offpeak_Hdwy',       'Sat_Hdwy', 'Sun_Hdwy']]
matrix['Peak_Trips'] = [get_from_peak_table(r, peak=True, trip_not_time=True) for r in matrix['Route Number']]
matrix['OffPeak_Trips'] = matrix['Weekday_Trips']-matrix['Peak_Trips']
matrix['Peak_Runtime'] = [get_from_peak_table(r, peak=True, trip_not_time=False) for r in matrix['Route Number']]
matrix['Offpeak_Runtime'] = [get_from_peak_table(r, peak=False, trip_not_time=False) for r in matrix['Route Number']]

In [214]:
reliable = pd.read_csv('https://data.wprdc.org/datastore/dump/00eb9600-69b5-4f11-b20a-8c8ddd8cfe7a')
rel_grp = reliable.groupby(['route', 'route_full_name', 'day_type'])
rel_grp = rel_grp.agg({"on_time_percent": "mean", 'month_start': 'count'}).reset_index()

In [215]:
def get_ontime(route, daytype):
    #print(route)
    try:
        p = rel_grp.loc[((rel_grp['route']==str(route))&(rel_grp['day_type']==daytype)),'on_time_percent'].iloc[0]
        return p*100
    except:
        print(route + ' not found')
        return 0

In [216]:
matrix['Wkdy_Ontime'] = [get_ontime(r, daytype='WEEKDAY') for r in matrix['Route Number']] 
matrix['Sat_Ontime'] = [get_ontime(r, daytype='SAT.') for r in matrix['Route Number']]
matrix['Sun_Ontime'] = [get_ontime(r, daytype='SUN.') for r in matrix['Route Number']]

In [217]:
matrix.head()

Unnamed: 0,Route Number,Walkshed Population,Avg Equity Score,Public Transportation Commute,No Vehicle Households,Median Income,Total Ridership,Weekday_Avg Ridership,Saturday_Avg Ridership,Sunday_Avg Ridership,Weekday_Trips,Sat_Trips,Sun_Trips,Wkdy_Peak_Hdwy,Wkdy_Offpeak_Hdwy,Sat_Hdwy,Sun_Hdwy,Peak_Trips,OffPeak_Trips,Peak_Runtime,Offpeak_Runtime,Wkdy_Ontime,Sat_Ontime,Sun_Ontime
0,52L,22917,0.2371,0.160638,0.193936,46782,110696,429.0,,,24,0,0,28.728311,,,,8,16,0.920625,0.6105,67.119231,0.0,0.0
1,53,13208,0.2296,0.1376,0.222329,47659,17501,,337.0,,0,34,0,,,59.911111,,0,0,0.0,0.0,0.0,72.144483,8.922222
2,53L,37038,0.2072,0.1464,0.195652,53303,377123,1462.0,,,55,0,0,24.33179,59.248052,,,13,42,1.196077,1.028595,65.950256,0.0,0.0
3,55,20479,0.235,0.0799,0.253619,34649,331946,1005.0,759.0,601.0,36,36,34,60.0,60.061506,59.763636,60.244213,0,36,0.0,0.0,74.32641,72.417949,74.829487
4,56,32080,0.238,0.1087,0.275747,35137,516895,1700.0,823.0,647.0,60,34,32,25.144712,49.667461,60.0,60.127273,0,60,0.0,0.0,68.416154,73.719487,78.109744


In [226]:
matrix['Weekend_Avg_Ridership'] =  matrix[['Saturday_Avg Ridership','Sunday_Avg Ridership']].sum(axis=1)
matrix['Weekend_Trips'] =  matrix[['Sat_Trips','Sun_Trips']].sum(axis=1)
matrix['Weekend_Hdwy'] =  matrix[['Sat_Hdwy','Sun_Hdwy']].max(axis=1)
matrix['Weekend_Ontime'] =  matrix[['Sat_Ontime','Sun_Ontime']].mean(axis=1)

In [228]:
matrix_p7 = matrix[matrix['Route Number']=='P7']
matrix_p7['OffPeak_Trips'] = matrix_p7['OffPeak_Trips'] + 26
matrix_p7['Weekday_Trips'] = 55
matrix_p7['Wkdy_Offpeak_Hdwy'] = 30
matrix_p7['Weekend_Trips'] = 24
matrix_p7['Weekend_Hdwy'] = 70

In [230]:
wkdy_matrix_p7 = matrix_p7.loc[matrix_p7['Weekday_Avg Ridership'].isna()==False,['Walkshed Population', 'Avg Equity Score',
       'Public Transportation Commute', 'No Vehicle Households',
       'Median Income', 'Wkdy_Ontime', 'Weekday_Trips','Wkdy_Peak_Hdwy',
       'Wkdy_Offpeak_Hdwy', 'Peak_Trips',
       'Peak_Runtime', 'Offpeak_Runtime']]

In [193]:
wkdy_x = matrix.loc[matrix['Weekday_Avg Ridership'].isna()==False,['Walkshed Population', 'Avg Equity Score',
       'Public Transportation Commute', 'No Vehicle Households',
       'Median Income', 'Wkdy_Ontime', 'Weekday_Trips','Wkdy_Peak_Hdwy',
       'Wkdy_Offpeak_Hdwy', 'Peak_Trips',
       'Peak_Runtime', 'Offpeak_Runtime']]
wkdy_y = matrix.loc[matrix['Weekday_Avg Ridership'].isna()==False,['Weekday_Avg Ridership']]
wkdy_x.fillna(wkdy_x.mean(), inplace=True)

In [231]:
wend_x = matrix.loc[(matrix['Weekend_Avg_Ridership'].isna()==False)&(matrix['Weekend_Avg_Ridership'] !=0),
                           ['Walkshed Population', 'Avg Equity Score',
       'Public Transportation Commute', 'No Vehicle Households',
       'Median Income', 'Weekend_Ontime', 'Weekend_Trips','Weekend_Hdwy',
       'Offpeak_Runtime']]
wend_y = matrix.loc[(matrix['Weekend_Avg_Ridership'].isna()==False)&(matrix['Weekend_Avg_Ridership'] !=0)
                    ,['Weekend_Avg_Ridership']]
wend_x.fillna(wend_x.mean(), inplace=True)

In [241]:
wend_matrix_p7 = matrix_p7.loc[:,
                           ['Walkshed Population', 'Avg Equity Score',
       'Public Transportation Commute', 'No Vehicle Households',
       'Median Income', 'Weekend_Ontime', 'Weekend_Trips','Weekend_Hdwy',
       'Offpeak_Runtime']]

# Modeling
## First a simple linear regression

In [194]:
X_train, X_test, y_train, y_test = train_test_split(wkdy_x, wkdy_y, test_size=0.30, random_state=40)

In [195]:
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

y_pred_train = lr_model.predict(X_train)
y_pred_test = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print('R2 Train: {}'.format(r2_score(y_train, y_pred_train)))
print('R2 Test: {}'.format(r2_score(y_test, y_pred)))
print('RMSE: {}'.format(rmse))

R2 Train: 0.9740353130538549
R2 Test: -0.45897770128631077
RMSE: 944.2811970767843


This model does horribly. Let's try with some regularization:
## Lasso

In [196]:
lasso_model = Lasso(random_state=1234,
                  tol=1)
lasso_model.fit(X_train, y_train)

y_pred_train = lasso_model.predict(X_train)
y_pred_test = lasso_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

print('R2 Train: {}'.format(r2_score(y_train, y_pred_train)))
print('R2 Test: {}'.format(r2_score(y_test, y_pred_test)))

print('RMSE: {}'.format(rmse))

R2 Train: 0.9648961061417406
R2 Test: 0.44677572134363386
RMSE: 944.2811970767843


In [197]:
enet_mod = ElasticNet(alpha=1.0,
                  l1_ratio=0.5,
                  random_state=1234,
                  tol=1)
enet_mod.fit(X_train, y_train)

pred_train_= enet_mod.predict(X_train)
pred_test_= enet_mod.predict(X_test)

print('R2 Train: {}'.format(r2_score(y_train, pred_train_)))
print('R2 Test: {}'.format(r2_score(y_test, pred_test_)))
print('RMSE: {}'.format(np.sqrt(mean_squared_error(y_test,pred_test_))))


R2 Train: 0.9504576081257767
R2 Test: 0.5910741212860098
RMSE: 499.9186197200794


Elastic net seems to be doing much better than the other regression models

In [198]:
enet = ElasticNet(alpha=1.0,
                  l1_ratio=0.5,
                  random_state=1234,
                  tol=1)
enet.fit(wkdy_x, wkdy_y)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=1234, selection='cyclic', tol=1, warm_start=False)

In [199]:
enet_mod.predict(wkdy_matrix_p7)

array([2255.56001828])

In [247]:
matrix

Unnamed: 0,Route Number,Walkshed Population,Avg Equity Score,Public Transportation Commute,No Vehicle Households,Median Income,Total Ridership,Weekday_Avg Ridership,Saturday_Avg Ridership,Sunday_Avg Ridership,Weekday_Trips,Sat_Trips,Sun_Trips,Wkdy_Peak_Hdwy,Wkdy_Offpeak_Hdwy,Sat_Hdwy,Sun_Hdwy,Peak_Trips,OffPeak_Trips,Peak_Runtime,Offpeak_Runtime,Wkdy_Ontime,Sat_Ontime,Sun_Ontime,Weekend_Avg_Ridersthip,Weekend_Trips,Weekend_Hdwy,Weekend_Ontime,Weekend_Avg_Ridership
0,52L,22917,0.2371,0.160638,0.193936,46782,110696,429.0,,,24,0,0,28.728311,,,,8,16,0.920625,0.6105,67.119231,0.0,0.0,0.0,0,,0.0,0.0
1,53,13208,0.2296,0.1376,0.222329,47659,17501,,337.0,,0,34,0,,,59.911111,,0,0,0.0,0.0,0.0,72.144483,8.922222,337.0,34,59.911111,40.533352,337.0
2,53L,37038,0.2072,0.1464,0.195652,53303,377123,1462.0,,,55,0,0,24.33179,59.248052,,,13,42,1.196077,1.028595,65.950256,0.0,0.0,0.0,0,,0.0,0.0
3,55,20479,0.235,0.0799,0.253619,34649,331946,1005.0,759.0,601.0,36,36,34,60.0,60.061506,59.763636,60.244213,0,36,0.0,0.0,74.32641,72.417949,74.829487,1360.0,70,60.244213,73.623718,1360.0
4,56,32080,0.238,0.1087,0.275747,35137,516895,1700.0,823.0,647.0,60,34,32,25.144712,49.667461,60.0,60.127273,0,60,0.0,0.0,68.416154,73.719487,78.109744,1470.0,66,60.127273,75.914615,1470.0
5,57,21417,0.2628,0.1739,0.30856,33562,414276,1275.0,927.0,674.0,62,38,36,24.3875,47.589928,59.8125,60.410764,0,62,0.0,0.0,67.773333,63.16641,72.871026,1601.0,74,60.410764,68.018718,1601.0
6,59,48108,0.2783,0.1909,0.29047,33826,724016,2142.0,1912.0,1308.0,38,36,28,60.0,59.608302,60.406998,60.192449,0,38,0.0,0.0,67.06359,67.488718,67.56359,3220.0,64,60.406998,67.526154,3220.0
7,60,14204,0.302,0.1292,0.330309,25676,145956,566.0,,,30,0,0,59.75,59.892857,,,0,30,0.0,0.0,80.99,6.617778,0.0,0.0,0,,3.308889,0.0
8,61A,79046,0.2546,0.2088,0.247956,37280,1560788,4895.0,3210.0,2381.0,110,88,64,15.837414,23.463483,24.820209,36.667135,20,90,1.17015,1.020189,60.572051,68.842308,66.023846,5591.0,152,36.667135,67.433077,5591.0
9,61B,71049,0.2418,0.1992,0.238222,38560,1380803,4394.0,2708.0,1935.0,114,98,66,16.572146,24.426284,24.90887,36.096045,20,94,0.94015,0.824468,60.85,68.711538,68.567692,4643.0,164,36.096045,68.639615,4643.0


In [200]:
matrix[matrix['Route Number']=='P7']

Unnamed: 0,Route Number,Walkshed Population,Avg Equity Score,Public Transportation Commute,No Vehicle Households,Median Income,Total Ridership,Weekday_Avg Ridership,Weekday_Trips,Sat_Trips,Sun_Trips,Wkdy_Peak_Hdwy,Wkdy_Offpeak_Hdwy,Sat_Hdwy,Sun_Hdwy,Peak_Trips,OffPeak_Trips,Peak_Runtime,Offpeak_Runtime,Wkdy_Ontime,Sat_Ontime,Sun_Ontime
34,P7,51470,0.2869,0.25,0.285881,40514,197203,764.0,29,0,0,24.344444,,,,10,19,0.8699,0.843,57.640513,0.0,0.0


In [201]:
764/29#*55

26.344827586206897

In [202]:
pred_enet = enet.predict(wkdy_x)
print(np.sqrt(mean_squared_error(wkdy_y,pred_enet)))
print(r2_score(wkdy_y, pred_enet))

516.5417193027964
0.9438685482040371


In [207]:
print( list(zip(matrix['Route Number'], pred_enet) ))

[('52L', 167.64341811323902), (53, 1333.6067491632334), ('53L', 775.9544987180755), (55, 1677.4428890426864), (56, 1661.0783495590535), (57, 1432.7891786047835), (59, 596.3234381704231), (60, 4795.270332258749), ('61A', 4770.331597845998), ('61B', 4955.045063301532), ('61C', 5572.755226177253), ('61D', 2191.5844104541407), (64, 287.277175145442), (65, 2292.2858240522746), (67, -584.4551403993851), (68, 2128.568703834294), (69, -545.2779107290362), (71, 5342.88923155932), ('71C', 5513.901681413161), ('71D', 1366.360517911703), (74, 2454.8493837208644), (77, 3037.152255880712), (79, 2350.330535409321), (86, 9783.00667793325), (93, 1142.5864626058114), ('P1', 646.8918139657635), ('P12', -27.20578154806526), ('P16', 952.3845536862602), ('P17', 4329.524020039335), ('P2', 277.2862838787464), ('P3', 1572.034241108665), ('P67', 390.62844675842916), ('P68', 1168.0949009393796), ('P69', 695.4373112904611), ('P7', 1004.2400892635544), ('P71', 1773.3835708705867)]


In [203]:
enet.predict(matrix_p7)

array([2171.87360059])

In [204]:
print( list(zip(wkdy_x.columns, enet.coef_) ))

[('Walkshed Population', 0.014507276688218924), ('Avg Equity Score', -0.0), ('Public Transportation Commute', -0.0), ('No Vehicle Households', -0.0), ('Median Income', -0.03071522607827541), ('Wkdy_Ontime', -21.373093727210787), ('Weekday_Trips', 36.72489609147823), ('Wkdy_Peak_Hdwy', 8.798882947149757), ('Wkdy_Offpeak_Hdwy', -4.536071448201992), ('Peak_Trips', 19.809061401121692), ('Peak_Runtime', 26.55317512930801), ('Offpeak_Runtime', 33.505712708629865)]


In [245]:
offpeak_factor = 36.72489609147823-19.809061401121692
offpeak_factor

16.915834690356537

Weekend prediction

In [233]:
X_train_wnd, X_test_wnd, y_train_wnd, y_test_wnd = train_test_split(wend_x, wend_y, test_size=0.30, random_state=40)

In [237]:
enet_wend_train = ElasticNet(alpha=1.0,
                  l1_ratio=0.5,
                  random_state=1234,
                  tol=1)
enet_wend_train.fit(X_train_wnd, y_train_wnd)

pred_train_= enet_wend_train.predict(X_train_wnd)
pred_test_= enet_wend_train.predict(X_test_wnd)

print('R2 Train: {}'.format(r2_score(y_train_wnd, pred_train_)))
print('R2 Test: {}'.format(r2_score(y_test_wnd, pred_test_)))
print('RMSE: {}'.format(np.sqrt(mean_squared_error(y_test_wnd,pred_test_))))


R2 Train: 0.8532786971753774
R2 Test: 0.09765123580034096
RMSE: 2559.449515963371


In [238]:
enet_wend = ElasticNet(alpha=1.0,
                  l1_ratio=0.5,
                  random_state=1234,
                  tol=1)
enet_wend.fit(wend_x, wend_y)
pred_enet_wend = enet_wend.predict(wend_x)
print(np.sqrt(mean_squared_error(wend_y,pred_enet_wend)))
print(r2_score(wend_y, pred_enet_wend))

856.2445465140992
0.8711391045794029


In [243]:
enet_wend.predict(wend_matrix_p7)

array([676.24550757])

In [246]:
print( list(zip(wend_x.columns, enet_wend.coef_) ))

[('Walkshed Population', 0.003669094188665355), ('Avg Equity Score', 6.166990353544749), ('Public Transportation Commute', -0.9404378249978783), ('No Vehicle Households', -0.0), ('Median Income', -0.042482033624603927), ('Weekend_Ontime', -9.408654581526047), ('Weekend_Trips', 37.599570561379245), ('Weekend_Hdwy', -12.550182240699671), ('Offpeak_Runtime', 122.1586396335817)]
