# POI based analysis

In [14]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Render floats in displayed tables with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

## read data
* OD matrices: PrT, PuT

In [15]:
def read_od_matrix(path):
    """Read an OD matrix from CSV, keeping the labels on both axes.

    The first (unnamed) CSV column becomes the row index instead of being
    deleted, so a later ``stack().reset_index()`` yields the stored row labels
    rather than positional row numbers 0..n-1. Column headers are converted
    to ``int``.

    NOTE(review): presumably the unnamed first column holds the origin zone
    numbers - confirm against the CSV. If it only holds 0..n-1, the result is
    the same as with the previous positional index.
    """
    matrix = pd.read_csv(path, index_col=0)
    matrix.columns = [int(c) for c in matrix.columns]
    return matrix


od_PuT = read_od_matrix('data//MTX_203.csv')
od_PrT = read_od_matrix('data//MTX_204.csv')

In [16]:
def _matrix_to_long(matrix, value_name):
    """Turn a wide matrix into rows of (Z_Rejon, Do_Rejon, <value_name>)."""
    long_form = matrix.stack().reset_index()
    # Columns are assigned by position: row label, column label, cell value.
    long_form.columns = ['Z_Rejon', "Do_Rejon", value_name]
    return long_form


od_PrT = _matrix_to_long(od_PrT, 'TripsPrT')
od_PuT = _matrix_to_long(od_PuT, 'TripsPuT')
# One row per OD pair that is present in both long tables (inner join).
od = od_PrT.merge(od_PuT, on=['Z_Rejon', "Do_Rejon"])

* trips

In [17]:
# Load the from-via-to table and discard the unnamed first CSV column.
trips = pd.read_csv('data//From_Via_To.csv').drop(columns=['Unnamed: 0'])

Unnamed: 0,Z_Rejon,POI,Do_Rejon,Czas_PuT,Czas_PrT
0,1.0,1.0,1.0,2283.62,520.28
1,1.0,1.0,2.0,2221.62,654.82
2,1.0,1.0,3.0,2221.62,636.82
3,1.0,1.0,4.0,2504.62,667.21
4,1.0,1.0,5.0,3013.62,785.48


## prepare

In [34]:
# --- travel times in minutes and time budgets -------------------------------
SECONDS_PER_MINUTE = 60
# Assumption made for these calculations only: PuT times are halved.
# (Original author's note: this will not hold for Warsaw.)
PUT_TIME_FACTOR = 0.5
# Only the lower half of the PrT travel times is assumed to matter.
BUDGET_QUANTILE = 0.5

# Keep the raw times in separate columns the first time this cell runs, and
# always convert from those. The previous version divided Czas_PrT/Czas_PuT in
# place, so every re-run of the cell shrank the times again.
for _mode in ('PrT', 'PuT'):
    _raw_col = 'Czas_{}_s'.format(_mode)
    if _raw_col not in trips.columns:
        trips[_raw_col] = trips['Czas_' + _mode]

trips['Czas_PrT'] = trips['Czas_PrT_s'] / SECONDS_PER_MINUTE  # convert to minutes
trips['Czas_PuT'] = trips['Czas_PuT_s'] / SECONDS_PER_MINUTE * PUT_TIME_FACTOR

BUDGET_PRT = trips['Czas_PrT'].quantile(BUDGET_QUANTILE)
# Budgets are assumed equal for both modes; the alternative would be the same
# quantile of Czas_PuT.
BUDGET_PUT = BUDGET_PRT

# Time remaining below the budget, i.e. time that can be spent at the POI.
trips['time_left_PrT'] = BUDGET_PRT - trips['Czas_PrT']
trips['time_left_PuT'] = BUDGET_PUT - trips['Czas_PuT']
pd.options.display.float_format = '{:,.1f}'.format

In [18]:
# Attach the OD demand (TripsPrT / TripsPuT) to every from-via-to row.
# Demand columns left over from an earlier run of this cell are dropped first;
# otherwise the merge would produce suffixed duplicates (_x / _y) and later
# cells reading 'TripsPrT' / 'TripsPuT' would fail.
trips = trips.drop(columns=['TripsPrT', 'TripsPuT'], errors='ignore')
trips = pd.merge(trips, od, on=['Z_Rejon', "Do_Rejon"])

# Encode each OD pair as one integer: origin * base + destination.
# The base stays 1000 (existing ids are unchanged) unless a destination id has
# four or more digits, in which case it grows so that distinct pairs cannot
# collide.
_origins = trips["Z_Rejon"].astype(int)
_destinations = trips["Do_Rejon"].astype(int)
_max_destination = int(_destinations.max()) if len(_destinations) else 0
OD_PAIR_BASE = max(1000, 10 ** len(str(_max_destination)))
trips['odPair'] = _origins * OD_PAIR_BASE + _destinations

Unnamed: 0,Z_Rejon,POI,Do_Rejon,Czas_PuT,Czas_PrT,TripsPrT,TripsPuT,odPair
0,1.0,1.0,1.0,2283.62,520.28,2.35,1.6,1001
1,1.0,2.0,1.0,2781.72,489.47,2.35,1.6,1001
2,1.0,3.0,1.0,4538.03,1070.58,2.35,1.6,1001
3,1.0,4.0,1.0,2372.85,517.89,2.35,1.6,1001
4,1.0,5.0,1.0,3400.56,833.29,2.35,1.6,1001


# POI-based analysis:
## number of people

In [26]:
print('number of people (potentially) per POI - PuT/PrT')
# Per POI, sum the OD demand of all rows whose travel time is below the budget.
_ppl_PrT = trips[trips.Czas_PrT < BUDGET_PRT].groupby(by=['POI'])['TripsPrT'].sum()
_ppl_PuT = trips[trips.Czas_PuT < BUDGET_PUT].groupby(by=['POI'])['TripsPuT'].sum()
# Align on the union of POIs. Assigning the PuT series into a frame built from
# the PrT result would silently drop POIs that appear only in the PuT result.
# NOTE: the spelling 'nPleoplePrT' is kept because other cells may reference it.
POI_ppl = pd.concat(
    [_ppl_PrT.rename('nPleoplePrT'), _ppl_PuT.rename('nPeoplePuT')],
    axis=1,
).sort_index()
POI_ppl

number of people (potentially) per POI - PuT/PrT


Unnamed: 0_level_0,nPleoplePrT,nPeoplePuT
POI,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,5971.2,60.2
2.0,13863.0,550.3
3.0,810.9,385.7
4.0,2011.0,8.9
5.0,7328.1,18.8
7.0,7.1,
8.0,14084.3,681.1
9.0,14477.0,1608.7
10.0,13556.8,689.1
11.0,13395.0,193.3


## number of paths/OD pairs

In [28]:
print('number of od pairs within budget per POI - PuT/PrT')
# Per POI, count the distinct OD pairs whose travel time is below the budget.
_pairs_PrT = trips[trips.Czas_PrT < BUDGET_PRT].groupby(by=['POI'])['odPair'].nunique()
_pairs_PuT = trips[trips.Czas_PuT < BUDGET_PUT].groupby(by=['POI'])['odPair'].nunique()
# Align on the union of POIs so that a POI present only in the PuT result is
# not silently dropped.
POI_pairs = pd.concat(
    [_pairs_PrT.rename('nODPairsPrT'), _pairs_PuT.rename('nODPairsPuT')],
    axis=1,
).sort_index()
POI_pairs

number of od pairs within budget per POI - PuT/PrT


Unnamed: 0_level_0,nODPairsPrT,nODPairsPuT
POI,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,156,9.0
2.0,463,17.0
3.0,25,18.0
4.0,106,3.0
5.0,178,6.0
7.0,1,
8.0,473,10.0
9.0,528,43.0
10.0,449,14.0
11.0,414,34.0


## mean/median time available

In [31]:
print("time available PrT")
# Per POI: sum / mean / median of the PrT travel time and of the time left
# below the budget, over rows within the PrT budget.
# The columns are selected with a list; selecting several groupby columns as a
# bare tuple (['a', 'b']) is rejected by pandas >= 2.0.
trips[trips.Czas_PrT < BUDGET_PRT].groupby(
    by=['POI'])[['Czas_PrT', 'time_left_PrT']].agg(['sum', 'mean', 'median'])

time available PrT


Unnamed: 0_level_0,Czas_PrT,Czas_PrT,Czas_PrT,time_left_PrT,time_left_PrT,time_left_PrT
Unnamed: 0_level_1,sum,mean,median,sum,mean,median
POI,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1.0,1659.6,10.6,10.6,195.8,1.3,1.2
2.0,4261.2,9.2,9.5,1245.6,2.7,2.4
3.0,270.0,10.8,11.1,27.3,1.1,0.8
4.0,1078.9,10.2,10.5,181.8,1.7,1.4
5.0,1788.0,10.0,10.5,329.1,1.8,1.4
7.0,11.5,11.5,11.5,0.4,0.4,0.4
8.0,4552.2,9.6,9.9,1073.5,2.3,2.0
9.0,4543.7,8.6,8.8,1736.1,3.3,3.0
10.0,4181.9,9.3,9.6,1158.3,2.6,2.3
11.0,4004.8,9.7,10.0,919.1,2.2,1.9


In [33]:
print("time available PuT")
# Per POI: sum / mean / median of the PuT travel time and of the time left
# below the budget, over rows within the PuT budget.
# The columns are selected with a list; selecting several groupby columns as a
# bare tuple (['a', 'b']) is rejected by pandas >= 2.0.
trips[trips.Czas_PuT < BUDGET_PUT].groupby(
    by=['POI'])[['Czas_PuT', 'time_left_PuT']].agg(['sum', 'mean', 'median'])

time available PuT


Unnamed: 0_level_0,Czas_PuT,Czas_PuT,Czas_PuT,time_left_PuT,time_left_PuT,time_left_PuT
Unnamed: 0_level_1,sum,mean,median,sum,mean,median
POI,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1.0,94.3,10.5,11.0,12.8,1.4,0.9
2.0,168.1,9.9,9.9,34.1,2.0,2.0
3.0,177.7,9.9,9.4,36.4,2.0,2.5
4.0,30.9,10.3,11.1,4.8,1.6,0.8
5.0,62.0,10.3,10.9,9.4,1.6,1.0
8.0,101.3,10.1,10.7,17.7,1.8,1.2
9.0,406.3,9.4,10.0,105.2,2.4,1.9
10.0,140.2,10.0,10.3,26.3,1.9,1.6
11.0,339.8,10.0,10.3,64.6,1.9,1.6
12.0,382.2,9.8,10.4,81.6,2.1,1.5
