In [19]:
import pandas as pd
import json
import numpy as np
import re
import datetime
import os
import matplotlib.pyplot as plt

In [20]:
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS'] #用来正常显示中文标签

#desktop = '/Users/panbingqing/Documents/工作/城市大脑/长沙调控/各路口配时/特立配时/'
desktop = '/Users/panbingqing/Documents/工作/城市大脑/长沙调控/各路口配时/'

In [21]:
fill_data = pd.read_csv(desktop+'fill_data.csv')
fill_data.head()

Unnamed: 0,camera_id,car_count,date,direction,inter_name,lane,passtime,road_name,time,weekday
0,1,3.0,2019-05-17,LB,星沙-滨湖,1,2019-05-17 00:00:00,星沙-滨湖北,00:00:00,Friday
1,1,6.0,2019-05-24,LB,星沙-滨湖,1,2019-05-24 00:00:00,星沙-滨湖北,00:00:00,Friday
2,1,1.0,2019-05-31,LB,星沙-滨湖,1,2019-05-31 00:00:00,星沙-滨湖北,00:00:00,Friday
3,1,2.0,2019-06-14,LB,星沙-滨湖,1,2019-06-14 00:00:00,星沙-滨湖北,00:00:00,Friday
4,1,1.0,2019-06-21,LB,星沙-滨湖,1,2019-06-21 00:00:00,星沙-滨湖北,00:00:00,Friday


In [22]:
xswx = fill_data[fill_data.inter_name=='星沙-特立']

In [23]:
count_data = xswx.copy()
count_data.to_csv(desktop+'count_data.csv', index=False)
count_data.head()

Unnamed: 0,camera_id,car_count,date,direction,inter_name,lane,passtime,road_name,time,weekday
373440,5,3.666667,2019-05-10,L,星沙-特立,1,2019-05-10 00:00:00,星沙-特立南,00:00:00,Friday
373441,5,2.0,2019-05-17,L,星沙-特立,1,2019-05-17 00:00:00,星沙-特立南,00:00:00,Friday
373442,5,6.0,2019-05-24,L,星沙-特立,1,2019-05-24 00:00:00,星沙-特立南,00:00:00,Friday
373443,5,1.0,2019-05-31,L,星沙-特立,1,2019-05-31 00:00:00,星沙-特立南,00:00:00,Friday
373444,5,0.0,2019-06-07,L,星沙-特立,1,2019-06-07 00:00:00,星沙-特立南,00:00:00,Friday


In [24]:
def resample_data(data, sep):
    data.time = pd.to_datetime(data.time, format='%H:%M:%S')
    data.index = data.pop('time')
    columns = data.columns.tolist()
    columns.remove('car_count')
    data = data.groupby(columns).resample(sep).car_count.sum().reset_index()
    data.time = data.time.dt.time.astype(str)
    return data

In [25]:
def get_peak_data(data, stime=datetime.time(6, 30), etime=datetime.time(9, 0)):
    data = data.copy()
    data.time = pd.to_datetime(data.time, format='%H:%M:%S')
    data = data[(data.time.dt.time>=stime)&(data.time.dt.time<etime)]
    data.time = data.time.dt.time.astype(str)
    return data

In [26]:
def get_corr_info_by_date(data, columns):
    groups = data.groupby(columns)
    corr_info = pd.DataFrame()
    for group in groups:
        new_group = group[1]
        group_pivot = new_group.pivot_table(index='time', columns='date', values='car_count', aggfunc='mean')
        corr_coef = group_pivot.corr().stack()
        corr_coef.index.rename(['date_x', 'date_y'], inplace=True)
        corr_coef = corr_coef.reset_index()
        corr_coef.rename(columns={0: 'corr_coef'}, inplace=True)
        for i, col in enumerate(columns):
            corr_coef[col] = group[0][i]
        corr_info = corr_info.append(corr_coef)
    return corr_info

In [27]:
def get_coef_var_by_date(data, columns):
    groups = data.groupby(columns)
    corr_info = pd.DataFrame()
    for group in groups:
        new_group = group[1]
        group_pivot = new_group.pivot_table(index='time', columns='date', values='car_count', aggfunc='mean')
        coef_var_list = []
        for col in group_pivot.columns:
            if col == 'mean':
                continue
            coef_var = (group_pivot[[col, 'mean']].std(axis=1).mean())/(group_pivot['mean'].mean())
            coef_var_list.append({'date': col, 'coef_var': coef_var})
        coef_var = pd.DataFrame(coef_var_list)
        for i, col in enumerate(columns):
            coef_var[col] = group[0][i]
        corr_info = corr_info.append(coef_var)
    return corr_info

In [28]:
def get_corr_info_by_weekday(data, columns):
    groups = data.groupby(columns)
    corr_info = pd.DataFrame()
    for group in groups:
        new_group = group[1]
        group_pivot = new_group.pivot_table(index='time', columns='weekday', values='car_count', aggfunc='mean')
        corr_coef = group_pivot.corr().stack()
        corr_coef.index.rename(['weekday_x', 'weekday_y'], inplace=True)
        corr_coef = corr_coef.reset_index()
        corr_coef.rename(columns={0: 'corr_coef'}, inplace=True)
        for i, col in enumerate(columns):
            corr_coef[col] = group[0][i]
        corr_info = corr_info.append(corr_coef)
        return corr_info

In [29]:
def get_coef_var_by_weekday(data, columns):
    groups = data.groupby(columns)
    corr_info = pd.DataFrame()
    for group in groups:
        new_group = group[1]
        group_pivot = new_group.pivot_table(index='time', columns='weekday', values='car_count', aggfunc='mean')
        coef_var_list = []
        for col_x in group_pivot.columns:
            col_x_mean = group_pivot[col_x].mean()
            for col_y in group_pivot.columns:
                coef_var = group_pivot[[col_x, col_y]].std(axis=1).mean() / col_x_mean
                coef_var_list.append({'weekday_x': col_x,'weekday_y': col_y, 'coef_var': coef_var})
        coef_var = pd.DataFrame(coef_var_list)
        for i, col in enumerate(columns):
            coef_var[col] = group[0][i]
        corr_info = corr_info.append(coef_var)
    return corr_info

# 道路信息

In [30]:
road_count = count_data.groupby(['inter_name', 'road_name', 'weekday', 'date', 'time']).car_count.sum().reset_index()
road_count.head()

Unnamed: 0,inter_name,road_name,weekday,date,time,car_count
0,星沙-特立,星沙-特立东,Friday,2019-05-17,00:00:00,16.0
1,星沙-特立,星沙-特立东,Friday,2019-05-17,00:03:00,4.0
2,星沙-特立,星沙-特立东,Friday,2019-05-17,00:06:00,8.0
3,星沙-特立,星沙-特立东,Friday,2019-05-17,00:09:00,7.0
4,星沙-特立,星沙-特立东,Friday,2019-05-17,00:12:00,9.0


In [31]:
road_count = resample_data(road_count, sep='6T')

In [32]:
date_mean_road_count = road_count.groupby(['inter_name', 'road_name', 'weekday', 'time']).car_count.mean().reset_index()
week_road_count = date_mean_road_count.copy()
date_mean_road_count['date'] = 'mean'
new_road_count = road_count.append(date_mean_road_count).reset_index(drop=True)
new_road_count.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,car_count,date,inter_name,road_name,time,weekday
0,20.0,2019-05-17,星沙-特立,星沙-特立东,00:00:00,Friday
1,15.0,2019-05-17,星沙-特立,星沙-特立东,00:06:00,Friday
2,13.0,2019-05-17,星沙-特立,星沙-特立东,00:12:00,Friday
3,15.0,2019-05-17,星沙-特立,星沙-特立东,00:18:00,Friday
4,45.0,2019-05-17,星沙-特立,星沙-特立东,00:24:00,Friday


### 相关系数_date

In [33]:
corr_info_all_day = get_corr_info_by_date(new_road_count, ['inter_name', 'road_name', 'weekday'])
corr_info_all_day['period'] = '全天'
corr_info_peak = get_corr_info_by_date(get_peak_data(new_road_count), ['inter_name', 'road_name', 'weekday'])
corr_info_peak['period'] = '早高峰'
corr_info = corr_info_all_day.append(corr_info_peak)
corr_info.to_csv(desktop+'road_name_corr_info_by_date.csv', index=False)
corr_info.head()

Unnamed: 0,date_x,date_y,corr_coef,inter_name,road_name,weekday,period
0,2019-05-17,2019-05-17,1.0,星沙-特立,星沙-特立东,Friday,全天
1,2019-05-17,2019-05-24,0.669132,星沙-特立,星沙-特立东,Friday,全天
2,2019-05-17,2019-05-31,0.795429,星沙-特立,星沙-特立东,Friday,全天
3,2019-05-17,2019-06-14,0.821817,星沙-特立,星沙-特立东,Friday,全天
4,2019-05-17,2019-06-21,0.375308,星沙-特立,星沙-特立东,Friday,全天


### 变异系数_date

In [34]:
coef_var_all_day = get_coef_var_by_date(new_road_count, ['inter_name', 'road_name', 'weekday'])
coef_var_all_day['period'] = '全天'
coef_var_peak = get_coef_var_by_date(get_peak_data(new_road_count), ['inter_name', 'road_name', 'weekday'])
coef_var_peak['period'] = '早高峰'
coef_var = coef_var_all_day.append(coef_var_peak)
coef_var.to_csv(desktop+'road_name_coef_var_by_date.csv', index=False)
coef_var.head()

Unnamed: 0,coef_var,date,inter_name,road_name,weekday,period
0,0.279948,2019-05-17,星沙-特立,星沙-特立东,Friday,全天
1,0.194863,2019-05-24,星沙-特立,星沙-特立东,Friday,全天
2,0.423298,2019-05-31,星沙-特立,星沙-特立东,Friday,全天
3,0.707107,2019-06-07,星沙-特立,星沙-特立东,Friday,全天
4,0.080392,2019-06-14,星沙-特立,星沙-特立东,Friday,全天


### 相关系数_weekday

In [35]:
corr_info_all_day = get_corr_info_by_weekday(week_road_count, ['inter_name', 'road_name'])
corr_info_all_day['period'] = '全天'
corr_info_peak = get_corr_info_by_weekday(get_peak_data(week_road_count), ['inter_name', 'road_name'])
corr_info_peak['period'] = '早高峰'
corr_info = corr_info_all_day.append(corr_info_peak)
corr_info.to_csv(desktop+'road_name_corr_info_by_weekday.csv', index=False)
corr_info.head()

Unnamed: 0,weekday_x,weekday_y,corr_coef,inter_name,road_name,period
0,Friday,Friday,1.0,星沙-特立,星沙-特立东,全天
1,Friday,Monday,0.863544,星沙-特立,星沙-特立东,全天
2,Friday,Saturday,0.842231,星沙-特立,星沙-特立东,全天
3,Friday,Sunday,0.762479,星沙-特立,星沙-特立东,全天
4,Friday,Thursday,0.838352,星沙-特立,星沙-特立东,全天


### 变异系数_weekday

In [36]:
coef_var_all_day = get_coef_var_by_weekday(week_road_count, ['inter_name', 'road_name'])
coef_var_all_day['period'] = '全天'
coef_var_peak = get_coef_var_by_weekday(get_peak_data(week_road_count), ['inter_name', 'road_name'])
coef_var_peak['period'] = '早高峰'
coef_var = coef_var_all_day.append(coef_var_peak)
coef_var.to_csv(desktop+'road_name_coef_var_by_weekday.csv', index=False)
coef_var.head()

Unnamed: 0,coef_var,weekday_x,weekday_y,inter_name,road_name,period
0,0.0,Friday,Friday,星沙-特立,星沙-特立东,全天
1,0.155951,Friday,Monday,星沙-特立,星沙-特立东,全天
2,0.20812,Friday,Saturday,星沙-特立,星沙-特立东,全天
3,0.271142,Friday,Sunday,星沙-特立,星沙-特立东,全天
4,0.225098,Friday,Thursday,星沙-特立,星沙-特立东,全天


# 相位信息

In [18]:
phase_info = pd.read_csv('data/info/lane_info.csv')[['road_name', 'direction', 'phase']]
phase_info.drop_duplicates(['road_name', 'direction'], inplace=True)
phase_info.dropna(inplace=True)
phase_info.head()

FileNotFoundError: [Errno 2] File b'data/info/lane_info.csv' does not exist: b'data/info/lane_info.csv'

In [19]:
# 根据道路车道方向匹配相位信息
phase_data = count_data.merge(phase_info, on=['road_name', 'direction'])
phase_data.head()

Unnamed: 0,time,date,car_count,camera_id,inter_name,road_name,lane,direction,weekday,phase
0,00:00:00,2019-05-17,4.0,9,星沙-望仙,星沙-望仙东,1,L,Friday,C
1,00:00:00,2019-05-24,7.0,9,星沙-望仙,星沙-望仙东,1,L,Friday,C
2,00:00:00,2019-05-31,1.0,9,星沙-望仙,星沙-望仙东,1,L,Friday,C
3,00:00:00,2019-06-07,6.0,9,星沙-望仙,星沙-望仙东,1,L,Friday,C
4,00:03:00,2019-05-17,2.0,9,星沙-望仙,星沙-望仙东,1,L,Friday,C


In [484]:
mean_phase_data = phase_data.groupby(['camera_id', 'road_name', 'inter_name', 'weekday', 'time', 'phase', 'lane', 'direction']).car_count.mean().reset_index()
mean_phase_data['date'] = 'mean'

In [485]:
phase_data.append(mean_phase_data).to_csv(desktop+'phase_data.csv', index=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


### 获取各相位最大流量信息

In [21]:
# 关键相位最大流量
key_phase = phase_data.sort_values(
    ['inter_name', 'weekday', 'date', 'time', 'phase', 'car_count']
).drop(
    ['road_name', 'lane', 'direction'], axis=1
).drop_duplicates(
    ['inter_name', 'weekday', 'date', 'time', 'phase'], keep='last'
).reset_index(drop=True)
key_phase.head()

Unnamed: 0,time,date,car_count,camera_id,inter_name,weekday,phase
0,00:00:00,2019-05-17,3.0,10,星沙-望仙,Friday,A
1,00:00:00,2019-05-17,3.0,11,星沙-望仙,Friday,B
2,00:00:00,2019-05-17,4.0,9,星沙-望仙,Friday,C
3,00:00:00,2019-05-17,3.0,12,星沙-望仙,Friday,D
4,00:03:00,2019-05-17,10.0,10,星沙-望仙,Friday,A


In [530]:
key_phase = phase_data.groupby(['inter_name', 'date', 'time', 'phase']).car_count.max()
key_phase.head()

inter_name  date        time      phase
星沙-望仙       2019-05-11  00:00:00  A        10.0
                                  B         4.0
                                  C         5.0
                                  D         5.0
                        00:03:00  A         8.0
Name: car_count, dtype: float64

In [22]:
def fill_key_phase(key_phase, columns):
    # 填充最大流量值
    groups =  key_phase.groupby(columns)
    key_phase = pd.DataFrame()
    for name, group in groups:
        group = group.pivot_table(index='time', columns='phase', values='car_count')
        group.fillna(method='pad', inplace=True)
        group = group.unstack().reset_index()
        for i, col in enumerate(columns):
            group[col] = name[i]
        group.rename(columns={0: 'car_count'}, inplace=True)
        key_phase = key_phase.append(group)
    return key_phase
key_phase = fill_key_phase(key_phase, ['inter_name', 'weekday', 'date'])
key_phase.head()

Unnamed: 0,phase,time,car_count,inter_name,weekday,date
0,A,00:00:00,3.0,星沙-望仙,Friday,2019-05-17
1,A,00:03:00,10.0,星沙-望仙,Friday,2019-05-17
2,A,00:06:00,10.0,星沙-望仙,Friday,2019-05-17
3,A,00:09:00,6.0,星沙-望仙,Friday,2019-05-17
4,A,00:12:00,9.0,星沙-望仙,Friday,2019-05-17


In [23]:
key_phase.to_csv(desktop+'key_phase.csv', index=False)

In [95]:
key_phase_2 = key_phase[key_phase.date.isin(['2019-05-22', '2019-06-05'])].copy()
key_phase_2.head()

Unnamed: 0,phase,time,car_count,inter_name,weekday,date
0,A,00:00:00,7.0,星沙-望仙,Wednesday,2019-05-22
1,A,00:03:00,7.0,星沙-望仙,Wednesday,2019-05-22
2,A,00:06:00,22.0,星沙-望仙,Wednesday,2019-05-22
3,A,00:09:00,12.0,星沙-望仙,Wednesday,2019-05-22
4,A,00:12:00,12.0,星沙-望仙,Wednesday,2019-05-22


In [527]:
date_mean_key_phase_2 = key_phase_2.groupby(['inter_name', 'phase', 'weekday', 'time']).car_count.mean().reset_index()
date_mean_key_phase_2['date'] = 'mean_2'

In [528]:
date_mean_key_phase = key_phase.groupby(['inter_name', 'phase', 'weekday', 'time']).car_count.mean().reset_index()
week_key_pahse = date_mean_key_phase.copy()
date_mean_key_phase['date'] = 'mean'
new_key_phase = key_phase.append(date_mean_key_phase).reset_index(drop=True)
new_key_phase.append(date_mean_key_phase_2).to_csv(desktop+'key_phase.csv', index=False)
new_key_phase.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,car_count,date,inter_name,phase,time,weekday
0,3.0,2019-05-17,星沙-望仙,A,00:00:00,Friday
1,10.0,2019-05-17,星沙-望仙,A,00:03:00,Friday
2,10.0,2019-05-17,星沙-望仙,A,00:06:00,Friday
3,6.0,2019-05-17,星沙-望仙,A,00:09:00,Friday
4,9.0,2019-05-17,星沙-望仙,A,00:12:00,Friday


### 相关性 date

In [25]:
corr_info_all_day = get_corr_info_by_date(new_key_phase, ['inter_name', 'phase', 'weekday'])
corr_info_all_day['period'] = '全天'
corr_info_peak = get_corr_info_by_date(get_peak_data(new_key_phase), ['inter_name', 'phase', 'weekday'])
corr_info_peak['period'] = '早高峰'
corr_info = corr_info_all_day.append(corr_info_peak)
corr_info.to_csv(desktop+'key_phase_corr_info_by_date.csv', index=False)
corr_info.head()

Unnamed: 0,date_x,date_y,corr_coef,inter_name,phase,weekday,period
0,2019-05-17,2019-05-17,1.0,星沙-望仙,A,Friday,全天
1,2019-05-17,2019-05-24,0.871796,星沙-望仙,A,Friday,全天
2,2019-05-17,2019-05-31,0.838808,星沙-望仙,A,Friday,全天
3,2019-05-17,2019-06-07,0.824854,星沙-望仙,A,Friday,全天
4,2019-05-17,mean,0.948843,星沙-望仙,A,Friday,全天


### 变异系数_date

In [26]:
coef_var_all_day = get_coef_var_by_date(new_key_phase, ['inter_name', 'phase', 'weekday'])
coef_var_all_day['period'] = '全天'
coef_var_peak = get_coef_var_by_date(get_peak_data(new_key_phase), ['inter_name', 'phase', 'weekday'])
coef_var_peak['period'] = '早高峰'
coef_var = coef_var_all_day.append(coef_var_peak)
coef_var.to_csv(desktop+'key_phase_coef_var_by_date.csv', index=False)
coef_var.head()

Unnamed: 0,coef_var,date,inter_name,phase,weekday,period
0,0.124818,2019-05-17,星沙-望仙,A,Friday,全天
1,0.118669,2019-05-24,星沙-望仙,A,Friday,全天
2,0.148425,2019-05-31,星沙-望仙,A,Friday,全天
3,0.246458,2019-06-07,星沙-望仙,A,Friday,全天
0,0.133543,2019-05-13,星沙-望仙,A,Monday,全天


### 相关性_weekday

In [27]:
corr_info_all_day = get_corr_info_by_weekday(week_key_pahse, ['inter_name', 'phase'])
corr_info_all_day['period'] = '全天'
corr_info_peak = get_corr_info_by_weekday(get_peak_data(week_key_pahse), ['inter_name', 'phase'])
corr_info_peak['period'] = '早高峰'
corr_info = corr_info_all_day.append(corr_info_peak)
corr_info.to_csv(desktop+'key_pahse_corr_info_by_weekday.csv', index=False)
corr_info.head()

Unnamed: 0,weekday_x,weekday_y,corr_coef,inter_name,phase,period
0,Friday,Friday,1.0,星沙-望仙,A,全天
1,Friday,Monday,0.912357,星沙-望仙,A,全天
2,Friday,Saturday,0.94316,星沙-望仙,A,全天
3,Friday,Sunday,0.918093,星沙-望仙,A,全天
4,Friday,Thursday,0.90568,星沙-望仙,A,全天


### 变异系数_weekday

In [28]:
coef_var_all_day = get_coef_var_by_weekday(week_key_pahse, ['inter_name', 'phase'])
coef_var_all_day['period'] = '全天'
coef_var_peak = get_coef_var_by_weekday(get_peak_data(week_key_pahse), ['inter_name', 'phase'])
coef_var_peak['period'] = '早高峰'
coef_var = coef_var_all_day.append(coef_var_peak)
coef_var.to_csv(desktop+'key_pahse_coef_var_by_weekday.csv', index=False)
coef_var.head()

Unnamed: 0,coef_var,weekday_x,weekday_y,inter_name,phase,period
0,0.0,Friday,Friday,星沙-望仙,A,全天
1,0.123778,Friday,Monday,星沙-望仙,A,全天
2,0.111599,Friday,Saturday,星沙-望仙,A,全天
3,0.132859,Friday,Sunday,星沙-望仙,A,全天
4,0.126284,Friday,Thursday,星沙-望仙,A,全天


# 分层聚类

In [29]:
from sklearn.cluster import AgglomerativeClustering

In [30]:
# 分类
def get_label(data, cluster_by, n_clusters=6):
    ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward').fit(data)
    label = ward.labels_
    data['label'] = label
    label = data.pop('label')
    label_result = data.stack().reset_index(cluster_by)
    label_result['label']=label
    return label_result

### 相位最大流量聚类

In [31]:
def get_cluster(data, columns, cluster_by, values, n_clusters=6):
    groups = data.groupby(columns)
    cluster = pd.DataFrame()
    for name, group in groups:
        group = group.pivot_table(index='time', columns=cluster_by, values=values)
        label = get_label(group, cluster_by, n_clusters)
        label.rename(columns= {0: values}, inplace=True)
        for i, col in enumerate(columns):
            label[col] = name[i]
        cluster = cluster.append(label)
    cluster.reset_index(inplace=True)
    cluster['n_clusters'] = n_clusters
    return cluster

In [515]:
resample_key_phase = resample_data(get_peak_data(date_mean_key_phase),sep='6T')
resample_key_phase.drop('date', axis=1, inplace=True)
resample_key_phase.head()

Unnamed: 0,inter_name,phase,weekday,time,car_count
0,星沙-望仙,A,Wednesday,06:30:00,16.0
1,星沙-望仙,A,Wednesday,06:36:00,13.0
2,星沙-望仙,A,Wednesday,06:42:00,19.5
3,星沙-望仙,A,Wednesday,06:48:00,22.5
4,星沙-望仙,A,Wednesday,06:54:00,21.0


In [117]:
all_cluster = pd.DataFrame()
for n in [4, 5, 6]:
    cluster = get_cluster(resample_key_phase,
                          columns=['inter_name', 'weekday'],
                          cluster_by='phase',
                          values='car_count',
                          n_clusters=n)
    all_cluster = all_cluster.append(cluster)
all_cluster.to_csv(desktop+'car_count_cluster.csv', index=False)
all_cluster.head()

Unnamed: 0,time,phase,car_count,label,inter_name,weekday,n_clusters
0,06:30:00,A,12.75,2,星沙-望仙,Friday,4
1,06:30:00,B,3.5,2,星沙-望仙,Friday,4
2,06:30:00,C,13.5,2,星沙-望仙,Friday,4
3,06:30:00,D,6.166667,2,星沙-望仙,Friday,4
4,06:36:00,A,14.0,2,星沙-望仙,Friday,4


### 算绿信比和周期长

In [521]:
# 相位饱和度，饱和流率设定为1440pcu/h
sat_key_phase = resample_key_phase.copy()
sat_key_phase['flow_rate'] = sat_key_phase.car_count / (1650 / 60 * 6)
# sat_key_phase.car_count = sat_key_phase.car_count.apply(lambda x: int(x)+1 if x % 1 != 0 else x)
sat_key_phase.head()

Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.09697
1,星沙-望仙,A,Wednesday,06:36:00,13.0,0.078788
2,星沙-望仙,A,Wednesday,06:42:00,19.5,0.118182
3,星沙-望仙,A,Wednesday,06:48:00,22.5,0.136364
4,星沙-望仙,A,Wednesday,06:54:00,21.0,0.127273


In [101]:
# key_phase.to_csv(desktop+'key_phase.csv', index=False)

In [517]:
def get_green_split(df):
    green_split = df.car_count / df.groupby(df.index).car_count.sum()
    return green_split
    
def get_cycle(df):
    L = 6 * 4
    total_lost = (1.5 * L + 5)
    sum_rate = 1 - df.groupby(df.index).flow_rate.sum()
    sum_rate.where(sum_rate<1, 0.001, inplace=True)
    cycle = total_lost / sum_rate
    cycle = cycle.apply(lambda x: int(x)+1 if x % 1 != 0 else x)
    return cycle.astype(int)

def get_phase_time(green_split, cycle):
    phase_time = (cycle - 6 * 4) * green_split + 6
    phase_time = int(phase_time)+1 if phase_time % 1 != 0 else phase_time
    return phase_time

def get_timing_data(key_phase, columns):
    groups = key_phase.groupby(columns)
    timing_data = pd.DataFrame()
    for name, group in groups:
        group.index = group['time']
        group = group.sort_index()
        group['cycle'] = get_cycle(group)
        group['green_split'] = get_green_split(group)
        group['phase_time'] = group.apply(lambda x: get_phase_time(x['green_split'], x['cycle']), axis=1)
        timing_data = timing_data.append(group)
    timing_data.reset_index(drop=True, inplace=True)
    return timing_data

In [522]:
timing_data = get_timing_data(sat_key_phase, columns=['inter_name', 'weekday'])
timing_data.head()

Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.09697,55,0.390244,19.0
1,星沙-望仙,C,Wednesday,06:30:00,8.0,0.048485,55,0.195122,13.0
2,星沙-望仙,D,Wednesday,06:30:00,14.0,0.084848,55,0.341463,17.0
3,星沙-望仙,B,Wednesday,06:30:00,3.0,0.018182,55,0.073171,9.0
4,星沙-望仙,D,Wednesday,06:36:00,4.0,0.024242,53,0.108108,10.0


In [523]:
timing_data.to_csv(desktop+'timing_data.csv', index=False)

In [105]:
# timing_data.loc[timing_data.phase=='A', 'phase_time'] += 5
# timing_data.loc[timing_data.phase=='B', 'phase_time'] -= 5
# timing_data.loc[timing_data.phase=='C', 'phase_time'] += 5

In [120]:
timing_cluster = pd.DataFrame()
for n in [3, 4, 5, 6]:
    cluster = get_cluster(timing_data,
                          columns=['inter_name', 'weekday'],
                          cluster_by='phase',
                          values='phase_time',
                          n_clusters=n)
    timing_cluster = timing_cluster.append(cluster)
timing_cluster.to_csv(desktop+'phase_time_cluster.csv', index=False)
timing_cluster.head()

Unnamed: 0,time,phase,phase_time,label,inter_name,weekday,n_clusters
0,06:30:00,A,18.0,1,星沙-望仙,Friday,3
1,06:30:00,B,10.0,1,星沙-望仙,Friday,3
2,06:30:00,C,18.0,1,星沙-望仙,Friday,3
3,06:30:00,D,12.0,1,星沙-望仙,Friday,3
4,06:36:00,A,19.0,1,星沙-望仙,Friday,3


### 分类配时优化

In [121]:
def filter_outlier(series, inplace=False):
    '''
    # 根据箱型图分析过滤异常值，并返回NAN值
    '''
    nan_num = len(series[series.isna()])
    if not(inplace):
        series = series.copy()
    delta_q = series.quantile(0.75) - series.quantile(0.25)
    down_limit = series.quantile(0.5) - 3 * delta_q
    up_limit = series.quantile(0.5) + 3 * delta_q
    series.where(lambda x: (x >= down_limit) & (x <= up_limit), inplace=True)
    outlier_num = len(series[series.isna()])
    print(f"{nan_num} nan / {outlier_num} outlier out of [{down_limit:.2f}, {up_limit:.2f}].")
    return series

In [313]:
# new_timing = timing_cluster[(timing_cluster.n_clusters==3)&(timing_cluster.weekday=='Wednesday')].copy()
new_label = ['06:30:00', '06:54:00', '07:18:00', '07:36:00', '08:06:00', '08:48:00']
timing_cluster = pd.read_csv(desktop+'timing_cluster.csv', usecols=['inter_name', 'phase', 'weekday', 'time', 'n_clusters'])
new_timing = timing_data.merge(timing_cluster, on=['inter_name', 'phase', 'weekday', 'time'])
new_timing.head()

Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time,n_clusters
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.111111,58,0.390244,20.0,0
1,星沙-望仙,C,Wednesday,06:30:00,8.0,0.055556,58,0.195122,13.0,0
2,星沙-望仙,D,Wednesday,06:30:00,14.0,0.097222,58,0.341463,18.0,0
3,星沙-望仙,B,Wednesday,06:30:00,3.0,0.020833,58,0.073171,9.0,0
4,星沙-望仙,D,Wednesday,06:36:00,4.0,0.027778,56,0.108108,10.0,0


In [345]:
new_phase_time = pd.DataFrame()
for name, group in new_timing.groupby(['inter_name', 'phase', 'weekday', 'n_clusters']):
    group['cluster_phase_time'] = filter_outlier(group.phase_time).dropna().mean()
    group.cluster_phase_time = group.cluster_phase_time.apply(lambda x: int(x)+1 if x % 1 != 0 else x)
#     group['std_time'] = filter_outlier(group.phase_time).dropna().std()
    new_phase_time = new_phase_time.append(group)
new_phase_time2 = pd.DataFrame()
for name, group in new_phase_time.groupby(['inter_name', 'weekday', 'n_clusters']):
    group['cluster_cycle'] = group.cluster_phase_time.sum() / (len(group)/len(group.phase.unique()))
    new_phase_time2 = new_phase_time2.append(group)
new_phase_time = new_phase_time2
new_phase_time.head()

0 nan / 0 outlier out of [12.00, 30.00].
0 nan / 0 outlier out of [18.00, 42.00].
0 nan / 0 outlier out of [-6.00, 78.00].


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


0 nan / 0 outlier out of [-6.00, 114.00].
0 nan / 0 outlier out of [23.00, 83.00].
0 nan / 0 outlier out of [6.50, 12.50].
0 nan / 0 outlier out of [2.75, 22.25].
0 nan / 0 outlier out of [5.00, 29.00].
0 nan / 1 outlier out of [10.00, 40.00].
0 nan / 0 outlier out of [-0.50, 50.50].
0 nan / 0 outlier out of [2.75, 43.25].
0 nan / 0 outlier out of [12.75, 56.25].
0 nan / 0 outlier out of [20.00, 68.00].
0 nan / 0 outlier out of [26.00, 116.00].
0 nan / 0 outlier out of [40.50, 91.50].
0 nan / 0 outlier out of [7.00, 19.00].
0 nan / 0 outlier out of [9.50, 21.50].
0 nan / 0 outlier out of [7.00, 37.00].
0 nan / 1 outlier out of [14.00, 50.00].
0 nan / 0 outlier out of [-25.50, 85.50].


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time,n_clusters,cluster_phase_time,cluster_cycle
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.111111,58,0.390244,20.0,0,21.0,67.0
7,星沙-望仙,A,Wednesday,06:36:00,13.0,0.090278,56,0.351351,18.0,0,21.0,67.0
9,星沙-望仙,A,Wednesday,06:42:00,19.5,0.135417,68,0.342105,22.0,0,21.0,67.0
14,星沙-望仙,A,Wednesday,06:48:00,22.5,0.15625,73,0.357143,24.0,0,21.0,67.0
3,星沙-望仙,B,Wednesday,06:30:00,3.0,0.020833,58,0.073171,9.0,0,10.0,67.0


### 标准差较正

In [318]:
# new_phase_time['std_phase_time'] = new_phase_time.cluster_phase_time
# new_phase_time.loc[new_phase_time.phase.isin(['A', 'C']), 'std_phase_time'] += new_phase_time.std_time
# new_phase_time.std_phase_time = new_phase_time.std_phase_time.apply(lambda x: int(x)+1 if x % 1 != 0 else x)

### 绿信比较正

In [406]:
groups = new_phase_time.groupby(['inter_name', 'phase', 'weekday', 'n_clusters'])
new_phase_time2 = pd.DataFrame()
for name, group in groups:
#     group.reset_index(drop=True, inplace=True)
    group['new_green_split'] = group.cluster_phase_time / group.cluster_cycle
    if name[1] == 'C':
        other = group.green_split.where(group.green_split > group.new_green_split, group.new_green_split)
        group['new_green_split'] = ((group.new_green_split + other) / 2).mean()
    new_phase_time2 = new_phase_time2.append(group)
new_phase_time2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time,n_clusters,cluster_phase_time,cluster_cycle,new_green_split
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.111111,58,0.390244,20.0,0,21.0,67.0,0.313433
7,星沙-望仙,A,Wednesday,06:36:00,13.0,0.090278,56,0.351351,18.0,0,21.0,67.0,0.313433
9,星沙-望仙,A,Wednesday,06:42:00,19.5,0.135417,68,0.342105,22.0,0,21.0,67.0,0.313433
14,星沙-望仙,A,Wednesday,06:48:00,22.5,0.15625,73,0.357143,24.0,0,21.0,67.0,0.313433
19,星沙-望仙,A,Wednesday,06:54:00,21.0,0.145833,79,0.304348,23.0,1,29.0,95.0,0.305263


In [413]:
groups = new_phase_time2.groupby(['inter_name', 'weekday', 'n_clusters'])
new_phase_time3 = pd.DataFrame()
for name, group in groups:
#     group.reset_index(drop=True, inplace=True)
    green_a = group[group.phase=="A"].new_green_split.mean()
    green_b = group[group.phase=="B"].new_green_split.mean()
    green_c = group[group.phase=="C"].new_green_split.mean()
    green_d = group[group.phase=="D"].new_green_split.mean()
    group.loc[group.phase=="A", 'new_green_split'] = (1 - green_c) * (green_a / (green_a+green_b+green_d))
    group.loc[group.phase=="B", 'new_green_split'] = (1 - green_c) * (green_b / (green_a+green_b+green_d))
    group.loc[group.phase=="D", 'new_green_split'] = (1 - green_c) * (green_d / (green_a+green_b+green_d))
    group['split_phase_time'] = (group.new_green_split * group.cluster_cycle).apply(lambda x: int(x)+1 if x % 1 != 0 else x)
    new_phase_time3 = new_phase_time3.append(group)
new_phase_time3.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time,n_clusters,cluster_phase_time,cluster_cycle,new_green_split,split_phase_time
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.111111,58,0.390244,20.0,0,21.0,67.0,0.294581,20
7,星沙-望仙,A,Wednesday,06:36:00,13.0,0.090278,56,0.351351,18.0,0,21.0,67.0,0.294581,20
9,星沙-望仙,A,Wednesday,06:42:00,19.5,0.135417,68,0.342105,22.0,0,21.0,67.0,0.294581,20
14,星沙-望仙,A,Wednesday,06:48:00,22.5,0.15625,73,0.357143,24.0,0,21.0,67.0,0.294581,20
3,星沙-望仙,B,Wednesday,06:30:00,3.0,0.020833,58,0.073171,9.0,0,10.0,67.0,0.140277,10


### 行人较正

In [414]:
def get_ped_time(phase, phase_time):
    if phase == 'A':
        phase_time = phase_time if phase_time >20 else 20
    if phase == 'C' or phase == 'D':
        phase_time = phase_time if phase_time >24 else 24
    return phase_time

new_phase_time4 = new_phase_time3.copy()
new_phase_time4['ped_phase_time'] = new_phase_time4.apply(lambda x: get_ped_time(x['phase'], x['split_phase_time']), axis=1)
new_phase_time4.head()

Unnamed: 0,inter_name,phase,weekday,time,car_count,flow_rate,cycle,green_split,phase_time,n_clusters,cluster_phase_time,cluster_cycle,new_green_split,split_phase_time,ped_phase_time
0,星沙-望仙,A,Wednesday,06:30:00,16.0,0.111111,58,0.390244,20.0,0,21.0,67.0,0.294581,20,20
7,星沙-望仙,A,Wednesday,06:36:00,13.0,0.090278,56,0.351351,18.0,0,21.0,67.0,0.294581,20,20
9,星沙-望仙,A,Wednesday,06:42:00,19.5,0.135417,68,0.342105,22.0,0,21.0,67.0,0.294581,20,20
14,星沙-望仙,A,Wednesday,06:48:00,22.5,0.15625,73,0.357143,24.0,0,21.0,67.0,0.294581,20,20
3,星沙-望仙,B,Wednesday,06:30:00,3.0,0.020833,58,0.073171,9.0,0,10.0,67.0,0.140277,10,10


In [356]:
new_phase_time4.to_csv(desktop+'new_phase_time(2day).csv', index=False)

### 车流量预估

In [466]:
def get_cycle_from_time(data, by):
    groups = data.groupby(['inter_name', 'weekday', 'time'])
    all_cycle = pd.Series()
    for name, group in groups:
        group['cycle'] = group[by].sum() / (len(group)/len(group.phase.unique()))
        all_cycle = all_cycle.append(group)
    return all_cycle.cycle

sel_phase_time = adjust_phase

phase_time_car = sel_phase_time[['inter_name', 'phase', 'weekday', 'time', 'car_count']].copy()
phase_time_car['time_type'] = 'mean_count(2day)'
all_phase_time_car = phase_time_car.copy()
for col in ['phase_time', 'cluster_phase_time', 'adjust_phase_time', 'ped_phase_time']:
    _phase_time_car = phase_time_car.copy()
    cycle = get_cycle_from_time(sel_phase_time, col)
    _phase_time_car['car_count'] = ((6*60 / cycle) * (sel_phase_time[col] - 6) / (3600/1650)).apply(round)
    _phase_time_car['time_type'] = 'pre_car_by_' + col
    all_phase_time_car = all_phase_time_car.append(_phase_time_car)
all_phase_time_car.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,inter_name,phase,weekday,time,car_count,time_type
0,星沙-望仙,A,Wednesday,6:30:00,16.0,mean_count(2day)
1,星沙-望仙,B,Wednesday,6:30:00,3.0,mean_count(2day)
2,星沙-望仙,C,Wednesday,6:30:00,8.0,mean_count(2day)
3,星沙-望仙,D,Wednesday,6:30:00,14.0,mean_count(2day)
4,星沙-望仙,A,Wednesday,6:36:00,13.0,mean_count(2day)


In [467]:
key_phase_car_count = resample_data(get_peak_data(key_phase), sep='6T').copy()
key_phase_car_count.rename(columns={'date': 'time_type'}, inplace=True)
all_phase_time_car = all_phase_time_car.append(key_phase_car_count)
all_phase_time_car.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,car_count,inter_name,phase,time,time_type,weekday
0,16.0,星沙-望仙,A,6:30:00,mean_count(2day),Wednesday
1,3.0,星沙-望仙,B,6:30:00,mean_count(2day),Wednesday
2,8.0,星沙-望仙,C,6:30:00,mean_count(2day),Wednesday
3,14.0,星沙-望仙,D,6:30:00,mean_count(2day),Wednesday
4,13.0,星沙-望仙,A,6:36:00,mean_count(2day),Wednesday


In [468]:
all_phase_time_car.to_csv(desktop+'phase_time_car(adjust).csv', index=False)

### 手动调整

In [495]:
adjust_phase = pd.read_csv(desktop+'timing_0607/new_phase_time(adjust).csv')
adjust_phase.head()

Unnamed: 0,inter_name,weekday,car_count,flow_rate,cycle,green_split,phase_time,n_clusters,cluster_phase_time,cluster_cycle,new_green_split,time,phase,split_phase_time,ped_phase_time,adjust,adjust_phase_time,change_num
0,星沙-望仙,Wednesday,16.0,0.111111,58,0.390244,20,0,21,67,0.289855,6:30:00,A,20,20,0,20,0
1,星沙-望仙,Wednesday,3.0,0.020833,58,0.073171,9,0,10,67,0.144928,6:30:00,B,10,10,0,10,0
2,星沙-望仙,Wednesday,8.0,0.055556,58,0.195122,13,0,22,67,0.362319,6:30:00,C,25,25,0,25,0
3,星沙-望仙,Wednesday,14.0,0.097222,58,0.341463,18,0,14,67,0.202899,6:30:00,D,14,24,0,14,0
4,星沙-望仙,Wednesday,13.0,0.090278,56,0.351351,18,0,21,67,0.289855,6:36:00,A,20,20,0,20,0


In [500]:
result_phase = pd.pivot_table(adjust_phase, index=['change_num'], columns='phase', values='ped_phase_time')
result_phase['cycle_time'] = result_phase.sum(axis=1)
result_phase['cycle_num'] = (adjust_phase.groupby('change_num').phase.count()/4 * 6 * 60 / result_phase['cycle_time']).apply(round)
result_phase['total_time'] = result_phase.cycle_time * result_phase.cycle_num
# result_phase.to_csv(desktop+'change_phase.csv')
result_phase

phase,A,B,C,D,cycle_time,cycle_num,total_time
change_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,20,10,25,24,79,18,1422
1,28,13,40,24,105,14,1470
2,37,14,57,24,132,8,1056
3,56,19,83,30,188,10,1880
4,46,19,79,33,177,14,2478
5,37,14,57,24,132,5,660


In [503]:
result_phase.total_time.sum()

8966

In [502]:
result_phase.apply(lambda x: f"0:{x['A']-5};1:5;2:{x['B']-5};3:5;4:{x['C']-5};5:5;6:{x['D']-5};7:5", axis=1)

change_num
0     0:15;1:5;2:5;3:5;4:20;5:5;6:19;7:5
1     0:23;1:5;2:8;3:5;4:35;5:5;6:19;7:5
2     0:32;1:5;2:9;3:5;4:52;5:5;6:19;7:5
3    0:51;1:5;2:14;3:5;4:78;5:5;6:25;7:5
4    0:41;1:5;2:14;3:5;4:74;5:5;6:28;7:5
5     0:32;1:5;2:9;3:5;4:52;5:5;6:19;7:5
dtype: object

### 行驶方向单车道流量

In [None]:
# 去除车道编号，保留车道方向，保留平均值
dir_data = fill_data.groupby(
    ['date', 'weekday', 'time', 'inter_name', 'road_name', 'direction']
).car_count.mean().reset_index()
dir_data.head()

In [None]:
dir_data.to_csv(desktop+'dir_data.csv', index=False)

### 周日期分类，确认配时方案日更新时间

In [None]:
def get_weekday_label(weekday):
    weekday_label = weekday
    return weekday_label

label_data = phase_data.copy()
label_data['weekday_label'] = label_data.weekday.apply(get_weekday_label)
label_data = label_data.groupby(
    ['inter_name', 'road_name', 'direction', 'weekday_label', 'time', 'phase']
).car_count.mean().reset_index()
label_data.head()

In [None]:
new_label_data = label_data[(label_data.inter_name=='星沙-望仙')].copy()
new_label_data.index = pd.to_datetime(new_label_data.time, format='%H:%M:%S')
new_label_data = new_label_data.groupby(
    ['inter_name', 'weekday_label', 'road_name', 'direction', 'phase']
).resample('15T').car_count.sum().reset_index()
new_label_data.sort_values(
    ['inter_name', 'weekday_label', 'time', 'phase', 'car_count'],
    inplace=True
)
# new_label_data.drop_duplicates(
#     ['inter_name', 'weekday_label', 'time', 'phase'],
#     keep='last', inplace=True
# )
new_label_data.to_csv(desktop+'label_data.csv', index=False)

In [None]:
new_label_data = label_data[(label_data.inter_name=='星沙-望仙')].copy()
label_max = new_label_data.groupby(
    ['inter_name', 'direction', 'weekday_label', 'time', 'phase']
).car_count.max().reset_index(level=['phase', 'direction'])
label_max.head()

In [None]:
label_max.car_count.sum()

In [None]:
old_count = label_max.loc[(label_max.phase=='D')&(label_max.direction=='L')].car_count.copy()
label_max.loc[
    (label_max.phase=='D') &
    (label_max.direction=='L'),
    'car_count'] *= 0.6
label_max.loc[
    (label_max.phase=='D') &
    (label_max.direction=='S'),
    'car_count'] += old_count * 0.4

In [None]:
label_max.car_count.sum()

In [None]:
label_max.to_csv(desktop+'label_max.csv')

In [None]:
label_data = label_max.reset_index(
    ['inter_name', 'weekday_label', 'time']
)
label_data.index = pd.to_datetime(label_data.time, format='%H:%M:%S')

In [None]:
label_data = label_data.groupby(
    ['inter_name', 'weekday_label', 'direction', 'phase']
).resample('15T').car_count.sum().reset_index()
label_data.head()

In [None]:
new_cluster = cluster.reset_index().copy()
new_cluster.index = pd.MultiIndex.from_frame(new_cluster[['inter_name', 'weekday_label', 'label', 'phase']])
new_cluster.head()

In [None]:
label_mean = cluster.groupby(['inter_name', 'weekday_label', 'label', 'phase']).phase_time.mean()
new_cluster['label_mean'] = label_mean.astype(int)+1

label_max = cluster.groupby(['inter_name', 'weekday_label', 'label', 'phase']).phase_time.max()
new_cluster['label_max'] = label_max.astype(int)+1

In [None]:
new_cluster.to_csv(desktop+'cluste.csv', index=False)

### C`相位

In [None]:
new_veh_count = label_data[label_data.phase.isin(['C', 'D'])].copy()
dir_ratio = [{'phase': 'C', 'direction': 'SL', 'ratio': {'S': 0.92, 'L': 0.08}}]

for dir_road in dir_ratio:
    dir_road_veh_count = new_veh_count[
        (new_veh_count.phase==dir_road['phase']) &
        (new_veh_count.direction==dir_road['direction'])]
    new_veh_count.drop(dir_road_veh_count.index, inplace=True)
    ratio = dir_road['ratio']
    for k, v in ratio.items():
        new_dir = dir_road_veh_count.copy()
        new_dir.direction = k
        new_dir.car_count = dir_road_veh_count.car_count * v
        new_veh_count = new_veh_count.append(new_dir)
new_veh_count = new_veh_count.groupby(
    ['inter_name', 'weekday_label', 'time', 'direction', 'phase']
).car_count.sum().reset_index()

In [None]:
new_veh_count.index = pd.MultiIndex.from_frame(new_veh_count[['inter_name', 'weekday_label', 'time']])
new_veh_count.drop(columns=['inter_name', 'weekday_label', 'time'], inplace=True)
new_veh_count.head()

In [None]:
c_delta = new_veh_count[
    (new_veh_count.phase=='C')&(new_veh_count.direction=='S')
].car_count - new_veh_count[
    (new_veh_count.phase=='C')&(new_veh_count.direction=='L')
].car_count
c_delta.where(c_delta>0, 0, inplace=True)

In [None]:
d_delta = new_veh_count[
    (new_veh_count.phase=='D')&(new_veh_count.direction=='S')
].car_count - new_veh_count[
    (new_veh_count.phase=='D')&(new_veh_count.direction=='L')
].car_count
d_delta.where(d_delta>0, 0, inplace=True)

In [None]:
new_timing_data = timing_data[['inter_name', 'weekday_label', 'time', 'car_count', 'phase_time', 'phase']].copy()
new_timing_data.index = pd.MultiIndex.from_frame(new_timing_data[['inter_name', 'weekday_label', 'time']])
new_timing_data.drop(columns=['inter_name', 'weekday_label', 'time'], inplace=True)
new_timing_data.head()

In [None]:
# delat_ratio = pd.DataFrame({
#     'c_delat': (c_delta / new_timing_data[new_timing_data.phase=='C'].car_count
#                ) * new_timing_data[new_timing_data.phase=='C'].phase_time,
#     'd_delat': (d_delta / new_timing_data[new_timing_data.phase=='D'].car_count
#                ) * new_timing_data[new_timing_data.phase=='D'].phase_time
# })
# delat_ratio = delat_ratio.apply(min, axis=1).apply(int)
# delat_ratio.where(delat_ratio>5, 5, inplace=True)

delat_ratio = (d_delta / 
               new_timing_data[new_timing_data.phase=='D'].car_count *
               new_timing_data[new_timing_data.phase=='D'].phase_time).apply(int)
delat_ratio.head()

In [None]:
timing = pd.DataFrame({
    'A': new_timing_data[new_timing_data.phase=='A'].phase_time,
    'B': new_timing_data[new_timing_data.phase=='B'].phase_time,
    'C': new_timing_data[new_timing_data.phase=='C'].phase_time,
    'C`': delat_ratio,
    'D': new_timing_data[new_timing_data.phase=='D'].phase_time - delat_ratio
})

In [None]:
timing.stack().to_csv(desktop+'another_timing.csv')

In [None]:
timing.head()

# 车头时距

In [360]:
def time_delta(time_series):
    time_series = time_series.sort_values().copy()
    time_delta = (time_series.shift(-1) - time_series).dropna()
    time_delta.reset_index(drop=True, inplace=True)
    return time_delta.dt.total_seconds()

In [424]:
veh_flow = pd.read_csv('data/xingsha_0511.csv')
veh_flow.passtime = pd.to_datetime(veh_flow.passtime, format='%Y/%m/%d %H:%M:%S')

In [432]:
veh_flow['time'] = veh_flow.passtime
headway_time = get_peak_data(veh_flow).groupby(
    ['inter_name', 'road_name', 'lane', 'direction']
).passtime.apply(time_delta).reset_index()

In [433]:
headway_time = headway_time[
    (headway_time.inter_name=='星沙-望仙') &
    (headway_time.passtime>=1)
]

In [434]:
cut_range = [x/2 for x in range(0, 22)]
headway_time['cut_range'] = pd.cut(headway_time.passtime, cut_range, right=False)

In [435]:
headway_time.to_csv(desktop+'headway.csv', index=False)

In [None]:
headway_time