# ステーションごとの特徴やネットワークに着目した分析

### モジュールのインポート  

table.ipynbをモジュールとして読み込み, CSVファイルの内容ごとに結合・前処理したDataFrameにアクセスする．

主なDataFrameは以下の5種類．  

- table.station：station_data  
- table.status：status_data  
- table.trip：trip_data
- table.weather：weather_data
- table.feature：table.tripにtable.stationとtable.weatherの必要な情報を結合したDataFrame  

＊結合前の各CSVファイルにもアクセスできる(例：table.station_201608)

In [1]:
# Make table.ipynb importable as a regular Python module.
import sys
import notebookutil as nbu
# Register notebookutil's NotebookFinder as an import hook; it must be in
# place before the `import table` below.
sys.meta_path.append(nbu.NotebookFinder())
import table

# Other required modules (machine-learning modules are imported where they are used).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## 可視化

### Landmarkごとに色分けしてマッピング

In [2]:
import os

import gmaps

# NOTE(review): a Google API key is committed in this notebook. Prefer supplying
# it through the GOOGLE_MAPS_API_KEY environment variable; the literal is kept
# only as a fallback so the notebook keeps running, and should be rotated and
# removed from the source.
gmaps.configure(
    api_key=os.environ.get(
        "GOOGLE_MAPS_API_KEY", "AIzaSyBpe5yZOyiRztKKWaP7lJ-r7RraFIFk0DU"
    )
)
fig = gmaps.Map()

# Landmarks and the marker colour used for each one (parallel lists).
landmark_list = ["San Francisco", "Redwood City", "Palo Alto", "Mountain View",  "San Jose"]
color_list = ["rgba(255,69,0,1)", "rgba(255,140,0,1)", "rgba(255,255,0,1)", "rgba(50,205,50, 1)", "rgba(30,144,255, 1)"]

# Add one symbol layer per landmark, coloured according to color_list.
for landmark, color in zip(landmark_list, color_list):
    if landmark == "Redwood City":
        # Redwood City is drawn as a single hard-coded marker instead of one
        # marker per station.
        # NOTE(review): the reason for this special case is not visible here.
        landmark_layer = gmaps.symbol_layer([(37.491405, -122.23305)], fill_color=color, stroke_color=color, scale=2)
    else:
        # (lat, long) of every station that belongs to this landmark.
        locations = np.array(table.station[table.station.landmark == landmark][["lat", "long"]])
        landmark_layer = gmaps.symbol_layer(locations, fill_color=color, stroke_color=color, scale=2)
    fig.add_layer(landmark_layer)

# Text legend for the colours used above.
print("\t\t\tred : San Francisco", "\torange : Redwood City", "\tyello : Palo Alto\t", "green : Mountain View\t",  "\tblue : San Jose")
fig

			red : San Francisco 	orange : Redwood City 	yello : Palo Alto	 green : Mountain View	 	blue : San Jose


## バスケット分析  
目的  
stationの接続行列(出発station→到着stationの利用回数を行ごとに正規化し, 各行の和が1になる割合の行列)をつくり, 各station間のつながりの強さ(有向)を把握する  
年ごとや季節ごとの接続行列を比較し, station間のつながりの変化がないか調べる

In [3]:
# Number of trips for every (start terminal, end terminal) pair.
pair_counts = table.trip.groupby(["Start Terminal", "End Terminal"]).size()

# Start terminals as rows, end terminals as columns; pairs with no trips count as 0.
count_matrix = pair_counts.unstack(level="End Terminal", fill_value=0)

# Divide each row by its total so that every row sums to 1.
M = count_matrix.div(count_matrix.sum(axis=1), axis=0)
M.head()

End Terminal,2,3,4,5,6,7,8,9,10,11,...,76,77,80,82,83,84,88,89,90,91
Start Terminal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.018844,0.047184,0.233419,0.072138,0.123371,0.102834,0.025469,0.072433,0.045197,0.080751,...,0.0,0.0,0.003092,0.0,0.0,0.051233,7.4e-05,0.000589,0.0,0.0
3,0.248707,0.236013,0.066291,0.039492,0.068641,0.037142,0.022097,0.024448,0.048425,0.055947,...,0.0,0.0,0.009873,0.0,0.0,0.028209,0.00047,0.003761,0.0,0.0
4,0.605655,0.021301,0.055231,0.008671,0.011876,0.025448,0.011687,0.006786,0.008106,0.010368,...,0.0,0.0,0.002451,0.0,0.0,0.00509,0.0,0.000189,0.0,0.0
5,0.518355,0.038179,0.022516,0.066569,0.04699,0.048458,0.040137,0.015663,0.034263,0.034263,...,0.0,0.0,0.005874,0.0,0.0,0.033774,0.0,0.0,0.0,0.0
6,0.379845,0.041909,0.016473,0.0172,0.06686,0.042878,0.030039,0.09593,0.03561,0.030766,...,0.0,0.0,0.036337,0.0,0.0,0.053052,0.0,0.004118,0.0,0.0
