In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression as LR
from collections import defaultdict

In [2]:
# Directory containing the 2D per-seed CSV logs. It must end with '/' because
# file names are appended to it by plain string concatenation.
root_dir = '/Users/kessapassa/OneDrive/research_log/logs/2D/'

In [3]:
# Let pandas display frames of up to 216 rows without truncating them.
pd.set_option("display.max_rows", 216)

# [タイプ][割合][seed]の3次元連想配列を0で初期化作成

In [4]:
# Experiment axes: data type, ratio directory, and seed.
type_list = ['census', 'vehicles', 'mobile', 'pedestrians']
dir_list = ['2_8', '4_6', '6_4', '8_2']
seed_list = range(123, 132+1)

# Three-level nested dict keyed [type][ratio][seed] (seed as a string),
# e.g. csv_array['census']['2_8']['123']. Every leaf starts as the placeholder 0.
csv_array = {
    kind: {
        ratio: {str(seed): 0 for seed in seed_list}
        for ratio in dir_list
    }
    for kind in type_list
}

# 3次元連想配列にcsv読み込み

In [5]:
# Replace each placeholder in csv_array with the DataFrame read from
# <root_dir><type><ratio>_seed<seed>.csv; the first CSV column becomes the index.
for _type in type_list:
    for _dir in dir_list:
        for _seed in seed_list:
            seed_key = str(_seed)
            csv_path = '{}{}{}_seed{}.csv'.format(root_dir, _type, _dir, seed_key)
            csv_array[_type][_dir][seed_key] = pd.read_csv(csv_path, index_col=0)

In [6]:
# Fit vehicles['people'] against census['people'] separately for every
# (ratio, seed) pair and print the slope, the per-ratio mean slope, and the
# overall mean slope.
n_seeds = len(seed_list)  # divisor for the averages (previously a hard-coded 10)
all_ave = 0
for _dir in dir_list:
    ave = 0
    for _seed in seed_list:
        # scikit-learn expects 2-D inputs, hence the (-1, 1) column reshape.
        X = csv_array['census'][_dir][str(_seed)]['people'].values.reshape(-1, 1)
        Y = csv_array['vehicles'][_dir][str(_seed)]['people'].values.reshape(-1, 1)
        model = LR()
        model.fit(X, Y)
        # Y is 2-D, so coef_ is 2-D; coef_[0] is the one-element row holding the slope.
        slope = model.coef_[0]
        print(_dir+': '+str(_seed)+': '+str(slope))
        all_ave += slope
        ave += slope
    print(str(ave/n_seeds) + '\n')
print('\n'+ str(all_ave/(len(dir_list) * n_seeds)) + '\n')

2_8: 123: [2.4170748]
2_8: 124: [2.53947244]
2_8: 125: [2.09869452]
2_8: 126: [2.20839413]
2_8: 127: [2.60147881]
2_8: 128: [2.36918977]
2_8: 129: [2.05027433]
2_8: 130: [1.96197858]
2_8: 131: [2.25033198]
2_8: 132: [2.02895954]
[2.25258489]

4_6: 123: [2.42694829]
4_6: 124: [2.85834107]
4_6: 125: [2.78837087]
4_6: 126: [2.87909844]
4_6: 127: [2.49659501]
4_6: 128: [2.89143256]
4_6: 129: [2.88876976]
4_6: 130: [3.04989094]
4_6: 131: [3.31311034]
4_6: 132: [3.28660071]
[2.8879158]

6_4: 123: [4.0502569]
6_4: 124: [3.67504735]
6_4: 125: [4.07965347]
6_4: 126: [3.88499958]
6_4: 127: [3.95415902]
6_4: 128: [3.77216997]
6_4: 129: [3.41686278]
6_4: 130: [4.03974397]
6_4: 131: [3.96669887]
6_4: 132: [4.06052655]
[3.89001185]

8_2: 123: [4.46079405]
8_2: 124: [4.62673965]
8_2: 125: [4.61208141]
8_2: 126: [4.30809574]
8_2: 127: [4.01481325]
8_2: 128: [4.36149273]
8_2: 129: [4.30775939]
8_2: 130: [4.43811057]
8_2: 131: [4.23771404]
8_2: 132: [4.30914686]
[4.36767477]


[3.34954683]



# 割合毎の平均

In [7]:
# Average the 'people' column over all seeds for each ratio, then regress the
# averaged vehicle counts on the averaged census counts and print the slope.


def _seed_mean(kind, ratio):
    """Return a frame for `kind` whose 'people' column is the seed average for `ratio`.

    The frame's index and remaining columns are copied from the first ratio /
    first seed entry of csv_array (the '2_8', seed 123 file with the current
    lists). That is the same file this cell used to re-read from disk for
    every ratio; copying the already-loaded frame avoids those redundant reads.

    NOTE(review): every ratio reuses that one template, as before -- confirm
    the non-'people' columns are meant to be shared across ratios.
    """
    frame = csv_array[kind][dir_list[0]][str(seed_list[0])].copy()
    total = 0
    for _seed in seed_list:
        total = total + csv_array[kind][ratio][str(_seed)]['people']
    # Divide by the actual number of seeds (previously a hard-coded 10).
    # Assignment aligns the result on the template's index.
    frame['people'] = total / len(seed_list)
    return frame


census = {}
vehicles = {}
mobile = {}
pedestrians = {}
for _dir in dir_list:
    census[_dir] = _seed_mean('census', _dir)
    vehicles[_dir] = _seed_mean('vehicles', _dir)
    mobile[_dir] = _seed_mean('mobile', _dir)
    pedestrians[_dir] = _seed_mean('pedestrians', _dir)

    # scikit-learn expects 2-D inputs, hence the (-1, 1) column reshape.
    X = census[_dir]['people'].values.reshape(-1, 1)
    Y = vehicles[_dir]['people'].values.reshape(-1, 1)
    model = LR()
    model.fit(X, Y)
    print(_dir+': '+str(model.coef_[0]))

2_8: [2.32005854]
4_6: [2.95040239]
6_4: [3.96762173]
8_2: [4.40386223]


In [8]:
# Regress the per-ratio slope on the ratio's leading number (2, 4, 6, 8).
# The slopes are the values printed by the per-ratio averaging cell.
alpha_list = [[2, 4, 6, 8], [2.32005854, 2.95040239, 3.96762173, 4.40386223]]
alpha = pd.DataFrame(alpha_list).T  # column 0: ratio number, column 1: slope

# scikit-learn expects 2-D inputs, hence the (-1, 1) column reshape.
X2 = alpha[0].values.reshape(-1, 1)
Y2 = alpha[1].values.reshape(-1, 1)
model2 = LR().fit(X2, Y2)

# Coefficient of determination (R^2) of the fit on its own training data.
model2.score(X2, Y2)

0.9791482217570905

In [9]:
# Slope of the model2 fit: change in Y2 per unit of X2.
model2.coef_

array([[0.36343152]])

In [10]:
# Intercept of the model2 fit: predicted Y2 at X2 = 0.
model2.intercept_

array([1.59332862])

# 全体の人数に比例するのでは？

In [11]:
# Total of the 'people' column of the per-ratio mobile frame, one line per ratio.
for _dir in dir_list:
    people_total = mobile[_dir]['people'].sum()
    print(_dir, people_total, sep=': ')

2_8: 2519.1
4_6: 6004.8
6_4: 17765.7
8_2: 38907.600000000006


In [12]:
# Total of 'people' in the mobile data for each individual seed, grouped by
# ratio; blank lines separate one ratio's block from the next.
for _dir in dir_list:
    mobile_by_seed = csv_array['mobile'][_dir]
    for _seed in seed_list:
        seed_total = mobile_by_seed[str(_seed)]['people'].sum()
        print(seed_total)
    print('\n')

2355.0
2409.0
2624.0
2524.0
2676.0
2320.0
3017.0
2234.0
2239.0
2793.0


4067.0
6783.0
5938.0
5217.0
5889.0
6253.0
7967.0
6063.0
5456.0
6415.0


18584.0
17388.0
15204.0
20869.0
16587.0
15774.0
14592.0
21879.0
19965.0
16815.0


40848.0
42033.0
39945.0
42781.0
31322.0
38863.0
37694.0
36250.0
38240.0
41100.0




In [13]:
# Column labels to total in the 3D mobile logs: '3600', '7200', ..., '21600'
# (presumably elapsed seconds -- confirm against the log format).
times_list = [str(3600 * (i + 1)) for i in range(6)]

# Read each ratio's file once up front. Previously the read sat inside the
# time loop, so every file was parsed again for each of the six columns.
mobile_3d = {}
for _dir in dir_list:
    mobile_3d[_dir] = pd.read_csv('/Users/kessapassa/OneDrive/research_log/logs/3D/' + 'mobile'+ _dir + '.csv', index_col=0)

# For every time column, print that column's total for each ratio.
for time in times_list:
    for _dir in dir_list:
        print(_dir+', '+time+': '+str(mobile_3d[_dir][time].sum()))
    print('\n')

2_8, 3600: 1672.6000000000001
4_6, 3600: 2414.2000000000003
6_4, 3600: 3793.6
8_2, 3600: 5896.9


2_8, 7200: 561.1
4_6, 7200: 1218.1
6_4, 7200: 3906.7000000000007
8_2, 7200: 7749.900000000001


2_8, 10800: 216.2
4_6, 10800: 723.2999999999998
6_4, 10800: 2988.5
8_2, 10800: 7027.5


2_8, 14400: 69.2
4_6, 14400: 600.4000000000001
6_4, 14400: 2509.7999999999997
8_2, 14400: 6444.700000000001


2_8, 18000: 0.0
4_6, 18000: 529.4
6_4, 18000: 2347.7999999999997
8_2, 18000: 6030.800000000001


2_8, 21600: 0.0
4_6, 21600: 519.4
6_4, 21600: 2219.2999999999997
8_2, 21600: 5757.8




# エリア外のデータも

In [75]:
# RawData records for seed 123, one entry per ratio, grouped by (type, time).
# Every row gets people = 1, so summing 'people' within a group counts its rows.
raw_columns = ['id', 'type', 'time', 'road', 'x', 'y']
df_before = {}
for _dir in dir_list:
    raw_records = pd.read_csv(
        '/Users/kessapassa/OneDrive/research_log/logs/RawData/' + _dir +'seed123.csv',
        names=raw_columns,
        encoding='shift_jis',
    )
    df_before[_dir] = raw_records.assign(people=1).groupby(['type', 'time'])

In [94]:
# Origin records for seed 123, one entry per ratio, grouped by (type, time).
# Every row gets people = 1, so summing 'people' within a group counts its rows.
df_after = {}
for _dir in dir_list:
    origin_records = pd.read_csv(
        '/Users/kessapassa/OneDrive/research_log/logs/Origin/' + _dir +'_seed123.csv',
        encoding='shift_jis',
    )
    df_after[_dir] = origin_records.assign(people=1).groupby(['type', 'time'])

In [96]:
# For every ratio, print the 'people' total per (type, time) group of
# df_before, followed by the grand total across all groups.
# df_after can be summarised the same way by swapping it in for df_before.
for _dir in dir_list:
    print(_dir)

    before_counts = df_before[_dir]['people'].sum()
    print(before_counts)
    print('合計: '+str(before_counts.sum()) + '\n')

2_8
type         time 
 Pedestrian  3600     1358
             7200      429
             10800     102
 Vehicle     3600      505
             7200      133
             10800      80
Name: people, dtype: int64
合計: 2607

4_6
type         time 
 Pedestrian  3600     1158
             7200      354
             10800      68
             14400      11
 Vehicle     3600     1375
             7200      589
             10800     418
             14400     367
Name: people, dtype: int64
合計: 4340

6_4
type         time 
 Pedestrian  3600      835
             7200      284
             10800      54
             14400       6
             18000       3
 Vehicle     3600     3301
             7200     3995
             10800    3104
             14400    2555
             18000    2433
             21600    2272
Name: people, dtype: int64
合計: 18842

8_2
type         time 
 Pedestrian  3600      554
             7200      196
             10800      46
             14400       5
             

In [93]:
# Load the RawData file for ratio 4_6, seed 123, and display it for inspection.
before = pd.read_csv(
    '/Users/kessapassa/OneDrive/research_log/logs/RawData/' + '4_6' + 'seed123.csv',
    names=['id', 'type', 'time', 'road', 'x', 'y'],
    encoding='shift_jis',
)
before

Unnamed: 0,id,type,time,road,x,y
0,62378,Vehicle,3600,road14525,-9555.3500,-8086.9000
1,60018,Pedestrian,3600,,-1858.5000,-2641.8600
2,51697,Pedestrian,3600,,-6156.2800,-5652.8800
3,49768,Vehicle,3600,road619,-1678.7900,-2557.9700
4,44601,Vehicle,3600,金沢港線(census)(9),-1525.0600,-1245.9900
5,39441,Vehicle,3600,road585,-1980.1400,-953.1860
6,57801,Vehicle,3600,,-1985.8600,-1057.0300
7,60734,Pedestrian,3600,,-2232.9300,-970.2780
8,62806,Pedestrian,3600,,-2617.0700,2508.1600
9,64404,Vehicle,3600,road32,-1888.2000,-2466.0300


In [92]:
# Load the Origin file for ratio 4_6, seed 123, and display it for inspection.
after = pd.read_csv(
    '/Users/kessapassa/OneDrive/research_log/logs/Origin/' + '4_6' + '_seed123.csv',
    encoding='shift_jis',
)
after

Unnamed: 0,id,type,time,road,x,y,area
0,62378,Vehicle,3600,road14525,-9555.3500,-8086.9000,6
1,60018,Pedestrian,3600,,-1858.5000,-2641.8600,21
2,51697,Pedestrian,3600,,-6156.2800,-5652.8800,13
3,49768,Vehicle,3600,road619,-1678.7900,-2557.9700,22
4,44601,Vehicle,3600,金沢港線(census)(9),-1525.0600,-1245.9900,28
5,39441,Vehicle,3600,road585,-1980.1400,-953.1860,27
6,57801,Vehicle,3600,,-1985.8600,-1057.0300,27
7,60734,Pedestrian,3600,,-2232.9300,-970.2780,27
8,64404,Vehicle,3600,road32,-1888.2000,-2466.0300,21
9,54059,Vehicle,3600,road768,-1984.8000,-945.7140,27
