In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Folder holding the OD input CSV files. The trailing slash matters: the
# loader cell builds file paths by plain string concatenation onto this value.
root_dir = '/Users/kessapassa/OneDrive/research_log/20190104/Origin/'

In [3]:
# File-name building blocks: each input file is named
# <dir_list item>seed<seed_list item>_<csv_list item>.csv
dir_list = ['people10000', 'people20000', 'people30000']
seed_list = list(map(str, range(123, 126)))
csv_list = ['od']
# Area identifiers "0" .. "35", kept as strings.
area_list = list(map(str, range(36)))

In [4]:
# Thirty zero-padded column labels, 'c00' through 'c29', used as read_csv names.
col_names = list(map('c{0:02d}'.format, range(30)))

In [5]:
def _strip_census_suffix(value):
    """Return ``value`` truncated before its first '(census)' marker.

    Values that are not strings, and strings that do not contain the marker,
    are returned unchanged.
    """
    if isinstance(value, str) and '(census)' in value:
        return value.split('(census)')[0]
    return value


# Nested lookup of loaded frames: csv_array[dir][seed][csv] -> DataFrame.
csv_array = {}
for _dir in dir_list:
    csv_array[_dir] = {}
    for _seed in seed_list:
        csv_array[_dir][_seed] = {}
        for _csv in csv_list:
            # File name is <dir>seed<seed>_<csv>.csv directly under root_dir.
            csv_path = root_dir + _dir + 'seed' + _seed + '_' + _csv + '.csv'
            # Passing names= labels the columns from col_names and makes
            # pandas treat the first line of the file as data.
            df = pd.read_csv(csv_path,
                             names=col_names,
                             encoding='Shift_JISx0213')
            # Single-space cells count as missing; columns that end up
            # entirely missing are dropped. np.nan is used because the
            # np.NaN alias no longer exists in NumPy 2.0.
            df = df.replace(' ', np.nan).dropna(how='all', axis=1)
            # Column-wise Series.map instead of the deprecated
            # DataFrame.applymap; still applied to every cell.
            df = df.apply(lambda column: column.map(_strip_census_suffix))
            csv_array[_dir][_seed][_csv] = df

In [6]:
# Pick one loaded frame by its three keys and keep an independent copy of
# everything from the fourth column onward (the first three are discarded).
people, seed, csv = 'people30000', '124', 'od'
df = csv_array[people][seed][csv].iloc[:, 3:].copy()

In [7]:
# Number of non-missing entries in each remaining column of df.
df.count()

c03    16820
c04    12731
c05     8914
c06     5858
c07     3786
c08     2446
c09     1667
c10     1167
c11      811
c12      485
c13      259
c14      170
c15       77
c16       36
c17       16
c18       10
c19        7
c20        5
c21        3
c22        2
dtype: int64

In [8]:
def _count_values(values):
    """Tabulate how often each distinct value occurs in ``values``.

    Returns a two-column frame: 'area' holds the distinct values and 'people'
    holds their occurrence counts, in the order produced by value_counts().
    """
    occurrences = pd.Series(values).value_counts()
    table = pd.DataFrame([occurrences.index, occurrences]).T
    table.columns = ['area', 'people']
    return table


# One count table per column of df (iterating the rows of the transpose
# walks the original columns in order).
od_array = [_count_values(column_values) for column_values in np.asanyarray(df.T)]

In [9]:
# How many per-column count tables were collected in od_array.
len(od_array)

20

In [10]:
# Inspect the count table stored at index 14 of od_array.
od_array[14]

Unnamed: 0,area,people
0,国道157号h,2
1,石川県道215号b,2
2,石川県道22号k,2
3,石川県道146号a,1
4,石川県道215号c,1
5,石川県道179号c,1
6,国道159号a,1
7,石川県道193号a,1
8,国道8号c,1
9,石川県道215号a,1


In [11]:
# Lookup filled from the census CSVs: road name -> area value.
road_to_area = {}
def create_road_to_area(road, area):
    """Store ``area`` under ``road`` in the module-level ``road_to_area`` dict.

    An existing entry for the same road is overwritten.
    """
    road_to_area[road] = area

# The census files are read from their own folder, not from root_dir.
census_root_dir = '/Users/kessapassa/OneDrive/research_log/20190109/Origin/'

for _dir in dir_list:
    for _seed in seed_list:
        for _csv in ['census']:
            # File name is <dir>seed<seed>_census.csv; the header row of the
            # file supplies the column names.
            df = pd.read_csv(census_root_dir + _dir + 'seed' + _seed + '_' + _csv + '.csv',
                             encoding='Shift_JISx0213')
            df = df.loc[:, ['road', 'area']]
            # Files are processed in loop order, so a road that appears in
            # several files ends up with the value from the last one read.
            for road, area in zip(df['road'], df['area']):
                create_road_to_area(road, area)

In [12]:
# Replace each entry of the 'area' column with its road_to_area value.
# A value missing from road_to_area raises KeyError. The tables are changed
# in place, so running this cell a second time would look up the
# already-replaced values in road_to_area again.
for table in od_array:
    table['area'] = table['area'].map(lambda road: road_to_area[road])

In [13]:
# Inspect table 14 again, now that its 'area' column was rewritten via road_to_area.
od_array[14]

Unnamed: 0,area,people
0,14,2
1,-1,2
2,15,2
3,21,1
4,-1,1
5,8,1
6,28,1
7,14,1
8,6,1
9,-1,1


In [14]:
# Collapse each table to one row per distinct 'area' value with the summed
# 'people' count. Summing the selected column directly avoids
# groupby(...).apply(), which also summed the grouping column and depended on
# apply() receiving that column (deprecated in pandas 2.2).
for index, table in enumerate(od_array):
    people_per_area = (
        # The counts may be stored with object dtype; make them numeric first.
        table.assign(people=pd.to_numeric(table['people']))
        .groupby('area', as_index=False)['people']
        .sum()
    )
    # Keep the sums as floats, matching the float cells of the np.zeros-based
    # df_base they are written into later.
    od_array[index] = people_per_area.astype({'people': float})

In [15]:
# Inspect table 14 once more after the per-area aggregation.
od_array[14]

Unnamed: 0,area,people
0,-1,6.0
1,6,1.0
2,8,1.0
3,14,3.0
4,15,2.0
5,21,2.0
6,28,1.0


In [16]:
# Zero-filled result grid: one row per area id from -1 through 35 (37 rows),
# an 'area' column, then one numbered column per table in od_array.
area_ids = list(range(-1, 36))
df_base = pd.DataFrame(np.zeros((len(area_ids), len(od_array) + 1))).rename(columns={0: 'area'})
df_base['area'] = area_ids

In [17]:
# Copy every (area, people) pair into df_base: table k goes to column
# position k + 1, and area id a goes to row position a + 1 (the row order of
# df_base starts at area -1).
for column_position, table in enumerate(od_array, start=1):
    for area_id, people_count in np.asanyarray(table):
        df_base.iloc[int(area_id) + 1, column_position] = people_count

In [18]:
# Display the filled grid: rows are area ids, numbered columns are the od_array tables.
df_base

Unnamed: 0,area,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,-1,1313.0,738.0,470.0,416.0,410.0,320.0,194.0,161.0,146.0,...,60.0,53.0,18.0,13.0,6.0,5.0,3.0,2.0,1.0,1.0
1,0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,9.0,7.0,0.0,2.0,2.0,1.0,1.0,1.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,21.0,9.0,4.0,5.0,7.0,3.0,8.0,8.0,3.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,6,248.0,193.0,203.0,42.0,49.0,52.0,89.0,107.0,97.0,...,37.0,27.0,24.0,7.0,1.0,1.0,1.0,0.0,0.0,0.0
8,7,10.0,4.0,3.0,0.0,0.0,3.0,2.0,2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,8,116.0,81.0,39.0,40.0,43.0,40.0,47.0,38.0,28.0,...,6.0,3.0,2.0,1.0,1.0,0.0,1.0,3.0,2.0,0.0


In [19]:
# df_base.to_csv(people + 'seed' + seed + '_' + csv + '.csv')