In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Area identifiers '0' .. '80' as strings (used as file-name suffixes and dict keys).
area_list = list(map(str, range(81)))
# Column labels 'c00' .. 'c29' applied to header-less CSV input.
col_names = ['c%02d' % number for number in range(30)]

In [3]:
from collections import namedtuple
# One record type serves two purposes: holding the four option lists
# (see get_for_list) and holding a single (dir, ratio, seed, csv) combination.
ARGS_FOR_LIST = namedtuple('FOR_LIST', ['dir', 'ratio', 'seed', 'csv'])


def get_for_list():
    """Return the option lists iterated by the ``for_*`` helpers.

    Returns:
        ARGS_FOR_LIST whose fields are lists of strings:
        ``dir``, ``ratio``, ``seed`` and ``csv``.
    """
    return ARGS_FOR_LIST(
        dir=['p10000', 'p20000', 'p30000'],
        ratio=['r4', 'r5', 'r6'],
        # A single seed for now; widen the range to sweep more seeds.
        seed=['s' + str(123 + offset) for offset in range(1)],
        csv=['od'],
    )

In [4]:
# Root directory that contains every input/output folder of this analysis.
# Edit this single constant (or pass ``base_dir``) to run on another machine.
DATA_ROOT = '/Users/kessapassa/OneDrive/research_log/20190402/'


def get_read_path(name, base_dir=None):
    """Return the directory path (with trailing '/') of sub-folder ``name``.

    Args:
        name: Folder name below the data root, e.g. ``'Origin'``.
        base_dir: Optional root directory overriding ``DATA_ROOT``. A trailing
            '/' is optional.

    Returns:
        ``'<root>/<name>/'`` as a string.
    """
    root = DATA_ROOT if base_dir is None else base_dir
    # Normalise so the root may be given with or without a trailing slash.
    return root.rstrip('/') + '/' + name + '/'

def get_file_name(args):
    """Return the CSV file name ``<dir><ratio><seed>_<csv>.csv`` for ``args``."""
    stem = ''.join((args.dir, args.ratio, args.seed))
    return stem + '_' + args.csv + '.csv'

def get_full_path(name, args):
    """Return the full path of the CSV for ``args`` inside folder ``name``."""
    folder = get_read_path(name)
    file_name = get_file_name(args)
    return folder + file_name

In [5]:
def get_full_area_path(name, args, area):
    """Return the full path of the per-area CSV for ``args`` in folder ``name``.

    The file name is ``<dir><ratio><seed>_<csv><area>.csv``; ``area`` must be a
    string because it is concatenated directly.
    """
    folder = get_read_path(name)
    stem = args.dir + args.ratio + args.seed + '_' + args.csv
    return folder + stem + area + '.csv'

In [6]:
def for_default_init(func, array):
    """Initialise ``array`` as a nested dict and call ``func`` per combination.

    For every (dir, ratio, seed, csv) combination from ``get_for_list()`` the
    slot ``array[dir][ratio][seed][csv]`` is reset to an empty dict, then
    ``func(args, array)`` is called with ``args`` describing that combination.
    Existing entries under the same keys are overwritten.

    Args:
        func: Callable taking ``(args, array)``.
        array: Dict that receives the nested structure (mutated in place).
    """
    options = get_for_list()

    for dir_key in options.dir:
        array[dir_key] = dict()

        for ratio_key in options.ratio:
            array[dir_key][ratio_key] = dict()

            for seed_key in options.seed:
                array[dir_key][ratio_key][seed_key] = dict()

                for csv_key in options.csv:
                    array[dir_key][ratio_key][seed_key][csv_key] = dict()
                    combination = ARGS_FOR_LIST(
                        dir=dir_key, ratio=ratio_key, seed=seed_key, csv=csv_key)
                    func(combination, array)

In [7]:
def for_default(func):
    """Call ``func(args)`` once per (dir, ratio, seed, csv) combination.

    Combinations come from ``get_for_list()`` and are visited with ``dir`` as
    the outermost loop and ``csv`` as the innermost.
    """
    options = get_for_list()
    combinations = (
        ARGS_FOR_LIST(dir_key, ratio_key, seed_key, csv_key)
        for dir_key in options.dir
        for ratio_key in options.ratio
        for seed_key in options.seed
        for csv_key in options.csv
    )
    for combination in combinations:
        func(combination)

In [8]:
# Nested dict [dir][ratio][seed][csv] -> counter table built by create2d.
df_base = {}


def create2d(args, array):
    """Store a zero-filled (time, area, people) table in ``array`` for ``args``.

    The table has one row per (time slot, area) pair: 6 time slots
    (3600, 7200, ..., 21600) times 81 areas (0 .. 80), ordered by time and then
    by area, so row ``t * 81 + a`` belongs to slot ``t`` and area ``a``.
    All three columns are float64 and ``people`` starts at 0.

    Args:
        args: Record with ``dir``, ``ratio``, ``seed`` and ``csv`` attributes.
        array: Nested dict; ``array[dir][ratio][seed]`` must already exist.
    """
    n_slots, n_areas, slot_seconds = 6, 81, 3600

    # Build all rows at once instead of writing them one by one through .loc,
    # which is far slower and relies on list-to-row broadcasting.
    rows = [
        (float(slot_seconds * (slot + 1)), float(area), 0.0)
        for slot in range(n_slots)
        for area in range(n_areas)
    ]
    table = pd.DataFrame(rows, columns=['time', 'area', 'people'])
    array[args.dir][args.ratio][args.seed][args.csv] = table

# Fill df_base with one zero-initialised counter table per (dir, ratio, seed, csv) combination.
for_default_init(create2d, df_base)

In [9]:
def interpolate_time(time):
    """Round ``time`` (seconds) up to the end of its one-hour slot.

    Args:
        time: Value accepted by ``int()``, e.g. ``'4200'`` or ``4200``.

    Returns:
        The smallest of 3600, 7200, ..., 21600 that is >= ``int(time)``.
        An empty string is returned when the value is negative or greater
        than 21600.
    """
    seconds = int(time)

    if seconds < 0:
        return ''

    for slot_end in (3600 * (k + 1) for k in range(6)):
        if seconds <= slot_end:
            return slot_end

    # Beyond the last slot.
    return ''

In [10]:
# times_list = [str(3600 * (i + 1)) for i in range(6)]
# empty_array = {}
# def create_empty_3d(args, array):
#     for _times in times_list:
#         array[args.dir][args.ratio][args.seed][args.csv][_times] = create_empty_base()

# for_default_init(create_empty_3d, empty_array)

In [11]:
# df = df_base['p10000']['r4']['s123']['od'].copy()
# df.loc[(df['time']==3600) & (df['area']==44), 'people'] += 1
# df.loc[(df['time']==3600) & (df['area']==44), 'people']

In [12]:
def for_convert_road(func):
    """Call ``func(args)`` for every (dir, ratio, seed) with ``csv='census'``.

    Same traversal as ``for_default`` except that the csv kind is fixed to
    ``'census'`` instead of being taken from ``get_for_list()``.
    """
    options = get_for_list()
    combinations = (
        ARGS_FOR_LIST(dir_key, ratio_key, seed_key, csv_key)
        for dir_key in options.dir
        for ratio_key in options.ratio
        for seed_key in options.seed
        for csv_key in ['census']
    )
    for combination in combinations:
        func(combination)

In [13]:
# Lookup table: road name -> area id stored as float.
road_to_area = {}


def create_road_to_area(road, area):
    """Register ``road`` in ``road_to_area`` with ``area`` converted to float.

    An existing entry for the same road is overwritten.
    """
    road_to_area.update({road: float(area)})

def func_road_to_area(args):
    """Load one census CSV and add its road -> area pairs to ``road_to_area``.

    The file is read from the ``'include_area_-1'`` folder and must provide
    ``road`` and ``area`` columns. A ``NaN -> NaN`` entry is also stored so
    that missing cells can be passed through the same lookup.
    """
    census = pd.read_csv(get_full_path('include_area_-1', args),
                         encoding='Shift_JISx0213')
    pairs = np.asanyarray(census.loc[:, ['road', 'area']])

    for road, area in pairs:
        create_road_to_area(road, area)

    road_to_area[np.nan] = np.nan
    
# Build the road -> area lookup from the 'census' file of every (dir, ratio, seed) combination.
for_convert_road(func_road_to_area)

In [14]:
def distribute(df, args):
    """Add the census observations in ``df`` to the counter table for ``args``.

    Each row of ``df`` is scanned from the fourth column onward. Every string
    cell containing ``'(census)'`` is one observation: the text before
    ``'(census)'`` is looked up in ``road_to_area`` to get the area, and the
    field following the first ``'@'`` is converted to a time slot with
    ``interpolate_time``. The time slot is refreshed only when the area differs
    from the previous observation in the same row; every observation
    increments ``people`` in ``df_base[dir][ratio][seed][csv]`` for that
    (time, area) pair.

    Args:
        df: Frame of raw trip rows; it is only read, never modified.
        args: Record with ``dir``, ``ratio``, ``seed`` and ``csv`` attributes.

    Raises:
        KeyError: If a road name is missing from ``road_to_area``.
    """
    counts = df_base[args.dir][args.ratio][args.seed][args.csv]

    for row in df.values.tolist():
        # None (not -1) marks "no previous area": -1 can be a real area value,
        # and using it as the sentinel skipped the time computation when the
        # first observation of a row mapped to area -1.
        previous_area = None

        for cell in row[3:]:
            if not (isinstance(cell, str) and '(census)' in cell):
                continue

            _area = road_to_area[cell.split('(census)')[0]]

            if previous_area is None or previous_area != _area:
                _time = interpolate_time(cell.split('@')[1])
            previous_area = _area

            # NOTE(review): consecutive observations in the same area are each
            # counted, all under the time slot of the first one — confirm this
            # is the intended counting rule.
            counts.loc[(counts['time'] == _time) & (counts['area'] == _area), 'people'] += 1

In [15]:
# Spot check: show the 'people' counter of area 44 in the 3600 s slot for one scenario.
df = df_base['p10000']['r4']['s123']['od'].copy()
df.loc[(df['time']==3600) & (df['area']==44), 'people']

44    0.0
Name: people, dtype: float64

In [16]:
def main(args):
    """Aggregate one raw CSV into the counter table and save the result.

    Reads the header-less file for ``args`` from the ``'Origin'`` folder,
    treats single-space cells as missing, drops columns that are entirely
    missing, feeds the rows to ``distribute`` and finally writes
    ``df_base[dir][ratio][seed][csv]`` to the ``'hoge'`` folder.

    Args:
        args: Record with ``dir``, ``ratio``, ``seed`` and ``csv`` attributes.
    """
    raw = pd.read_csv(get_full_path('Origin', args),
                      names=col_names,
                      encoding='Shift_JISx0213')

    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    trips = raw.replace(' ', np.nan).dropna(how='all', axis=1)
    distribute(trips, args)

    counts = df_base[args.dir][args.ratio][args.seed][args.csv]
    counts.to_csv(get_full_path('hoge', args), index=False)
    
# Run the aggregation for every (dir, ratio, seed, csv) combination; each run writes one CSV.
for_default(main)

In [18]:
# df = df.dropna(how='all')
# df = df.applymap(lambda x: road_to_area[x])
# df.head()

In [19]:
def dellist(items, indexes):
    """Return a new list with the elements at the given positions removed."""
    kept = []
    for position, item in enumerate(items):
        if position in indexes:
            continue
        kept.append(item)
    return kept


def remove_and_fill_none_area(series):
    """Drop every -1 from ``series`` and pad the tail with NaN.

    Remaining values are shifted toward the front, and one NaN is appended
    for each removed -1 so that the length and index stay the same.

    Returns:
        A new Series when at least one -1 was present; otherwise the input
        Series itself, unchanged.
    """
    values = series.values.tolist()

    if -1 not in values:
        return series

    # Positions holding -1.
    missing = [position for position, value in enumerate(values) if value == -1]
    # Remove them, which shifts the remaining values forward.
    compacted = dellist(values, missing)
    # Restore the original length by appending as many NaN as were removed.
    compacted.extend([np.nan] * len(missing))

    return pd.Series(compacted, index=series.index)

In [20]:
# df['count'] = df.T.apply(lambda x: x.count())
# df = df.sort_values(['count'], ascending=False)
# df.reset_index(drop=True, inplace=True)
# df.head()

In [21]:
def convert_road_to_area_od(args):
    """Convert the road names of one trip table to area ids and save it.

    Steps:
      1. Replace every cell from the fourth column onward by its area id from
         ``road_to_area`` (missing cells stay missing).
      2. Keep only rows whose ``c03`` value is >= 0.
      3. Remove -1 entries from each row, shifting later values forward
         (see ``remove_and_fill_none_area``).
      4. Rename ``c00`` to ``id``, sort by ``id`` and drop ``c01`` / ``c02``.
      5. Store the result back in ``csv_array`` and write it to the
         ``'converted_road_to_area_od'`` folder.

    Args:
        args: Record with ``dir``, ``ratio``, ``seed`` and ``csv`` attributes.

    Raises:
        KeyError: If a non-missing cell is not a key of ``road_to_area``.
    """
    def _to_area(cell):
        # Detect missing cells explicitly: looking NaN up as a dict key only
        # works when the cell is the very same object as the stored np.nan key.
        if pd.isna(cell):
            return np.nan
        return road_to_area[cell]

    # NOTE(review): csv_array is not defined in any visible cell (the recorded
    # run failed with NameError); it must be loaded before this function runs.
    df = csv_array[args.dir][args.ratio][args.seed][args.csv].copy()
    df.iloc[:, 3:] = df.iloc[:, 3:].applymap(_to_area)
    df = df[df.loc[:, 'c03'] >= 0]

    # remove_and_fill_none_area works on one Series at a time, so transpose to
    # present each row as a column, apply, and transpose back.
    df = df.T.apply(remove_and_fill_none_area).T
    df.reset_index(drop=True, inplace=True)

    df = df.rename(columns={'c00': 'id'})
    df = df.sort_values(['id'])
    df = df.drop(['c01', 'c02'], axis=1)

    csv_array[args.dir][args.ratio][args.seed][args.csv] = df
    df.to_csv(get_full_path('converted_road_to_area_od', args), index=False)
    
# NOTE(review): in the recorded run this call raised NameError because csv_array is not defined in the visible cells.
for_default(convert_road_to_area_od)

NameError: name 'csv_array' is not defined

In [None]:
def create_next_move_area(df, area):
    """Count, per destination area, the moves that directly follow ``area``.

    For each row of ``df`` the first cell equal to ``float(area)`` is located;
    if a following cell exists and is not NaN, the counter of the area in that
    following cell is incremented.

    Args:
        df: Frame whose rows are scanned left to right. Every cell of a row is
            compared, so any non-area column holding the same number also
            matches — NOTE(review): confirm that callers pass area columns only.
        area: Area id, anything accepted by ``float()``.

    Returns:
        DataFrame with columns ``area`` (0.0 .. 80.0) and ``people`` (counts).
    """
    origin = float(area)
    df_area = pd.DataFrame(
        {'area': [float(i) for i in range(81)], 'people': np.zeros(81)},
        columns=['area', 'people'],
    )

    for path in df.values.tolist():
        if origin not in path:
            continue

        next_position = path.index(origin) + 1
        if next_position >= len(path):
            continue

        destination = path[next_position]
        if np.isnan(destination):
            continue

        df_area.loc[df_area['area'] == destination, 'people'] += 1

    return df_area

In [None]:
# Nested dict [dir][ratio][seed][csv][area] -> next-move table built by more().
move_array = {}


def more(args, array):
    """Build, save and store the next-move table of every area for ``args``.

    For each entry of ``area_list`` the table from ``create_next_move_area`` is
    written to the ``'next_move_area'`` folder and stored under
    ``array[dir][ratio][seed][csv][area]``. The base file name is printed once
    all areas are done.
    """
    trips = csv_array[args.dir][args.ratio][args.seed][args.csv].copy()

    for area_key in area_list:
        next_moves = create_next_move_area(trips, area_key)
        target_path = get_full_area_path('next_move_area', args, area_key)
        next_moves.to_csv(target_path, index=False)
        array[args.dir][args.ratio][args.seed][args.csv][area_key] = next_moves

    print(get_file_name(args))
    
# Build and save the next-move tables for every combination.
# NOTE(review): more() reads csv_array, which is not defined in the visible cells — confirm where it is loaded.
for_default_init(more, move_array)