In [4]:
import pandas as pd
import json
from tqdm import tqdm
import plotly.express as px
from pathlib import Path  

In [5]:
pd.set_option('display.max_columns', None) 

In [6]:
import seaborn as sns

def get_color(number):
    color = sns.color_palette("deep")[number]
    actual_rgb = tuple(int(255*x) for x in color)
    actual_hex = '#%02x%02x%02x' % actual_rgb
    return actual_hex

In [7]:
def cast_to_string(routes_property):
    return [str(int).replace("[", "").replace("]", "") for int in routes_property]

class GIS_processing():
    
    df = pd.DataFrame()
    df_clear = pd.DataFrame()
    dfs = pd.DataFrame()
    
    def __init__(self) -> None:
        pass
    
    def add_additionals(self, filepath) -> None:
        self.df = pd.read_csv(filepath)
        route_type = []
        start_point_meters = []
        finish_point_meters = []
        start_point_part = []
        finish_point_part = []
        instruction_type = []
        
        for i in tqdm(range(len(self.df["drivingDirection_json"]))):
            route_type.append(json.loads(self.df["drivingDirection_json"][i])["type"])
            start_point_meters.append(json.loads(self.df["drivingDirection_json"][i])['start_point']['meters'])
            finish_point_meters.append(json.loads(self.df["drivingDirection_json"][i])['finish_point']['meters'])
            start_point_part.append(json.loads(self.df["drivingDirection_json"][i])['start_point']['part'])
            finish_point_part.append(json.loads(self.df["drivingDirection_json"][i])['finish_point']['part'])
            if ('instruction' in json.loads(self.df["drivingDirection_json"][i])):
                instruction_type.append(json.loads(self.df["drivingDirection_json"][i])['instruction']['type'])
            else:
                 instruction_type.append(-1)
        route_type = pd.DataFrame(cast_to_string(route_type), columns=['route_type'])
        start_point_meters = pd.DataFrame(cast_to_string( start_point_meters), columns=['start_point_meters'])
        finish_point_meters = pd.DataFrame(cast_to_string(finish_point_meters), columns=['finish_point_meters'])
        start_point_part = pd.DataFrame(cast_to_string(start_point_part), columns=[' start_point_part'])
        finish_point_part = pd.DataFrame(cast_to_string(finish_point_part), columns=['finish_point_part'])
        instruction_type = pd.DataFrame(cast_to_string(instruction_type), columns=['instruction_type'])
        
        self.df = self.df.join(route_type.join(start_point_meters.join(finish_point_meters.join(start_point_part.join(finish_point_part.join(instruction_type))))))
        self.df.to_csv('add_' + Path(filepath).name)
        
    def flatternize(self, filepath, items_column) -> None:
    
        self.df = pd.read_csv(filepath)
        routes_edges = []
        routes_time = []
        routes_speed = []
        routes_length = []
#        routes_traffic_type = []

        for i in tqdm(range(len(self.df))):
            routes_edges.append([])
            routes_time.append([])
            routes_speed.append([])
            routes_length.append([])
#            routes_traffic_type.append([])
            for j in range(len(json.loads(self.df.iloc[i, items_column])['items'])):
                 for k in range(len(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'])):
                        routes_edges[i].append(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'][k]['edge_id'])
                        routes_time[i].append(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'][k]['time'])
                        routes_speed[i].append(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'][k]['speed'])
                        routes_length[i].append(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'][k]['length'])
#                if ('traffic_type' in json.loads(df.iloc[i, items_column])['items'][j]['edges'][k]):
#                            routes_traffic_type[i].append(json.loads(self.df.iloc[i, items_column])['items'][j]['edges'][k]['traffic_type'])
#                        else:
#                            routes_traffic_type[i].append(-1)
                            
        edges_pd = pd.DataFrame(cast_to_string(routes_edges), columns=['edges'])
        time_pd = pd.DataFrame(cast_to_string(routes_time), columns=['time'])
        speed_pd = pd.DataFrame(cast_to_string(routes_speed), columns=['speed'])
        routes_length_pd = pd.DataFrame(cast_to_string(routes_length), columns=['length'])
#        routes_traffic_type_pd = pd.DataFrame(cast_to_string(routes_traffic_type), columns=['directionality'])
        
        
        self.df = self.df.join(edges_pd.join(time_pd.join(speed_pd.join(routes_length_pd)))).drop(['start_json', 'end_json', 'navigationId', "start_utc", "end_utc", "ETA", "build_utc", "build_timestamp"], axis=1)
        self.df.to_csv('processed_' + Path(filepath).name)
        
    def plot_time_freq(self, routes_1, routes_2):
        
        def freq_counter(routes):
            freq = []
            time = []
            index = pd.DatetimeIndex(routes['start_timestamp'])
            for i in range(0, 23):
                freq.append(len(routes.iloc[index.indexer_between_time(str(0+i) + ':00', str(1+i) + ':00')]))
                time.append(str(0+i) + ':00 - ' +  str(1+i) + ':00');
            freq.append(len(routes.iloc[index.indexer_between_time('23:00','00:00')]))
            time.append('23:00 - 00:00')
            df = pd.DataFrame(freq, index = time, columns = ['frequencies'])
            return(df)

        def draw_freq_hist(freq_df):
            fig = px.bar(freq_df, x=freq_df.index, y='frequencies')
            fig.show()

        def draw_freq_line(freq_1, freq_2):
            fig = px.line(freq_1, x=freq_1.index, y='frequencies')
            fig.add_scatter(x=freq_2.index, y=freq_2['frequencies'], mode='lines')
            fig.show()
            
        freq_city_1 = freq_counter(routes_1)
        freq_city_2 = freq_counter(routes_2)
        
        draw_freq_line(freq_city_1, freq_city_2)
        
    def plot_time_freq(self, routes_1, routes_2):
        
        def flat_list(data):
            return [int(item.replace("'", "")) for sublist in data for item in sublist]
        
        def get_use_data(routes_omsk):
            day_edges = [] 
            for i in range(0, 2):
                tmp = routes_omsk[(routes_omsk['start_timestamp'] >= '2020-12-0' + str(7+i)) & (routes_omsk['start_timestamp'] < '2020-12-0' + str(7+i+1))]['new_edges'].to_list()
                tmp_cl = [x for x in tmp if str(x) != 'nan']
                for j in range(len(tmp_cl)):
                    tmp_cl[j] = tmp_cl[j].split(',')
                day_edges.append(flat_list(tmp_cl))

            tmp = routes_omsk[(routes_omsk['start_timestamp'] >= '2020-12-09') & (routes_omsk['start_timestamp'] < '2020-12-10')]['new_edges'].to_list()
            tmp_cl = [x for x in tmp if str(x) != 'nan']
            for j in range(len(tmp_cl)):
                tmp_cl[j] = tmp_cl[j].split(',')
            day_edges.append(flat_list(tmp_cl))

            for i in range(0, 4):
                tmp = routes_omsk[(routes_omsk['start_timestamp'] >= '2020-12-1' + str(i)) & (routes_omsk['start_timestamp'] < '2020-12-1' + str(i+1))]['new_edges'].to_list()
                tmp_cl = [x for x in tmp if str(x) != 'nan']
                tmp_cl = [x for x in tmp_cl if str(x) != '']
                for j in range(len(tmp_cl)):
                    tmp_cl[j] = tmp_cl[j].split(',')
                day_edges.append(flat_list(tmp_cl))
                return day_edges
        
        def usage_to_dict(usage):
            counts = dict()
            for i in usage:
                counts[i] = counts.get(i, 0) + 1
            return counts

        def overall_to_dict(overall):
            counts = dict()
            for i in overall:
                counts[i] = 0
            return counts
        
        def draw_freq_line_inter(freq_1, freq_2, freq_3):
            fig = px.line(freq_1, x=freq_1.index, y='frequencies')
            fig.add_scatter(x=freq_2.index, y=freq_2['frequencies'], mode='lines', line = {'color': get_color(3), 'dash': 'solid'})
            fig.add_scatter(x=freq_3.index, y=freq_3['frequencies'], mode='lines', line = {'color': get_color(8), 'dash': 'solid'})
            fig.update_layout(showlegend=False)
            fig.show()
        
        overall = flat_list(dfs[1])
        usage_monday = usage_to_dict(get_use_data(routes_omsk_clear)[0])
        usage_wednesday = usage_to_dict(get_use_data(routes_omsk_clear)[1])

        usage_saturday = usage_to_dict(get_use_data(routes_omsk_clear)[5])
        usage_sunday = usage_to_dict(get_use_data(routes_omsk_clear)[6])
        overall = overall_to_dict(overall)
        
        weekdays = {k: overall.get(k, 0) + usage_monday.get(k, 0) + usage_wednesday.get(k, 0) 
            for k in set(overall) | set(usage_monday) | set(usage_wednesday)}
        weekend = {k: overall.get(k, 0) + usage_saturday.get(k, 0) + usage_sunday.get(k, 0) 
            for k in set(overall) | set(usage_saturday) | set(usage_sunday)}
        intersection = {x:min(weekdays[x], weekend[x]) for x in weekdays if x in weekend}
        
        weekdays = pd.DataFrame.from_dict(weekdays, orient = 'index', columns = ['frequencies']).reset_index()
        weekend = pd.DataFrame.from_dict(weekend, orient = 'index', columns = ['frequencies']).reset_index()
        intersection = pd.DataFrame.from_dict(intersection, orient = 'index', columns = ['frequencies']).reset_index()
        
        draw_freq_line_inter(intersection, weekdays, weekend)
        
        
    def clear_flatternized(self, filepath = None) -> None:

        def del_heads_n_tails(route):
            counter_d = 0
            edge_arr = route['edges'].split(',')
            time_arr = route['time'].split(',')
            speed_arr = route['speed'].split(',')
            len_arr = route['length'].split(',')
#            dir_arr = route['directionality'].split(',')
            
            indexes = []
            for j in range(len(edge_arr)):
                if (int(edge_arr[j]) == 0 or int(time_arr[j]) == 0):
                    indexes.append(j)

            for j in range(len(edge_arr) - 1):
                if (int(edge_arr[j]) == int(edge_arr[j+1])):
                    indexes.append(j)

            indexes = set(indexes)
            counter_d = len(indexes)
            for index in sorted(indexes, reverse=True):
                del edge_arr[index]
                del time_arr[index]
                del speed_arr[index]
                del len_arr[index]
#                del dir_arr[index]                                                             
            
            return [edge_arr, time_arr, speed_arr, len_arr, counter_d]

        def clear_routes_data(routes):
            edges_clear = []
            time_clear = []
            speed_clear = []
            len_clear = []
 #           dir_clear = []
                                                                                     
            counter_do = 0
            for i in tqdm(range(len(routes))):
                route_data = del_heads_n_tails(routes.iloc[i, :])
                edges_clear.append(route_data[0])
                time_clear.append(route_data[1])
                speed_clear.append(route_data[2])
                len_clear.append(route_data[3])
#                dir_clear.append(route_data[4])
                counter_do += route_data[4] 
            routes_properties = [[edges_clear, 'edges'], [time_clear, 'time'], [speed_clear, 'speed'], [len_clear, 'length']]
            for i in range(len(routes_properties)):
                routes_properties[i] = pd.DataFrame(cast_to_string(routes_properties[i][0]), columns = [routes_properties[i][1]])
            return [routes_properties, edges_clear, counter_do]
        
        if (filepath != None):
            self.dfs = clear_routes_data(pd.read_csv(filepath))
            self.df = pd.read_csv(filepath)
        else:          
            self.dfs = clear_routes_data(self.df)
#        self.df_clear = self.df.drop(['edges', 'time', 'speed', 'length', 'directionality'], axis=1).join(self.dfs[0][0].join(self.dfs[0][1].join(self.dfs[0][2].join(self.dfs[0][3].join(self.dfs[0][4])))))
        self.df_clear = self.df.drop(['edges', 'time', 'speed', 'length'], axis=1).join(self.dfs[0][0].join(self.dfs[0][1].join(self.dfs[0][2].join(self.dfs[0][3]))))
        self.df_clear.to_csv('clear_' + Path(filepath).name)
        
            

In [8]:
def day_period(routes):
    index = pd.DatetimeIndex(routes['start_timestamp'])
    routes['day_period'] = -1
    routes.loc[index.indexer_between_time('00:00', '06:00'), 'day_period'] = 0
    routes.loc[index.indexer_between_time('06:00', '11:00'), 'day_period'] = 1
    routes.loc[index.indexer_between_time('11:00', '19:00'), 'day_period'] = 2
    routes.loc[index.indexer_between_time('19:00', '00:00'), 'day_period'] = 3
    return routes

def week_period(clear):
    clear['week_period'] = -1
    clear.loc[('2020-12-14' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-15'), 'week_period'] = 0
    clear.loc[('2020-12-13' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-14'), 'week_period'] = 1
    clear.loc[('2020-12-12' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-13'), 'week_period'] = 1
    clear.loc[('2020-12-11' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-12'), 'week_period'] = 0
    clear.loc[('2020-12-10' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-11'), 'week_period'] = 0
    clear.loc[('2020-12-09' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-10'), 'week_period'] = 0
    clear.loc[('2020-12-08' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-09'), 'week_period'] = 0
    clear.loc[('2020-12-07' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-08'), 'week_period'] = 0
    clear.loc[('2020-12-06' <= clear['start_timestamp']) & (clear['start_timestamp'] <= '2020-12-07'), 'week_period'] = 1
    return clear
def weather_period(clear, prop, values):
    clear[prop] = -1    
    
    night = pd.DatetimeIndex(clear['start_timestamp']).indexer_between_time('00:00', '11:00')
    day = pd.DatetimeIndex(clear['start_timestamp']).indexer_between_time('11:00', '19:00')
    evening = pd.DatetimeIndex(clear['start_timestamp']).indexer_between_time('19:00', '06:00')
    parts = [night, day, evening]
    v_index = 0
    dates = ['2020-12-07', '2020-12-08', '2020-12-09', '2020-12-10', '2020-12-11', '2020-12-12', '2020-12-13', '2020-12-14', '2020-12-15']
    for j in range(len(dates) - 1):
        for i in range(len(parts)):
            s = (dates[j] <= clear.loc[parts[i], 'start_timestamp']) & (clear.loc[parts[i], 'start_timestamp'] <= dates[j+1])
            clear.loc[(s)[s].index, prop] = values[v_index]
            if (i+2 != len(parts)):
                v_index += 1

    return clear

In [450]:
for i in range(len(props)):
    pure = weather_period(pure, props[i], vals_abakan[i])

In [492]:
for i in range(len(props)):
    pure_omsk = weather_period(pure_omsk, props[i], vals_omsk[i])

In [486]:
pure_omsk = pd.read_csv('routes_final_omsk_rev2t.csv', index_col=0)

In [485]:
pure_omsk.loc[(pure_omsk['start_timestamp'] >= '2020-12-07')].to_csv('routes_final_omsk_rev2t.csv', index=False)

In [504]:
pure.drop(["Unnamed: 0.1", "Unnamed: 0.1.1"], axis = 1).to_csv('routes_final_abakan_rev3.csv', index=False)

In [500]:
pure_omsk.drop(["Unnamed: 0.1", "Unnamed: 0.1.1", "index"], axis = 1).to_csv('routes_final_omsk_rev3.csv', index=False)

In [494]:
pure_omsk.to_csv('routes_final_omsk_rev3.csv')

KeyboardInterrupt: 

In [454]:
props = ['clouds', 'snow', 'temperature', 'wind', 'pressure']
vals_abakan = [clouds_abakan, snow_abakan, temp_abakan, wind_abakan, press_abakan]
vals_omsk = [clouds_omsk, snow_omsk, temp_omsk, wind_omsk, press_omsk]

In [453]:
clouds_omsk = [1, 4, 4, 4, 4, 4, 4, 4, 1, 4, 1, 1, 4, 4, 1, 0]
snow_omsk = [0, 1, 1, 0,1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
temp_omsk = [2, 2, 1, 1, 1, 1, 0, 1, 1,  1, 2, 3, 1, 1, 1, 2]
wind_omsk = [2, 2, 3, 3, 2, 2, 4, 4, 3,  3, 2, 2, 2, 2, 4, 4]
press_omsk = [2,  1, 0, 1, 1, 1, 1, 2, 3, 2, 2, 2, 1, 2, 1, 1]

In [188]:
clouds_abakan = [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0]
snow_abakan = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
temp_abakan = [3, 3, 0, 0, 2, 3, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1]
wind_abakan = [1, 1, 7, 7, 2, 2, 1, 1, 1, 1, 4, 4, 6, 6, 4, 4]
press_abakan = [2, 1, 1, 0, 2, 2, 1, 1, 2, 2, 3, 2, 2, 2, 2, 2]

In [447]:
pure = pd.read_csv('routes_final_abakan_rev2.csv', index_col=0)

In [472]:
pure_omsk = pd.read_csv('routes_final_omsk_rev2.csv', index_col=0)

In [10]:
processing =  GIS_processing()

In [11]:
processing.flatternize('abakan.csv', 15)

 48%|████████████████████████████████████▋                                       | 10142/20993 [18:47<20:06,  9.00it/s]


KeyboardInterrupt: 

In [12]:
processing.add_additionals('processed_abakan.csv')

FileNotFoundError: [Errno 2] File processed_abakan.csv does not exist: 'processed_abakan.csv'

In [None]:
processing.clear_flatternized("add_processed_abakan.csv")

In [350]:
clear = pd.read_csv("clear_add_processed_abakan.csv", index_col=0)

In [199]:
clear

Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,dist_to_b,dist_to_a,start_timestamp,RTA,real_dist,pred_dist,rebuildCount,drivingDirection_json,route_type,start_point_meters,finish_point_meters,start_point_part,finish_point_part,instruction_type,edges,time,speed,length,directionality
0,0,0,48,0,2020-12-12 10:11:27.0,501,4245,4163,2,"{""id"":50,""items"":[{""distance"":0,""duration"":0,""...",traffic,6.0,16.0,0.722993,0.290994,-1,"' 19423207912141719', ' 19423207912141691', ' ...","' 4800', ' 23160', ' 2640', ' 11840', ' 2700',...","' 15', ' 15', ' 15', ' 20', ' 20', ' 35', ' 40...","' 20', ' 34', ' 11', ' 38', ' 15', ' 62', ' 13...","' 2', ' 2', ' 2', ' -1', ' -1', ' -1', ' -1', ..."
1,1,1,81,2,2020-12-13 14:25:20.0,475,3787,4178,1,"{""id"":55,""items"":[{""distance"":0,""duration"":0,""...",traffic,11.0,32.0,0.496820,0.434923,-1,"' 19423207912142002', ' 19423207912142014', ' ...","' 8640', ' 8880', ' 15800', ' 960', ' 6080', '...","' 15', ' 15', ' 15', ' 15', ' 50', ' 45', ' 20...","' 36', ' 37', ' 45', ' 4', ' 15', ' 52', ' 90'...","' 2', ' 2', ' 2', ' 2', ' -1', ' -1', ' -1', '..."
2,2,2,42,0,2020-12-08 16:05:26.0,146,837,797,2,"{""id"":1443,""items"":[{""distance"":0,""duration"":0...",traffic,1.0,7.0,0.094503,0.726939,-1,"' 140060152397995913', ' 140060152397995843', ...","' 24720', ' 15360', ' 25800', ' 2400', ' 15421...","' 15', ' 15', ' 15', ' 15', ' 19', ' 19', ' 19...","' 103', ' 64', ' 45', ' 10', ' 55', ' 15', ' 2...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'..."
3,3,3,18,0,2020-12-10 07:54:23.0,152,578,490,1,"{""id"":0,""items"":[{""distance"":0,""duration"":0,""e...",traffic,14.0,17.0,0.796066,0.476703,-1,"' 140060152255006264', ' 140060152255006409', ...","' 9360', ' 11960', ' 36960', ' 23169', ' 39320...","' 15', ' 15', ' 15', ' 26', ' 15', ' 15'","' 39', ' 29', ' 154', ' 59', ' 143', ' 49'","' 2', ' 2', ' 2', ' 2', ' 2', ' 2'"
4,4,4,61,21,2020-12-07 15:47:28.0,375,2086,2081,2,"{""id"":22,""items"":[{""distance"":0,""duration"":0,""...",traffic,5.0,10.0,0.179399,0.393122,-1,"' 19423207912137938', ' 19423207912125448', ' ...","' 12208', ' 18313', ' 17880', ' 13824', ' 5040...","' 23', ' 23', ' 25', ' 25', ' 25', ' 25', ' 25...","' 78', ' 117', ' 20', ' 96', ' 35', ' 54', ' 1...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20988,20988,20988,57,13,2020-12-08 07:18:45.0,367,4427,4450,5,"{""id"":27,""items"":[{""distance"":0,""duration"":0,""...",traffic,1.0,145.0,0.441659,1.000000,-1,"' 19423207912140083', ' 19423207912127828', ' ...","' 5760', ' 17150', ' 4320', ' 3780', ' 2520', ...","' 15', ' 40', ' 40', ' 40', ' 40', ' 40', ' 40...","' 24', ' 135', ' 48', ' 42', ' 28', ' 21', ' 6...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'..."
20989,20989,20989,39,10,2020-12-08 13:31:20.0,306,2907,2961,1,"{""id"":28,""items"":[{""distance"":0,""duration"":0,""...",traffic,6.0,7.0,0.882815,0.109360,-1,"' 19423207912147914', ' 19423207912147908', ' ...","' 2880', ' 19920', ' 4560', ' 3840', ' 10466',...","' 15', ' 15', ' 15', ' 15', ' 27', ' 27', ' 27...","' 12', ' 83', ' 19', ' 16', ' 41', ' 44', ' 13...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'..."
20990,20990,20990,64,0,2020-12-08 07:18:40.0,225,1194,1462,3,"{""id"":282,""items"":[{""distance"":0,""duration"":0,...",traffic,4.0,41.0,0.289654,0.474029,-1,"' 19423207912117869', ' 19423207912118121', ' ...","' 3600', ' 12168', ' 4968', ' 1224', ' 8064', ...","' 50', ' 50', ' 50', ' 50', ' 25', ' 25', ' 30...","' 50', ' 169', ' 69', ' 17', ' 56', ' 20', ' 1...","' 2', ' 2', ' 2', ' 2', ' 2', ' -1', ' -1', ' ..."
20991,20991,20991,41,19,2020-12-08 18:30:29.0,356,3891,3977,3,"{""id"":26,""items"":[{""distance"":0,""duration"":0,""...",traffic,1.0,21.0,0.549844,0.073690,-1,"' 19423207912122025', ' 19423207912122123', ' ...","' 6276', ' 23452', ' 692', ' 22971', ' 18257',...","' 39', ' 23', ' 26', ' 21', ' 28', ' 23', ' 23...","' 68', ' 54', ' 5', ' 134', ' 142', ' 136', ' ...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'..."


In [201]:
week_period(day_period(clear)).to_csv('routes_final_abakan_rev2.csv')

In [134]:
processing2 =  GIS_processing()

In [145]:
processing2.flatternize('omsk.csv', 15)

  0%|                                                                              | 9/21502 [00:03<2:17:40,  2.60it/s]


KeyboardInterrupt: 

In [138]:
processing2.add_additionals('processed_recreated_omsk.csv')

100%|███████████████████████████████████████████████████████████████████████████| 21502/21502 [01:26<00:00, 248.40it/s]


In [135]:
omsl = pd.read_csv("omsk.csv")

In [146]:
processing2.clear_flatternized("add_processed_recreated_omsk.csv")

100%|██████████████████████████████████████████████████████████████████████████| 21502/21502 [00:10<00:00, 1986.40it/s]


In [471]:
week_period(day_period(pd.read_csv("clear_add_processed_recreated_omsk.csv", index_col=0))).to_csv('routes_final_omsk_rev2.csv')

In [149]:
pd.read_csv('final_omsk_rev1.csv')

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,dist_to_b,dist_to_a,start_timestamp,RTA,real_dist,pred_dist,rebuildCount,drivingDirection_json,route_type,start_point_meters,finish_point_meters,start_point_part,finish_point_part,instruction_type,edges,time,speed,length,day_period,week_period
0,0,0,0,0,27,29,2020-12-07 14:44:57.0,405,2667,2748,2,"{""id"":26,""items"":[{""distance"":0,""duration"":0,""...",traffic,3.0,30.0,0.317238,0.159921,-1,"' 564384472566312', ' 564384550293991', ' 5643...","' 16984', ' 8756', ' 9437', ' 23634', ' 24187'...","' 39', ' 37', ' 37', ' 23', ' 32', ' 23', ' 23...","' 184', ' 90', ' 97', ' 151', ' 215', ' 119', ...",2,0
1,1,1,1,1,84,0,2020-12-09 16:32:01.0,314,1507,1630,0,"{""id"":953,""items"":[{""distance"":0,""duration"":0,...",traffic,13.0,9.0,0.713783,0.560720,-1,"' 564384472534865', ' 140060152454583961', ' 5...","' 3840', ' 7200', ' 14400', ' 16747', ' 17530'...","' 15', ' 15', ' 15', ' 23', ' 23', ' 36', ' 36...","' 16', ' 30', ' 60', ' 107', ' 112', ' 53', ' ...",2,0
2,2,2,2,2,37,0,2020-12-07 07:00:37.0,279,1932,2054,0,"{""id"":998,""items"":[{""distance"":0,""duration"":0,...",traffic,7.0,1.0,0.585469,0.373613,-1,"' 140060152353538991', ' 564384472569135', ' 5...","' 32640', ' 1920', ' 2507', ' 835', ' 3085', '...","' 15', ' 15', ' 56', ' 56', ' 56', ' 56', ' 56...","' 136', ' 8', ' 39', ' 13', ' 48', ' 93', ' 20...",1,0
3,3,3,3,3,56,0,2020-12-10 16:22:14.0,1224,8486,8592,1,"{""id"":1577,""items"":[{""distance"":0,""duration"":0...",traffic,2.0,19.0,0.088712,0.832139,-1,"' 140060152387912459', ' 564384472531921', ' 5...","' 20160', ' 11040', ' 13920', ' 6480', ' 4080'...","' 15', ' 15', ' 15', ' 15', ' 15', ' 25', ' 17...","' 84', ' 46', ' 58', ' 27', ' 17', ' 90', ' 9'...",2,0
4,4,4,4,4,29,77,2020-12-09 14:19:01.0,808,6291,7143,3,"{""id"":326,""items"":[{""distance"":0,""duration"":0,...",traffic,0.0,6.0,0.156706,0.561412,-1,"' 564384472511971', ' 564384472588498', ' 5643...","' 13122', ' 2438', ' 8245', ' 12193', ' 20438'...","' 31', ' 31', ' 31', ' 31', ' 31', ' 31', ' 31...","' 113', ' 21', ' 71', ' 105', ' 176', ' 125', ...",2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21497,21497,21497,21497,21497,48,1,2020-12-06 23:46:13.0,567,642,687,3,"{""id"":20,""items"":[{""distance"":0,""duration"":0,""...",traffic,9.0,1.0,0.819638,0.297317,-1,"' 564384472579685', ' 140060152336071343', ' 5...","' 5280', ' 8400', ' 11040', ' 9840', ' 6960', ...","' 15', ' 15', ' 15', ' 15', ' 15', ' 15', ' 15...","' 22', ' 35', ' 46', ' 41', ' 29', ' 33', ' 49...",3,-1
21498,21498,21498,21498,21498,52,0,2020-12-06 23:44:54.0,640,7462,7593,0,"{""id"":104,""items"":[{""distance"":0,""duration"":0,...",traffic,2.0,11.0,0.898184,0.675203,-1,"' 140060152286544020', ' 564384472557631', ' 5...","' 3360', ' 3060', ' 5310', ' 1350', ' 360', ' ...","' 15', ' 40', ' 40', ' 40', ' 40', ' 40', ' 40...","' 14', ' 34', ' 59', ' 15', ' 4', ' 12', ' 100...",3,-1
21499,21499,21499,21499,21499,75,6,2020-12-06 23:21:01.0,1793,14556,14595,0,"{""id"":909,""items"":[{""distance"":0,""duration"":0,...",traffic,4.0,3.0,0.759182,0.122900,-1,"' 564384472546849', ' 140060152349445969', ' 5...","' 4320', ' 2160', ' 5040', ' 7920', ' 6480', '...","' 15', ' 15', ' 15', ' 15', ' 15', ' 24', ' 24...","' 18', ' 9', ' 21', ' 33', ' 27', ' 8', ' 97',...",3,-1
21500,21500,21500,21500,21500,35,0,2020-12-06 23:59:28.0,215,2227,2274,0,"{""id"":50,""items"":[{""distance"":0,""duration"":0,""...",traffic,5.0,2.0,0.624572,0.620290,-1,"' 140060152290560049', ' 564384472503215', ' 5...","' 2025', ' 3487', ' 13129', ' 13129', ' 3600',...","' 32', ' 32', ' 34', ' 34', ' 41', ' 41', ' 41...","' 18', ' 31', ' 124', ' 124', ' 41', ' 36', ' ...",3,-1


In [103]:
day_period(clear)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,dist_to_b,dist_to_a,start_timestamp,RTA,real_dist,pred_dist,rebuildCount,drivingDirection_json,route_type,start_point_meters,finish_point_meters,start_point_part,finish_point_part,instruction_type,edges,time,speed,length,directionality,day_period,week_period
0,0,0,0,48,0,2020-12-12 10:11:27.0,501,4245,4163,2,"{""id"":50,""items"":[{""distance"":0,""duration"":0,""...",traffic,6.0,16.0,0.722993,0.290994,-1,"' 19423207912141719', ' 19423207912141691', ' ...","' 4800', ' 23160', ' 2640', ' 11840', ' 2700',...","' 15', ' 15', ' 15', ' 20', ' 20', ' 35', ' 40...","' 20', ' 34', ' 11', ' 38', ' 15', ' 62', ' 13...","' 2', ' 2', ' 2', ' -1', ' -1', ' -1', ' -1', ...",1,-1
1,1,1,1,81,2,2020-12-13 14:25:20.0,475,3787,4178,1,"{""id"":55,""items"":[{""distance"":0,""duration"":0,""...",traffic,11.0,32.0,0.496820,0.434923,-1,"' 19423207912142002', ' 19423207912142014', ' ...","' 8640', ' 8880', ' 15800', ' 960', ' 6080', '...","' 15', ' 15', ' 15', ' 15', ' 50', ' 45', ' 20...","' 36', ' 37', ' 45', ' 4', ' 15', ' 52', ' 90'...","' 2', ' 2', ' 2', ' 2', ' -1', ' -1', ' -1', '...",2,-1
2,2,2,2,42,0,2020-12-08 16:05:26.0,146,837,797,2,"{""id"":1443,""items"":[{""distance"":0,""duration"":0...",traffic,1.0,7.0,0.094503,0.726939,-1,"' 140060152397995913', ' 140060152397995843', ...","' 24720', ' 15360', ' 25800', ' 2400', ' 15421...","' 15', ' 15', ' 15', ' 15', ' 19', ' 19', ' 19...","' 103', ' 64', ' 45', ' 10', ' 55', ' 15', ' 2...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'...",2,-1
3,3,3,3,18,0,2020-12-10 07:54:23.0,152,578,490,1,"{""id"":0,""items"":[{""distance"":0,""duration"":0,""e...",traffic,14.0,17.0,0.796066,0.476703,-1,"' 140060152255006264', ' 140060152255006409', ...","' 9360', ' 11960', ' 36960', ' 23169', ' 39320...","' 15', ' 15', ' 15', ' 26', ' 15', ' 15'","' 39', ' 29', ' 154', ' 59', ' 143', ' 49'","' 2', ' 2', ' 2', ' 2', ' 2', ' 2'",1,-1
4,4,4,4,61,21,2020-12-07 15:47:28.0,375,2086,2081,2,"{""id"":22,""items"":[{""distance"":0,""duration"":0,""...",traffic,5.0,10.0,0.179399,0.393122,-1,"' 19423207912137938', ' 19423207912125448', ' ...","' 12208', ' 18313', ' 17880', ' 13824', ' 5040...","' 23', ' 23', ' 25', ' 25', ' 25', ' 25', ' 25...","' 78', ' 117', ' 20', ' 96', ' 35', ' 54', ' 1...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'...",2,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20988,20988,20988,20988,57,13,2020-12-08 07:18:45.0,367,4427,4450,5,"{""id"":27,""items"":[{""distance"":0,""duration"":0,""...",traffic,1.0,145.0,0.441659,1.000000,-1,"' 19423207912140083', ' 19423207912127828', ' ...","' 5760', ' 17150', ' 4320', ' 3780', ' 2520', ...","' 15', ' 40', ' 40', ' 40', ' 40', ' 40', ' 40...","' 24', ' 135', ' 48', ' 42', ' 28', ' 21', ' 6...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'...",1,-1
20989,20989,20989,20989,39,10,2020-12-08 13:31:20.0,306,2907,2961,1,"{""id"":28,""items"":[{""distance"":0,""duration"":0,""...",traffic,6.0,7.0,0.882815,0.109360,-1,"' 19423207912147914', ' 19423207912147908', ' ...","' 2880', ' 19920', ' 4560', ' 3840', ' 10466',...","' 15', ' 15', ' 15', ' 15', ' 27', ' 27', ' 27...","' 12', ' 83', ' 19', ' 16', ' 41', ' 44', ' 13...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'...",2,-1
20990,20990,20990,20990,64,0,2020-12-08 07:18:40.0,225,1194,1462,3,"{""id"":282,""items"":[{""distance"":0,""duration"":0,...",traffic,4.0,41.0,0.289654,0.474029,-1,"' 19423207912117869', ' 19423207912118121', ' ...","' 3600', ' 12168', ' 4968', ' 1224', ' 8064', ...","' 50', ' 50', ' 50', ' 50', ' 25', ' 25', ' 30...","' 50', ' 169', ' 69', ' 17', ' 56', ' 20', ' 1...","' 2', ' 2', ' 2', ' 2', ' 2', ' -1', ' -1', ' ...",1,-1
20991,20991,20991,20991,41,19,2020-12-08 18:30:29.0,356,3891,3977,3,"{""id"":26,""items"":[{""distance"":0,""duration"":0,""...",traffic,1.0,21.0,0.549844,0.073690,-1,"' 19423207912122025', ' 19423207912122123', ' ...","' 6276', ' 23452', ' 692', ' 22971', ' 18257',...","' 39', ' 23', ' 26', ' 21', ' 28', ' 23', ' 23...","' 68', ' 54', ' 5', ' 134', ' 142', ' 136', ' ...","' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2', ' 2'...",2,-1


In [10]:
json.loads(df.iloc[2, 15])['items'][1]['edges'][1]['traffic_type']

2

In [11]:
b_in_dict =  'traffic_type' in json.loads(df.iloc[2, 15])['items'][1]['edges'][1]

In [12]:
b_in_dict 

True

In [13]:
import h5py

In [15]:
filename = "metr-la.h5"

with h5py.File(filename, "r") as f:
    # List all groups
    print("Keys: %s" % f.keys())
    a_group_key = list(f.keys())[0]

    # Get the data
    data = list(f[a_group_key])

Keys: <KeysViewHDF5 ['df']>


In [17]:
data 

['axis0', 'axis1', 'block0_items', 'block0_values']

In [19]:
from pandas import (
    DataFrame, HDFStore
)

In [20]:
store = HDFStore("metr-la.h5")

In [21]:
store 

<class 'pandas.io.pytables.HDFStore'>
File path: metr-la.h5

In [59]:
pd.read_csv("routes_abakan_clear.csv").drop(["Unnamed: 0", "Unnamed: 0.1", "start_utc", "end_utc", "ETA", "build_utc", "build_timestamp"], axis = 1).to_csv("routes_abakan_clear_rev2.csv")

In [65]:
pd.read_csv("routes_omsk_clear_rev2.csv").drop(["Unnamed: 0", "Unnamed: 0.1", "Unnamed: 0.1.1", "start_utc", "end_utc", "ETA", "build_utc", "build_timestamp"], axis = 1).to_csv("routes_omsk_clear_rev2.csv")