In [574]:
FOLDER = 'resources/'

import numpy as np
import pandas as pd
import time, datetime
from ast import literal_eval
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile

#%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rc('font', family='Times New Roman')

pd.set_option('max_rows', 50)

time_format = '%b %d, %H:%M'

start_time = time.time()
current_time = pd.read_csv(FOLDER + 'current_time.csv').current_time[0]
twr          = pd.read_csv(FOLDER + 'team_work_region.csv', converters={'twr':str})
links        = pd.read_csv(FOLDER + 'link.csv')
stations     = pd.read_csv(FOLDER + 'station.csv', converters={'station':str})
train_info   = pd.read_csv(FOLDER + 'train_info.csv', converters={'train': str, 'st_from':str, 'st_to':str, 'oper_location':str,
                                                                 'st_from':str, 'st_to':str})
train_plan   = pd.read_csv(FOLDER + 'slot_train.csv', converters={'train': str, 'st_from':str, 'st_to':str})
loco_info    = pd.read_csv(FOLDER + 'loco_attributes.csv', converters={'train':str, 'loco':str, 'depot':str,
                                                                      'st_from':str, 'st_to':str})
loco_plan    = pd.read_csv(FOLDER + 'slot_loco.csv', converters={'train':str, 'loco':str, 'st_from':str, 'st_to':str})
team_info    = pd.read_csv(FOLDER + 'team_attributes.csv', converters={'team':str,'depot':str, 'oper_location':str, \
                                                                 'st_from':str, 'st_to':str, 'loco':str, 'depot_st':str})
team_plan    = pd.read_csv(FOLDER + 'slot_team.csv', converters={'team':str,'loco':str, 'st_from':str, 'st_to':str})
loco_series  = pd.read_csv(FOLDER + 'loco_series.csv')

team_info.regions = team_info.regions.apply(literal_eval)
st_names = stations[['station', 'name', 'esr']].drop_duplicates().set_index('station')
print('Planning start time: %s (%d)' % (time.strftime(time_format, time.localtime(current_time)), current_time))

Planning start time: Jul 06, 14:41 (1467805287)


In [575]:
# Мержим таблицы _plan и _info для поездов, локомотивов и бригад
# Добавляем во все таблицы названия станций на маршруте и времена отправления/прибытия в читабельном формате

def add_info(df):    
    if 'st_from' in df.columns:
        df['st_from_name'] = df.st_from.map(st_names.name)
    if 'st_to' in df.columns:
        df['st_to_name'] = df.st_to.map(st_names.name)
    if 'time_start' in df.columns:
        df['time_start_norm'] = df.time_start.apply(lambda x: time.strftime(time_format, time.localtime(x)))
    if 'time_end' in df.columns:
        df['time_end_norm'] = df.time_end.apply(lambda x: time.strftime(time_format, time.localtime(x)))
    if 'oper_location' in df.columns:
        df['oper_location_name'] = df.oper_location.map(st_names.name)    
        df.oper_location_name.fillna(0, inplace=True)
    if ('oper_location' in df.columns) & ('st_from' in df.columns) & ('st_to' in df.columns):        
        df['loc_name'] = df.oper_location_name
        df.loc[df.loc_name == 0, 'loc_name'] = df.st_from_name + ' - ' + df.st_to_name
    
add_info(train_plan)
add_info(loco_plan)
add_info(team_plan)
add_info(loco_info)
add_info(team_info)
add_info(train_info)
train_plan = train_plan.merge(train_info, on='train', suffixes=('', '_info'), how='left')
loco_plan = loco_plan.merge(loco_info, on='loco', suffixes=('', '_info'), how='left')
team_plan = team_plan.merge(team_info, on='team', suffixes=('', '_info'), how='left')
team_plan['team_type'] = team_plan.team.apply(lambda x: 'Реальная' if str(x)[0] == '2' else 'Фейковая')

In [576]:
def nice_time(t):
    #if not time_format: time_format = '%b %d, %H:%M'
    return time.strftime(time_format, time.localtime(t)) if t > 0 else ''

def nice_print(s, cols, num=True):
    if num:
        print(s.reset_index()[cols].to_string())
    else:
        print(s[cols].to_string(index=False))

In [577]:
st_name = 'ИРКУТСК-СОРТИРОВОЧНЫЙ'
team_plan['depot_name'] = team_plan.depot.map(st_names.name)
cols = ['team', 'number', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'loco']
team_plan[(team_plan.st_from_name == st_name) & (team_plan.state.isin([0, 1]))
          & (team_plan.time_start >= current_time) & (team_plan.time_start < current_time + 24 * 3600)].sort_values('time_start')[cols]

team_plan[team_plan.number == 9205004408][cols]

Unnamed: 0,team,number,st_from_name,st_to_name,time_start_norm,time_end_norm,state,loco
1427,200200250998,9205004408,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 12:44","Jul 06, 13:08",1,200200054143
1428,200200250998,9205004408,ГОНЧАРОВО,БОЛЬШОЙ ЛУГ,"Jul 06, 13:08","Jul 06, 13:52",1,200200054143
1429,200200250998,9205004408,БОЛЬШОЙ ЛУГ,АНДРИАНОВСКАЯ,"Jul 06, 13:52","Jul 06, 14:52",1,200200054143
1430,200200250998,9205004408,АНДРИАНОВСКАЯ,АНГАСОЛКА,"Jul 06, 14:52","Jul 06, 15:06",1,200200054143
1431,200200250998,9205004408,АНГАСОЛКА,СЛЮДЯНКА II,"Jul 06, 15:06","Jul 06, 15:31",1,200200054143
1432,200200250998,9205004408,СЛЮДЯНКА II,СЛЮДЯНКА I,"Jul 06, 15:31","Jul 06, 15:37",1,200200054143
1433,200200250998,9205004408,СЛЮДЯНКА I,БАЙКАЛЬСК,"Jul 06, 15:37","Jul 06, 16:26",1,200200054143
1434,200200250998,9205004408,БАЙКАЛЬСК,МЫСОВАЯ,"Jul 06, 16:26","Jul 06, 18:31",1,200200054143
1435,200200250998,9205004408,МЫСОВАЯ,УЛАН-УДЭ,"Jul 06, 18:31","Jul 06, 21:13",1,200200054143
1436,200200250998,9205004408,УЛАН-УДЭ,УЛАН-УДЭ,"Jul 06, 21:43","Jul 07, 04:18",4,-1


In [578]:
team_info['depot_name'] = team_info.depot.map(st_names.name)
team_info['in_plan'] = team_info.team.isin(team_plan[team_plan.state == 1].team)
team_info['oper_time_f'] = team_info.oper_time.apply(nice_time)
cols = ['team', 'number', 'depot_name', 'state']
a = team_info[(team_info.ttype == 1) & (team_info.loc_name == 'СЛЮДЯНКА I') 
          #& (team_info.depot_name.isin(['СЛЮДЯНКА I']))
          & (team_info.oper_time < current_time + 24 * 3600)]
a.depot_name.value_counts()

СЛЮДЯНКА I               55
ЗИМА                     17
ИРКУТСК-СОРТИРОВОЧНЫЙ    14
УЛАН-УДЭ                  9
Name: depot_name, dtype: int64

In [579]:
b = a[a.in_plan == True]
#cols = ['team', 'number', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'loco']
#team_plan[team_plan.team.isin(b.team)][cols]
cols = ['team', 'regions', 'number', 'depot_name', 'ready_type', 'depot_st', 'depot_time', 'return_st', 'oper_time_f', 'state']
b = a[(a.in_plan) & (a.regions.apply(lambda x: '2002118236' in x))].sort_values('oper_time')[cols]
print(b.team.count()) # всего 30 слюдянковских бригад, которые могут ездить до Иркутска
# Еще 29 бригад из Зимы и Иркутска - они тоже могут ехать в нечетную сторону
team_plan[(team_plan.team.isin(b.team)) & (team_plan.st_from_name == 'СЛЮДЯНКА I') & (team_plan.state.isin([0, 1]))].st_to_name.value_counts()

23


СЛЮДЯНКА II    17
БАЙКАЛЬСК       7
Name: st_to_name, dtype: int64

In [580]:
twr = pd.read_csv(FOLDER + 'team_work_region.csv')
twr['link'] = twr.link.apply(literal_eval)
twr['st_from_name'] = twr.link.apply(lambda x: x[0]).map(st_names.name)
twr['st_to_name'] = twr.link.apply(lambda x: x[1]).map(st_names.name)
twr[twr.twr == 2002118236]

Unnamed: 0,twr,link,st_from_name,st_to_name
2994,2002118236,"[2000037090, 2000036958]",КАЯ,ИРКУТСК-ПАССАЖИРСКИЙ
2995,2002118236,"[2000036958, 2000037090]",ИРКУТСК-ПАССАЖИРСКИЙ,КАЯ
2996,2002118236,"[2000037086, 2000036958]",ВОЕННЫЙ ГОРОДОК,ИРКУТСК-ПАССАЖИРСКИЙ
2997,2002118236,"[2000036958, 2000037086]",ИРКУТСК-ПАССАЖИРСКИЙ,ВОЕННЫЙ ГОРОДОК
2998,2002118236,"[2000037100, 2000037090]",ГОНЧАРОВО,КАЯ
2999,2002118236,"[2000037090, 2000037100]",КАЯ,ГОНЧАРОВО
3000,2002118236,"[2000036956, 2000037100]",ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО
3001,2002118236,"[2000037100, 2000036956]",ГОНЧАРОВО,ИРКУТСК-СОРТИРОВОЧНЫЙ
3002,2002118236,"[2000037100, 2000037132]",ГОНЧАРОВО,БОЛЬШОЙ ЛУГ
3003,2002118236,"[2000037132, 2000037100]",БОЛЬШОЙ ЛУГ,ГОНЧАРОВО


In [581]:
train_plan['train_type'] = train_plan.train.apply(lambda x: x[0])
train_plan[(train_plan.st_from_name == 'СЛЮДЯНКА I') & (train_plan.st_to_name == 'СЛЮДЯНКА II')
          & (train_plan.time_start >= current_time) & (train_plan.time_start < current_time + 24 * 3600)].train_type.value_counts()

# Всего 96 поездов в нечетную сторону из Слюдянки!!!
# Из них всего 15 локомотивов резервом и 81 настоящий поезд

2    58
8    16
9     2
Name: train_type, dtype: int64

Итого на Слюдянку надо 96 бригад. А есть только 59 (на начало планирования)
Надо где-то найти еще 37. 
Еще 10 бригад едут от Иркутска в Слюдянку на начало планирования. Осталось 27.

In [582]:
cols = ['team', 'number', 'depot_name', 'depot_st', 'depot_time', 'state', 'loc_name', 'oper_time_f']
team_info['link'] = list(zip(team_info.st_from, team_info.st_to))
links = pd.read_csv(FOLDER + 'link.csv', dtype={'st_from':str, 'st_to':str})
links['link'] = list(zip(links.st_from, links.st_to))
team_info['dir'] = team_info.link.map(links.set_index('link')['dir'])
team_info[(team_info.depot_name.isin(['ЗИМА', 'ИРКУТСК-СОРТИРОВОЧНЫЙ'])) & (team_info.state.isin(['2','3','4']) == False)
         & (team_info['dir'] == 0)].sort_values('oper_time')[cols]

Unnamed: 0,team,number,depot_name,depot_st,depot_time,state,loc_name,oper_time_f
120,200200189748,9203000770,ЗИМА,2000036710,1467784500,1,ЗИМА - ЗАЛАРИ,"Jul 06, 09:50"
246,200200193220,9203001037,ЗИМА,2000036710,1467784800,1,ЗИМА - ЗАЛАРИ,"Jul 06, 10:37"
2710,200200259891,9205002384,ИРКУТСК-СОРТИРОВОЧНЫЙ,-1,-1,0,СЛЮДЯНКА I - БАЙКАЛЬСК,"Jul 06, 10:58"
601,210215845614,9205001327,ИРКУТСК-СОРТИРОВОЧНЫЙ,2000036956,1467795000,1,ИРКУТСК-СОРТИРОВОЧНЫЙ - ГОНЧАРОВО,"Jul 06, 11:54"
499,200200250998,9205004408,ИРКУТСК-СОРТИРОВОЧНЫЙ,2000036956,1467798000,1,ИРКУТСК-СОРТИРОВОЧНЫЙ - ГОНЧАРОВО,"Jul 06, 12:44"
761,200200236578,9203025883,ЗИМА,2000036710,1467775800,1,БАТАРЕЙНАЯ - ИРКУТСК-СОРТИРОВОЧНЫЙ,"Jul 06, 12:47"
762,200200222169,9203003896,ЗИМА,2000036710,1467781800,1,КАСЬЯНОВКА - МАЛЬТА,"Jul 06, 12:48"
265,200200081102,9203003387,ЗИМА,2000036710,1467791400,1,ЗИМА - ЗАЛАРИ,"Jul 06, 12:49"
1000,200200145720,9203000459,ЗИМА,2000036710,1467794700,1,ЗИМА - ЗАЛАРИ,"Jul 06, 12:51"
527,200200216964,9203008975,ЗИМА,2000036710,1467783000,1,КАСЬЯНОВКА - МАЛЬТА,"Jul 06, 13:02"


In [583]:
cols = ['team', 'number', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'loco']
team_plan[team_plan.team == '200200035170'][cols]

Unnamed: 0,team,number,st_from_name,st_to_name,time_start_norm,time_end_norm,state,loco


In [584]:
cols = ['team', 'number', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'loco']
team_plan[(team_plan.depot_name == st_name) & (team_plan.st_to_name == 'ГОНЧАРОВО')
          & (team_plan.time_start >= current_time) & (team_plan.time_start < current_time + 8 * 3600)
         & (team_plan.state == 1) & (team_plan.st_from_name == st_name)].sort_values('time_start')[cols]

Unnamed: 0,team,number,st_from_name,st_to_name,time_start_norm,time_end_norm,state,loco
35009,200200185651,9205008005,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 15:05","Jul 06, 15:29",1,200200063444
13116,200200223247,9205002166,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 15:25","Jul 06, 15:49",1,200200106911
2145,220204230288,9205031417,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 17:15","Jul 06, 17:39",1,200211339328
12856,210216297576,9205031390,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 17:45","Jul 06, 18:09",1,200200105050
32525,200200274856,9205007634,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 17:55","Jul 06, 18:19",1,200200139580
33960,200200194940,9205003211,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 18:05","Jul 06, 18:29",1,200200086564
46037,210213617502,9205030412,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 18:15","Jul 06, 18:39",1,200200102549
19942,200200130295,9205004606,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 18:45","Jul 06, 19:09",1,200200097482
41,210215999461,9205031365,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 18:55","Jul 06, 19:19",1,200200094906
18244,200200244250,9205003866,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 06, 19:55","Jul 06, 20:19",1,200200106390


In [585]:
team_info[(team_info.depot_st == '-1') & (team_info.depot_time == -1)].team.count() / team_info.team.count()

0.08252015273652949

In [586]:
train_info['in_plan'] = train_info.train.isin(train_plan.train)
train_info[train_info.in_plan == False].train.count() / train_info.train.count()

0.085810431856421759

In [587]:
print(nice_time(current_time))
train_info['oper_time_f'] = train_info.oper_time.apply(nice_time)
train_info[train_info.in_plan == False][['train', 'number', 'ind434', 'joint', 'oper_time_f', 'loc_name']]

Jul 06, 14:41


Unnamed: 0,train,number,ind434,joint,oper_time_f,loc_name
51,200231224031,9209,9826-352-9700,-1,"Jul 06, 12:55",РУЖИНО - БЛОКПОСТ 1571 КМ
54,200201266998,6566,0000-006-6566,-1,"Jul 06, 13:24",РУДНОГОРСК - ТУШАМА
81,200200942162,108,0000-005-0108,-1,"Jul 06, 14:11",ЗАУДИНСКИЙ - ТАЛЬЦЫ
99,200231225832,9240,9212-015-9320,-1,"Jul 06, 13:22",КАСЬЯНОВКА - МАЛЬТА
120,200201219013,351,0000-006-0352,-1,"Jul 06, 11:21",ВАНИНО - ТОКИ
132,200230888238,2050,8927-873-9687,-1,"Jul 06, 13:00",УДА II - ТУЛУН
136,220207452064,1588,8646-281-9878,-1,"Jul 06, 10:54",ШИЛКА-ТОВАРНАЯ - ШИЛКА-ТОВАРНАЯ
149,200231495307,9224,9431-384-9491,-1,"Jul 06, 12:01",ТАРСКАЯ - УРУЛЬГА
152,220206829738,2318,0154-015-9893,-1,"Jul 06, 12:11",ШИЛКА-ТОВАРНАЯ - ШИЛКА-ТОВАРНАЯ
159,200231054141,9201,9826-350-9700,-1,"Jul 06, 08:41",РУЖИНО - БЛОКПОСТ 1571 КМ


In [588]:
def get_station(name):
    s = stations[stations.name.apply(lambda x: name.upper() in x)]
    return s.station.unique()[0], s.name.unique()[0]    

In [589]:
print(nice_time(current_time))
st, st_name = get_station('чернышев')
team_info['in_plan'] = team_info.team.isin(team_plan[team_plan.state == 1].team)
team_info[(team_info.oper_location == st)           
          & (team_info.regions.apply(lambda x: '200290072545' in x)) & (team_info.depot_time != -1)]\
            [['team', 'regions', 'depot_name', 'loc_name', 'oper_time_f', 'in_plan']].sort_values(['depot_name', 'oper_time_f'])
#a = [2002118291, 200290072545]
#twr[twr.twr.isin(a)]

Jul 06, 14:41


Unnamed: 0,team,regions,depot_name,loc_name,oper_time_f,in_plan
2556,200200278309,"[200290072545, 2002118291]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 07:22",False
4268,200200134612,"[200290072545, 2002118291]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 07:23",False
4211,200200242540,"[2002118291, 200290072545]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 10:37",False
2299,200200195345,[200290072545],ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 10:49",False
2210,200200223934,"[200290072545, 2002118291]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 11:25",True
1581,200200260416,"[2002118291, 200290072545]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 12:35",True
1798,200200103769,"[2002118291, 200290072545]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 12:55",True
3386,200200167618,"[200290072545, 2002118291]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 13:00",True
3446,200200231871,"[2002118291, 200290072545]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 13:15",True
4342,200200105029,"[200290072545, 2002118291]",ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,"Jul 06, 13:32",True


In [590]:
loco_plan['loco_time'] = list(zip(loco_plan.loco, loco_plan.time_start))
team_plan['loco_time'] = list(zip(team_plan.loco, team_plan.time_start))
loco_plan['team'] = loco_plan.loco_time.map(team_plan.drop_duplicates('loco_time').set_index('loco_time').team)
loco_plan[loco_plan.loco == '200215585260'][['loco', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'team']]

Unnamed: 0,loco,st_from_name,st_to_name,time_start_norm,time_end_norm,team


In [591]:
slot = pd.read_csv(FOLDER + 'slot.csv', dtype={'st_from':str, 'st_to':str})
add_info(slot)
slot[(slot.st_from == st) & (slot.time_start > current_time) & (slot.time_start < current_time + 24 * 3600) 
    & (slot.st_to_name == 'КУЭНГА')].sort_values('time_start')\
[['slot', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm']]

Unnamed: 0,slot,st_from_name,st_to_name,time_start_norm,time_end_norm
1250,200230634756,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 14:42","Jul 06, 15:51"
1598,200230634785,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 15:03","Jul 06, 16:12"
13770,200230624941,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 15:06","Jul 06, 16:15"
2163,200230634829,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 15:14","Jul 06, 16:23"
3494,200230634938,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 15:30","Jul 06, 16:39"
29113,200230714959,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 15:50","Jul 06, 17:01"
2663,200230634861,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 16:17","Jul 06, 17:26"
14673,200230654848,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 16:40","Jul 06, 17:49"
15634,200230665064,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 16:54","Jul 06, 18:03"
5316,200230645062,ЧЕРНЫШЕВСК-ЗАБАЙКАЛЬСКИЙ,КУЭНГА,"Jul 06, 17:20","Jul 06, 18:29"


In [592]:
print(nice_time(current_time))
slot[slot.time_start_norm.apply(lambda x: 'Jul 04' in x)].slot.drop_duplicates().count()

Jul 06, 14:41


0

In [593]:
not_used = [9205004609, 9205007593, 9205000742, 9205002994, 9205007639, 9205004113, 9205004564, 9205031367, 9205031292, 
            9205000564, 9205002681, 9205008860, 9205004041, 9205002635, 9205008056, 9205000264, 9205003316, 9205008377, 
            9205007941, 9205002097, 9205007883, 9205030603, 9205003779, 9205003550, 9205030823, 9205003873, 9205004359, 
            9205008112, 9205000326, 9205005114, 9205007141, 9205001325, 9205002009, 9205000629, 9205002708, 9205004902, 
            9205007023, 9205007920, 9205004884, 9205031354, 9205002345, 9205007837, 9205001021, 9205002942, 9205004656, 
            9205008012, 9205007263, 9205007663, 9205000842, 9205000608]
team_info['depot_time_f'] = team_info.depot_time.apply(nice_time)
cols = ['team', 'ttype', 'number', 'regions', 'depot_st', 'depot_time_f', 'return_st', 'return_time', 'oper_time_f', 'loc_name', 'state', 'loco']
team_info[(team_info.number.isin(not_used)) 
          & (team_info.state == '3') 
         ].sort_values('depot_time')[cols]
#twr[twr.twr == 2002118233].sort_values('st_from_name')

Unnamed: 0,team,ttype,number,regions,depot_st,depot_time_f,return_st,return_time,oper_time_f,loc_name,state,loco
1738,200200103149,1,9205007920,"[2002118234, 2002118233]",2000036956,"Jul 06, 14:26",2000037186,1467708000,"Jul 06, 14:26",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
1625,200200150128,1,9205001325,"[2002118234, 2002118233]",2000036956,"Jul 06, 14:29",2000036710,1467722100,"Jul 06, 14:29",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
4602,200200208130,1,9205002097,[2002118233],2000036956,"Jul 06, 22:21",2000037186,1467737400,"Jul 06, 22:21",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
1389,200200249699,1,9205030603,[2002118233],2000036956,"Jul 06, 23:00",-1,-1,"Jul 06, 23:00",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
2288,200200209907,1,9205007663,"[2002118233, 2002118234]",2000036956,"Jul 07, 02:27",2000037186,1467771600,"Jul 07, 02:27",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
4676,200200155156,1,9205000629,"[2002118234, 2002118233]",2000036956,"Jul 07, 04:25",2000036710,1467768300,"Jul 07, 04:25",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1
4115,200200274830,1,9205003873,[2002118233],2000036956,"Jul 07, 06:00",2000037186,1467778800,"Jul 07, 06:00",ИРКУТСК-СОРТИРОВОЧНЫЙ,3,-1


In [594]:
st, st_name = get_station('иркутск-с')
cols = ['team', 'depot_name', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state']
team_plan['depot_name'] = team_plan.team.map(team_info.set_index('team').depot_name)
ts = team_plan[team_plan.state.isin([0, 1])].drop_duplicates('team')
team_plan['fake_depot_name'] = team_plan.team.map(ts.set_index('team').st_from_name)
team_plan.depot_name.fillna(team_plan.fake_depot_name, inplace=True)
team_plan['team_type'] = team_plan.team.apply(lambda x: int(x[0]))
team_plan[(team_plan.st_from == st) & (team_plan.depot_name == st_name) & (team_plan.team_type == 7)
          & (team_plan.time_start >= current_time) 
          & (team_plan.time_start < current_time + 24 * 3600) & (team_plan.state.isin([0, 1]))].sort_values('time_start')[cols]

Unnamed: 0,team,depot_name,st_from_name,st_to_name,time_start_norm,time_end_norm,state
28818,777700000537,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 18:29","Jul 06, 18:31",1
28877,777700000539,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 18:29","Jul 06, 18:31",1
23088,777700001508,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 19:16","Jul 06, 19:18",1
32556,777700000402,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 20:23","Jul 06, 20:25",1
32629,777700000401,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 22:22","Jul 06, 22:24",1
32609,777700000400,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 23:03","Jul 06, 23:05",1
28898,777700000538,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 07, 00:01","Jul 07, 00:03",1
29791,777700000683,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,ГОНЧАРОВО,"Jul 07, 00:45","Jul 07, 01:09",1
32778,777700000398,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 07, 01:11","Jul 07, 01:13",1
28839,777700000536,ИРКУТСК-СОРТИРОВОЧНЫЙ,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 07, 02:05","Jul 07, 02:07",1


In [595]:
cols = ['train', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm']
train_plan[(train_plan.st_from == st) 
           & (train_plan.time_start >= current_time) 
           & (train_plan.time_start < current_time + 24 * 3600)].sort_values('time_start')[cols].st_to_name.value_counts()

БАТАРЕЙНАЯ         76
ГОНЧАРОВО          73
ВОЕННЫЙ ГОРОДОК     1
Name: st_to_name, dtype: int64

In [596]:
loco_plan[(loco_plan.st_from == st) & (loco_plan.state.isin([0, 1]))
           & (loco_plan.time_start >= current_time) 
           & (loco_plan.time_start < current_time + 24 * 3600)].sort_values('time_start')[cols].st_to_name.value_counts()

БАТАРЕЙНАЯ         75
ГОНЧАРОВО          39
ВОЕННЫЙ ГОРОДОК     1
Name: st_to_name, dtype: int64

In [597]:
team_plan[(team_plan.st_from == st) & (team_plan.state.isin([0, 1])) & (team_plan.depot_name == st_name)
           & (team_plan.time_start >= current_time) 
           & (team_plan.time_start < current_time + 24 * 3600)].sort_values('time_start').st_to_name.value_counts()

БАТАРЕЙНАЯ         48
ГОНЧАРОВО          23
ВОЕННЫЙ ГОРОДОК    13
Name: st_to_name, dtype: int64

In [598]:
team_plan['all_states'] = team_plan.team.map(team_plan.groupby('team').state.unique())
cols = ['team', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'all_states', 'state_info']
team_plan[(team_plan.st_from == st) & (team_plan.state.isin([0])) & (team_plan.depot_name == st_name)
           & (team_plan.time_start >= current_time) 
           & (team_plan.time_start < current_time + 24 * 3600)].sort_values('time_start')[cols]

Unnamed: 0,team,st_from_name,st_to_name,time_start_norm,time_end_norm,all_states,state_info
4207,200200120486,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 14:54","Jul 06, 14:55","[0, 2, 1, 4]",5
15289,200200196944,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 06, 15:50","Jul 06, 15:52","[0, 2, 1, 4]",5
45679,200200150128,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 16:23","Jul 06, 16:25","[0, 4]",3
23317,200200165803,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 16:56","Jul 06, 16:58","[0, 2, 1]",3
42978,200200134533,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,"Jul 06, 21:12","Jul 06, 21:13","[0, 4]",3
45057,200200124135,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 06, 21:23","Jul 06, 21:25","[0, 2, 1]",3
37250,200200203400,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 06, 21:23","Jul 06, 21:25","[0, 2, 1]",3
9956,200200249699,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 07, 01:17","Jul 07, 01:19","[0, 2, 1]",3
36145,200200254961,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 07, 01:17","Jul 07, 01:19","[0, 2, 1]",3
17682,200200166388,ИРКУТСК-СОРТИРОВОЧНЫЙ,ВОЕННЫЙ ГОРОДОК,"Jul 07, 01:17","Jul 07, 01:19","[0, 2, 1]",3


In [599]:
slot['link'] = list(zip(slot.st_from, slot.st_to))
a = slot.groupby(['link', 'time_start']).slot.count()
a[a > 1]
b = slot.set_index(['link', 'time_start']).join(a, rsuffix='_').reset_index()[['link', 'st_from_name', 'st_to_name', 'time_start_norm', 'slot_']]

In [600]:
files = [files for root, directories, files in os.walk('./resources/others')][0]
times = {}
os.chdir('./resources/others')
try:
    for f in files:
        if 'Бригады_УТХ' in f:
            times[f] = int(os.path.getmtime(f))    

    if times != {}:
        uth_filename = max(times, key=lambda k: times[k])
        date_modified = times[uth_filename]
    else:
        uth_filename = 'Бригады_УТХ' + '.xls'
        date_modified = 0
    print('Данные об УТХ-бригадах взяты из файла %s (дата изменения %s)' % (uth_filename, nice_time(date_modified)))
    os.chdir('..')
    os.chdir('..')
except:
    os.chdir('..')
    os.chdir('..')

Данные об УТХ-бригадах взяты из файла Бригады_УТХ_11_01_34.xls (дата изменения Jul 06, 11:01)


In [601]:
import os
files = [files for root, directories, files in os.walk('./input')][0]
files = [file for file in files if '20160703' in file]
files

['jason-FullPlannerPlugin.log.20160703-151742.zip',
 'jason-FullPlannerPlugin.log.20160703-171828.zip',
 'jason-FullPlannerPlugin.log.20160703-201827.zip']

In [602]:
#import zipfile
#for file in files:
#    zip_ref = zipfile.ZipFile('./input/' + file, 'r')
#    zip_ref.extractall('./input')
#    zip_ref.close()
#    %run read.py
#    slot = pd.read_csv(FOLDER + 'slot.csv', dtype={'st_from':str, 'st_to':str})
#    slot['time_start_f'] = slot.time_start.apply(nice_time)
#    slot['is_jul_03'] = slot.time_start_f.apply(lambda x: 'Jul 03' in x)
#    slot['is_jul_04'] = slot.time_start_f.apply(lambda x: 'Jul 04' in x)
#    slot['is_jul_05'] = slot.time_start_f.apply(lambda x: 'Jul 05' in x)
#    slot.head(10)
#    print('Jul 03: %d\nJul 04: %d\nJul 05: %d' % 
#          (slot[slot.is_jul_03].slot.count(), slot[slot.is_jul_04].slot.count(), slot[slot.is_jul_05].slot.count()))

In [603]:
slot = pd.read_csv(FOLDER + 'slot.csv', dtype={'st_from':str, 'st_to':str})
#slot['time_start_f'] = slot.time_start.apply(nice_time)
add_info(slot)
slot[(slot.st_from_name == 'ИРКУТСК-СОРТИРОВОЧНЫЙ') & (slot.time_start_norm.apply(lambda x: 'Jul 05' in x))].st_to_name.value_counts()

Series([], Name: st_to_name, dtype: int64)

In [604]:
print('Время начала планирования: %s' % (time.strftime(time_format, time.localtime(current_time))))
team_info['dt_norm'] = team_info.depot_time.apply(lambda x: time.strftime(time_format, time.localtime(x)) if x > 0 else x)
team_info['rt_norm'] = team_info.return_time.apply(lambda x: time.strftime(time_format, time.localtime(x)) if x > 0 else x)
team_info['rst_norm'] = team_info.rest_time.apply(lambda x: time.strftime(time_format, time.localtime(x)) if x > 0 else x)
team_info['rest_dep_delta'] = np.round(((team_info.rest_time - team_info.depot_time) / 3600), 2)
team_info['return_dep_delta'] = np.round(((team_info.return_time - team_info.depot_time) / 3600), 2)

dep_less_rest = team_info[(team_info.depot_time < team_info.rest_time) & (team_info.dt_norm != -1)]
info_cols = ['team', 'ttype', 'number', 'dt_norm', 'rst_norm', 'rest_dep_delta', 'ready_type', 'state']
print('Всего %d бригад, у которых время последней явки в депо намного меньше (на 12+ часов) переданного времени начала отдыха. Примеры:' 
          % dep_less_rest[dep_less_rest.rest_dep_delta > 12].team.drop_duplicates().count())
print(dep_less_rest[dep_less_rest.rest_dep_delta > 12][info_cols].sort_values('rest_dep_delta', ascending=False).head(10).to_string(index=False))

dep_less_return = team_info[(team_info.depot_time < team_info.return_time) & (team_info.dt_norm != -1)]
info_cols = ['team', 'ttype', 'number', 'dt_norm', 'rst_norm', 'return_dep_delta', 'ready_type', 'state']
print('\nВсего %d бригад, у которых время последней явки в депо намного меньше (на 18+ часов) времени явки в пункте оборота. Примеры:' 
          % dep_less_return[dep_less_return.return_dep_delta > 12].team.drop_duplicates().count())
print(dep_less_return[dep_less_return.return_dep_delta > 12][info_cols].sort_values('return_dep_delta', ascending=False).head(10).to_string(index=False))

Время начала планирования: Jul 06, 14:41
Всего 272 бригад, у которых время последней явки в депо намного меньше (на 12+ часов) переданного времени начала отдыха. Примеры:
         team  ttype      number        dt_norm       rst_norm  rest_dep_delta ready_type state
 200200140550      1  9209009656  Jul 04, 12:52  Jul 06, 11:13           46.35     return     4
 200200255414      1  9413047233  Jul 04, 23:30  Jul 06, 13:20           37.83     return     4
 200200224994      1  9403014461  Jul 05, 00:40  Jul 06, 09:00           32.33     return     4
 200200224532      1  9403002213  Jul 05, 00:40  Jul 06, 08:31           31.85     return     4
 200200168453      1  9403023827  Jul 05, 02:00  Jul 06, 09:00           31.00     return     4
 200200083038      1  9413015182  Jul 04, 22:10  Jul 06, 03:11           29.02     return     9
 200200196518      1  9606003264  Jul 05, 08:00  Jul 06, 13:00           29.00     return     4
 200200281922      1  9401059241  Jul 04, 23:20  Jul 06, 04:0

In [605]:
nice_time(team_info.oper_time.max())

'Jul 07, 06:20'

In [606]:
nice_time(current_time)

'Jul 06, 14:41'

In [607]:
inds = ['9379-342-9861', '8626-125-9861', '8622-692-9861', '8630-837-9861']
train_info['oper_time_f'] = train_info.oper_time.apply(nice_time)
#print(train_info[train_info.ind434 == ind][['train', 'ind434', 'number', 'oper_time_f', 'loc_name']].to_string(index=False))
for ind in inds:
    a = train_plan[train_plan.ind434 == ind][['train', 'number', 'ind434', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm']]
    print(a.to_string(index=False), '\n')

Empty DataFrame
Columns: [train, number, ind434, st_from_name, st_to_name, time_start_norm, time_end_norm]
Index: [] 

Empty DataFrame
Columns: [train, number, ind434, st_from_name, st_to_name, time_start_norm, time_end_norm]
Index: [] 

Empty DataFrame
Columns: [train, number, ind434, st_from_name, st_to_name, time_start_norm, time_end_norm]
Index: [] 

Empty DataFrame
Columns: [train, number, ind434, st_from_name, st_to_name, time_start_norm, time_end_norm]
Index: [] 



In [608]:
teams = [9608009639, 9608003284, 9608000026, 9608010886]
for team in teams:
    print(team_plan[team_plan.number == team][['team', 'number', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'loco', 'state']].to_string(index=False), '\n')

         team      number          st_from_name            st_to_name time_start_norm  time_end_norm          loco  state
 200200195927  9608009639           СМОЛЯНИНОВО           СМОЛЯНИНОВО   Jul 06, 20:05  Jul 06, 21:35            -1      2
 200200195927  9608009639           СМОЛЯНИНОВО               ШКОТОВО   Jul 06, 21:35  Jul 06, 21:54  200200100951      1
 200200195927  9608009639               ШКОТОВО  АРТЕМ-ПРИМОРСКИЙ III   Jul 06, 21:54  Jul 06, 22:02  200200100951      1
 200200195927  9608009639  АРТЕМ-ПРИМОРСКИЙ III    АРТЕМ-ПРИМОРСКИЙ I   Jul 06, 22:02  Jul 06, 22:12  200200100951      1
 200200195927  9608009639    АРТЕМ-ПРИМОРСКИЙ I               УГЛОВАЯ   Jul 06, 22:12  Jul 06, 22:23  200200100951      1
 200200195927  9608009639               УГЛОВАЯ        АМУРСКИЙ ЗАЛИВ   Jul 06, 22:23  Jul 06, 22:32  200200100951      1
 200200195927  9608009639        АМУРСКИЙ ЗАЛИВ          НАДЕЖДИНСКАЯ   Jul 06, 22:32  Jul 06, 22:41  200200100951      1
 200200195927  960800963

In [609]:
a['link'] = list(zip(a.st_from_name, a.st_to_name))
slot['link'] = list(zip(slot.st_from_name, slot.st_to_name))
slot['tt'] = slot.time_end - slot.time_start
slot[slot.link.isin(a.link)].groupby('link').tt.mean()
slot[slot.link == ('БОЛЬШОЙ ЛУГ', 'ГОНЧАРОВО')].sort_values('time_start')[['slot', 'link', 'time_start_norm', 'tt']]

Unnamed: 0,slot,link,time_start_norm,tt
5877,200230644875,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 02:52",1080
11221,200230645019,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 03:35",1080
14460,200230664823,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 03:45",1080
4736,200230654960,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 04:04",1080
10792,200230674858,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 04:14",1080
14165,200230664942,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 04:22",1080
6721,200230654798,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 05:02",1080
11914,200230664893,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 05:12",1080
6177,200230654995,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 05:22",1080
11579,200230674885,"(БОЛЬШОЙ ЛУГ, ГОНЧАРОВО)","Jul 06, 05:43",1080


In [610]:
print(nice_time(current_time))

Jul 06, 14:41


In [611]:
print(train_info[train_info.ind434.isin(inds)].sort_values('ind434').reset_index()[['train', 'number', 'ind434', 'oper', 'oper_time_f', 'loc_name']].to_string())

Empty DataFrame
Columns: [train, number, ind434, oper, oper_time_f, loc_name]
Index: []


In [612]:
[ind for ind in inds if ind not in train_info.ind434.unique()]

['9379-342-9861', '8626-125-9861', '8622-692-9861', '8630-837-9861']

In [613]:
cols = ['team', 'number', 'loc_name', 'state', 'oper_time_f']
nice_print(team_info[(team_info.state == '0') & (team_info.link.isin(links.link) == False)], cols)

            team      number                                           loc_name state    oper_time_f
0   200200156425  9203009094                                      МЕГЕТ - МЕГЕТ     0  Jul 06, 12:56
1   200200200798  9409058311                      БОЛЬШОЙ НЕВЕР - БОЛЬШОЙ НЕВЕР     0  Jul 06, 13:28
2   200200201602  9203005042                              АНГАСОЛКА - АНГАСОЛКА     0  Jul 06, 10:31
3   200200181128  9611074717                                БЕРКАКИТ - БЕРКАКИТ     0  Jul 06, 07:11
4   200200205651  9207005093                                    ТАЛЬЦЫ - ТАЛЬЦЫ     0  Jul 06, 14:28
5   200200277852  9613054386                                      ТЫРМА - ТЫРМА     0  Jul 06, 07:40
6   200200274674  9214007339                                  СЮЛЬБАН - СЮЛЬБАН     0  Jul 06, 13:07
7   200200125800  9611049151                                      ТЫНДА - ТЫНДА     0  Jul 06, 13:01
8   200200090975  9613067578                              ПОСТЫШЕВО - ПОСТЫШЕВО     0  Jul 

In [614]:
ind = '8642-798-9857'
train_info[train_info.ind434 == ind][['train', 'number', 'weight', 'oper_time_f', 'loc_name']]

Unnamed: 0,train,number,weight,oper_time_f,loc_name
1763,210217266516,2728,5966,"Jul 06, 14:01",КОШУРНИКОВО - ЩЕТИНКИНО


In [615]:
loco_info['ser_name'] = loco_info.series.map(loco_series.set_index('ser_id').ser_name)
loco_plan['ser_name'] = loco_plan.loco.map(loco_info.set_index('loco').ser_name)
loco_info[loco_info.train == '210217266516'][['loco', 'number', 'ser_name']]
loco_info[loco_info.number == 2680]
loco_plan[loco_plan.loco == '200200093671'][['loco', 'number', 'ser_name', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'train']]

Unnamed: 0,loco,number,ser_name,st_from_name,st_to_name,time_start_norm,time_end_norm,state,train
52398,200200093671,2680,ВЛ80С,ЩЕТИНКИНО,КОШУРНИКОВО,"Jul 06, 20:35","Jul 06, 21:37",0,88880013304671
52399,200200093671,2680,ВЛ80С,КОШУРНИКОВО,РАЗЪЕЗД 557 КМ,"Jul 06, 23:45","Jul 07, 00:01",0,88880013304671
52400,200200093671,2680,ВЛ80С,РАЗЪЕЗД 557 КМ,ИРБА,"Jul 07, 00:01","Jul 07, 01:09",0,88880013304671
52401,200200093671,2680,ВЛ80С,ИРБА,КУРАГИНО,"Jul 07, 01:09","Jul 07, 01:22",0,88880013304671
52402,200200093671,2680,ВЛ80С,КУРАГИНО,МИНУСИНСК,"Jul 07, 01:22","Jul 07, 03:00",0,88880013304671
52403,200200093671,2680,ВЛ80С,МИНУСИНСК,ПОДСИНИЙ,"Jul 07, 03:00","Jul 07, 03:11",0,88880013304671
52404,200200093671,2680,ВЛ80С,ПОДСИНИЙ,АБАКАН,"Jul 07, 03:11","Jul 07, 03:19",0,88880013304671
52405,200200093671,2680,ВЛ80С,АБАКАН,ТАШЕБА,"Jul 07, 07:05","Jul 07, 07:14",0,88880013304671
52406,200200093671,2680,ВЛ80С,ТАШЕБА,ТИГЕЙ,"Jul 07, 07:14","Jul 07, 07:30",0,88880013304671
52407,200200093671,2680,ВЛ80С,ТИГЕЙ,ОРОСИТЕЛЬНЫЙ,"Jul 07, 07:30","Jul 07, 07:38",0,88880013304671


In [617]:
nice_time(current_time)

'Jul 06, 14:41'

In [637]:
st_name = 'ИРКУТСК-СОРТИРОВОЧНЫЙ'
cols = ['team', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm', 'state', 'loco', 'all_states']
irk = team_plan[(team_plan.st_from_name == st_name) & (team_plan.state.isin([0, 1]))
          & (team_plan.depot_name == st_name)
          & (team_plan.time_start >= current_time) & (team_plan.time_start < current_time + 24 * 3600)]
irk.team.count()

84

In [634]:
cols = ['train', 'train_type', 'st_from_name', 'st_to_name', 'time_start_norm', 'time_end_norm']
a = train_plan[(train_plan.st_from_name == st_name)
          & (train_plan.time_start >= current_time) & (team_plan.time_start < current_time + 24 * 3600)].sort_values('time_start')[cols]
a.train_type.value_counts()

2    72
9    70
8    41
Name: train_type, dtype: int64

In [672]:
b = team_plan[(team_plan.st_from_name == st_name) & (team_plan.state.isin([0, 1]))          
          & (team_plan.time_start >= current_time) & (team_plan.time_start < current_time + 24 * 3600)].groupby('st_to_name').depot_name.value_counts()
q = team_plan[(team_plan.st_from_name == st_name) & (team_plan.state.isin([0, 1]))          
          & (team_plan.time_start >= current_time) & (team_plan.time_start < current_time + 24 * 3600)].groupby('st_to_name').team.count()
b = b.reset_index()
b['s'] = b.st_to_name.map(q)
b['perc'] = b[0] / b['s']
b[['st_to_name', 'depot_name', 'perc']].set_index(['st_to_name', 'depot_name']

Unnamed: 0_level_0,Unnamed: 1_level_0,perc
st_to_name,depot_name,Unnamed: 2_level_1
БАТАРЕЙНАЯ,ЗИМА,0.519608
БАТАРЕЙНАЯ,ИРКУТСК-СОРТИРОВОЧНЫЙ,0.470588
БАТАРЕЙНАЯ,СЛЮДЯНКА I,0.009804
ВОЕННЫЙ ГОРОДОК,ИРКУТСК-СОРТИРОВОЧНЫЙ,0.684211
ВОЕННЫЙ ГОРОДОК,СЛЮДЯНКА I,0.263158
ВОЕННЫЙ ГОРОДОК,ЗИМА,0.052632
ГОНЧАРОВО,ИРКУТСК-СОРТИРОВОЧНЫЙ,0.589744
ГОНЧАРОВО,ЗИМА,0.333333
ГОНЧАРОВО,СЛЮДЯНКА I,0.076923


In [654]:
cols = ['train', 'train_type', 'time_start_norm', 'time_end_norm', 'start_st', 'end_st']
train_p = a[a.train_type == '8']
train_plan['start_st'] = train_plan.train.map(train_plan.drop_duplicates('train').set_index('train').st_from_name)
train_plan['end_st'] = train_plan.train.map(train_plan.drop_duplicates('train', keep='last').set_index('train').st_to_name)
train_plan[train_plan.train.isin(train_p.train)][cols].drop_duplicates('train').start_st.value_counts()

ИРКУТСК-СОРТИРОВОЧНЫЙ    21
ЧИТА I                    5
УЛАН-УДЭ                  4
КАЯ                       2
АЧИНСК I                  1
РЕШОТЫ                    1
ГОНЧАРОВО                 1
КИТОЙ                     1
ЧЕЛУТАЙ                   1
КАРЫМСКАЯ                 1
КРАСНОЯРСК                1
СЛЮДЯНКА I                1
БОГОТОЛ                   1
Name: start_st, dtype: int64

In [None]:
reg = '2002119299'
a = loco_info[(loco_info.regions.apply(lambda x: reg in x)) 
          & (loco_info.st_from == '-1') 
          & (loco_info.train == '-1')][['loco', 'loc_name', 'train']]
st = 'СУХОВСКАЯ'
paths = pd.read_csv(FOLDER + 'mandatory/paths.csv', encoding='utf-8-sig', sep=';')

In [902]:
regs = stations.loco_region.unique()
#select = regs[np.random.randint(len(regs))]
#select
for reg in regs:    
    a = loco_info[(loco_info.regions.apply(lambda x: str(reg) in x)) 
          & (loco_info.st_from == '-1') 
          & (loco_info.train == '-1')][['loco', 'loc_name', 'train']]
    if not a.empty:
        b = stations[stations.loco_region == reg].name.unique()
        st = b[np.random.randint(len(b))]        
        paths_s = paths[(paths.st_to == st) & (paths.st_from.isin(b))]
        #lim = np.percentile(paths_s.cost, 25)
        m = paths_s.cost.max()
        costs = paths_s[['st_from', 'cost']].set_index('st_from').to_dict()['cost']
        a['cost'] = a.loc_name.apply(lambda x: costs[x] if x in costs.keys() else 100)    
        act_lim = np.percentile(a.cost, 25)
        good_n = a[a.cost.apply(lambda x: x <= lim)].loco.count()
        total_n = a.loco.count()
        print('Region %s, station %s, 25%%-percentile = %.2f (max reg = %.2f), number of available locos = %d of %d (%.2f%%)'
             % (reg, st, act_lim, m, good_n, total_n, 100 * good_n / total_n))
    else:
        print('Empty reg', reg)

Region 2002119312, station БОЛЬШОЙ КАМЕНЬ, 25%-percentile = 3.00 (max reg = 16.00), number of available locos = 19 of 34 (55.88%)
Region 2002119288, station МАРГУЦЕК, 25%-percentile = 3.40 (max reg = 5.10), number of available locos = 12 of 23 (52.17%)
Region 2002119299, station СЛЮДЯНКА II, 25%-percentile = 5.50 (max reg = 40.50), number of available locos = 16 of 243 (6.58%)
Empty reg 2002119284
Region 2002119286, station СУРИКОВО, 25%-percentile = 3.40 (max reg = 3.40), number of available locos = 6 of 21 (28.57%)
Region 2002119301, station ИГИРМА, 25%-percentile = 7.95 (max reg = 66.70), number of available locos = 15 of 94 (15.96%)
Region 2002119322, station МАРИИНСК, 25%-percentile = 7.75 (max reg = 26.50), number of available locos = 0 of 10 (0.00%)
Region 2002119316, station АНГАРАКАН, 25%-percentile = 1.88 (max reg = 18.00), number of available locos = 2 of 2 (100.00%)
Region 2002119305, station МОШКА, 25%-percentile = 100.00 (max reg = 37.50), number of available locos = 3 of

In [905]:
reg = 2002119299
a = loco_info[(loco_info.regions.apply(lambda x: str(reg) in x)) 
      & (loco_info.st_from == '-1') 
      & (loco_info.train == '-1')][['loco', 'loc_name', 'train']]
if not a.empty:
    b = stations[stations.loco_region == reg].name.unique()
    #st = b[np.random.randint(len(b))]        
    st = 'МАРИИНСК'
    paths_s = paths[(paths.st_to == st) & (paths.st_from.isin(b))]
    lim = np.percentile(paths_s.cost, 25)
    m = paths_s.cost.max()
    costs = paths_s[['st_from', 'cost']].set_index('st_from').to_dict()['cost']
    a['cost'] = a.loc_name.apply(lambda x: costs[x] if x in costs.keys() else 100)    
    print(a.sort_values('cost').to_string(index=False))
    act_lim = np.percentile(a.cost, 25)
    good_n = a[a.cost.apply(lambda x: x <= lim)].loco.count()
    total_n = a.loco.count()
    print('Region %s, station %s, 25%%-percentile = %.2f (actual = %.2f, max = %.2f), number of available locos = %d of %d (%.2f%%)'
         % (reg, st, lim, act_lim, m, good_n, total_n, 100 * good_n / total_n))
else:
    print('Empty reg', reg)

         loco                  loc_name train   cost
 200200098725                  МАРИИНСК    -1    0.0
 200200059783                  МАРИИНСК    -1    0.0
 200200068517                  МАРИИНСК    -1    0.0
 200200106012                  МАРИИНСК    -1    0.0
 200200104985                  МАРИИНСК    -1    0.0
 200200084828                  МАРИИНСК    -1    0.0
 200200097215                  МАРИИНСК    -1    0.0
 200200086141                   БОГОТОЛ    -1    1.0
 200200100613                   БОГОТОЛ    -1    1.0
 200200104494                   БОГОТОЛ    -1    1.0
 200200058145                   БОГОТОЛ    -1    1.0
 200200087718                   БОГОТОЛ    -1    1.0
 200200098870                  АЧИНСК I    -1    3.0
 200200094741                  АЧИНСК I    -1    3.0
 200200085633                  АЧИНСК I    -1    3.0
 200200095381                  АЧИНСК I    -1    3.0
 200200092518                     БУГАЧ    -1    5.5
 200200063807                КРАСНОЯРСК    -1 

In [911]:
def func(row):
    df = paths[(paths.st_from == row.st_from_name) & (paths.st_to == row.st_to_name)]
    return df.cost.values[0] if not df.empty else 1000

start = train_plan[train_plan.train_type == '8'].drop_duplicates('train')
end = train_plan[train_plan.train_type == '8'].drop_duplicates('train', keep='last')
start['end'] = start.train.map(end.set_index('train').st_to_name)
q = start[['train', 'st_from_name', 'st_to_name']]

q['cost'] = q.apply(lambda row: func(row), axis=1)
q

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,train,st_from_name,st_to_name,cost
22,88880005294405,СМОЛЯНИНОВО,ШКОТОВО,1.0
54,88880005294423,ВИХОРЕВКА,ТУРМА,1.0
774,88880009954708,УЛАН-УДЭ,ЗАУДИНСКИЙ,1.0
851,88880002004287,ТАЙШЕТ,ЮРТЫ,1.0
864,88880002004285,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,0.0
1071,88880002004323,ИРКУТСК-СОРТИРОВОЧНЫЙ,БАТАРЕЙНАЯ,0.0
1079,88880002004335,ТОРЕЯ,ТАЙШЕТ,1.0
1080,88880002004333,АБАКАН,ТАШЕБА,1.0
1096,88880002004331,ТАЙШЕТ,ЮРТЫ,1.0
1166,88880002004343,ЛЕНА,ХРЕБТОВАЯ,1.0
