In [114]:
import numpy as np
import pandas as pd
import time, datetime
from ast import literal_eval
import matplotlib.pyplot as plt
import seaborn as sns

#%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rc('font', family='Times New Roman')

# Use the fully qualified option name: pandas resolves abbreviated names by
# pattern matching and raises OptionError when more than one option matches.
pd.set_option('display.max_rows', 50)

# Directory the CSV files below are read from.
FOLDER = 'resources/'

# strftime pattern used for human-readable timestamps.
time_format = '%b %d, %H:%M'


def load_csv(name, str_cols=()):
    """Read ``FOLDER + name`` and parse the columns in ``str_cols`` as strings.

    Parameters
    ----------
    name : str
        File name relative to ``FOLDER``.
    str_cols : iterable of str
        Columns that must be read through the ``str`` converter so identifiers
        are not parsed as numbers.

    Returns
    -------
    pandas.DataFrame
    """
    converters = {col: str for col in str_cols} or None
    return pd.read_csv(FOLDER + name, converters=converters)


start_time = time.time()
current_time = load_csv('current_time.csv').current_time[0]
twr          = load_csv('team_work_region.csv', ['twr'])
links        = load_csv('link.csv')
stations     = load_csv('station.csv', ['station'])
train_info   = load_csv('train_info.csv', ['train', 'st_from', 'st_to', 'oper_location'])
train_plan   = load_csv('slot_train.csv', ['train', 'st_from', 'st_to'])
loco_info    = load_csv('loco_attributes.csv', ['train', 'loco', 'depot', 'st_from', 'st_to'])
loco_plan    = load_csv('slot_loco.csv', ['train', 'loco', 'st_from', 'st_to'])
team_info    = load_csv('team_attributes.csv', ['team', 'depot', 'oper_location',
                                                'st_from', 'st_to', 'loco', 'depot_st'])
team_plan    = load_csv('slot_team.csv', ['team', 'loco', 'st_from', 'st_to'])
loco_series  = load_csv('loco_series.csv')

# `regions` is stored in the CSV as a Python literal; parse it into real objects.
team_info['regions'] = team_info.regions.apply(literal_eval)
# Station id -> (name, esr) lookup, one row per distinct combination.
st_names = stations[['station', 'name', 'esr']].drop_duplicates().set_index('station')
print('Log time: %d, %s' % (current_time, time.ctime(current_time)))
print('Read csvs:', np.round(time.time() - start_time, 2), 'sec')

Log time: 1467099475, Tue Jun 28 10:37:55 2016
Read csvs: 0.91 sec


In [115]:
# Merge the _plan and _info tables for trains, locomotives and teams.
# Add station names along the route and human-readable departure/arrival times to every table.

def add_info(df):
    """Add station-name and formatted-time columns to ``df`` in place.

    A column is only added when its source column is present:

    * ``st_from`` / ``st_to`` -> ``st_from_name`` / ``st_to_name`` (looked up in ``st_names.name``)
    * ``time_start`` / ``time_end`` -> ``time_start_norm`` / ``time_end_norm``
      (local time rendered with ``time_format``)
    * ``oper_location`` -> ``oper_location_name``, with 0 where no station matches
    * all of ``oper_location``, ``st_from``, ``st_to`` -> ``loc_name``: the station
      name, or ``'<from> - <to>'`` where ``oper_location_name`` is 0

    Returns None; the frame passed in is modified.
    """
    def fmt(ts):
        return time.strftime(time_format, time.localtime(ts))

    if 'st_from' in df.columns:
        df['st_from_name'] = df.st_from.map(st_names.name)
    if 'st_to' in df.columns:
        df['st_to_name'] = df.st_to.map(st_names.name)
    if 'time_start' in df.columns:
        df['time_start_norm'] = df.time_start.apply(fmt)
    if 'time_end' in df.columns:
        df['time_end_norm'] = df.time_end.apply(fmt)
    if 'oper_location' in df.columns:
        # Assign the filled Series back instead of calling fillna(inplace=True) on
        # `df.oper_location_name`: the chained in-place form only works when the
        # attribute access returns a view and does nothing under Copy-on-Write.
        df['oper_location_name'] = df.oper_location.map(st_names.name).fillna(0)
    if all(col in df.columns for col in ('oper_location', 'st_from', 'st_to')):
        df['loc_name'] = df.oper_location_name
        # 0 is the "no station" marker set above; describe those rows by their route.
        df.loc[df.loc_name == 0, 'loc_name'] = df.st_from_name + ' - ' + df.st_to_name
    
# Enrich every table first so the merged plans carry the name/time columns
# from both the plan side and the info side.
for frame in (train_plan, loco_plan, team_plan, loco_info, team_info, train_info):
    add_info(frame)

# Left-join each plan with its attribute table; columns that exist on both
# sides keep the plan name and get an '_info' suffix for the attribute copy.
# Note: the *_plan names are rebound here, so this cell is not idempotent.
train_plan = train_plan.merge(train_info, on='train', suffixes=('', '_info'), how='left')
loco_plan = loco_plan.merge(loco_info, on='loco', suffixes=('', '_info'), how='left')
team_plan = team_plan.merge(team_info, on='team', suffixes=('', '_info'), how='left')

# Team ids starting with '2' are labelled 'Реальная', all others 'Фейковая'.
# startswith is used instead of str(x)[0] so an empty id cannot raise IndexError.
is_real = team_plan.team.map(str).str.startswith('2')
team_plan['team_type'] = is_real.map({True: 'Реальная', False: 'Фейковая'})

In [116]:
def nice_time(t):
    """Render Unix timestamp ``t`` as local time using ``time_format``.

    Any value that is not greater than zero yields an empty string.
    """
    if t > 0:
        return time.strftime(time_format, time.localtime(t))
    return ''

In [117]:
# Load the ';'-separated train list and give its five columns short names.
# NOTE(review): the frame is called `even` but the file is 'irk_odd.csv' - confirm which one is meant.
even = pd.read_csv('irk_odd.csv', sep=';')
even.columns = ['n', 'number', 'ind', 'time', 'weight']
# Replace the spaces inside each train index with '-'.
even['ind'] = [raw.replace(' ', '-') for raw in even.ind]
even.head()

Unnamed: 0,n,number,ind,time,weight
0,1,1179,9859-266-2305,10:57,2583
1,2,2049,9877-013-9200,11:11,1650
2,3,1085,9369-529-1305,11:26,1481
3,4,2481,9722-022-8623,11:33,2003
4,5,3847,9300-755-9325,11:50,4437


In [118]:
# Preview the train id, train number and index columns of train_info.
train_info.loc[:, ['train', 'number', 'ind434']].head()

Unnamed: 0,train,number,ind434
0,210251643764,9227,9351-986-9333
1,210251487430,3471,8891-976-8880
2,210251645533,9210,9231-886-9320
3,210251314711,3145,9600-329-9379
4,210251555062,2025,9600-333-9379


### Пробуем просто найти поезда по индексу

In [119]:
# Lookup table: full train index (ind434) -> train id.
# Series.map needs a uniquely indexed mapper, so keep the first train for
# every index instead of failing when an index occurs more than once.
train_by_ind = train_info.drop_duplicates('ind434').set_index('ind434').train
even['train_id'] = even.ind.map(train_by_ind)

# Count matched / unmatched rows once from a single boolean mask.
matched = even.train_id.notnull()
good = even[matched].ind.count()
bad = even[~matched].ind.count()
total = even.ind.count()
print('Find indices: %d of %d\nNot detected trains: %d' % (good, total, bad))
even.head()

Find indices: 43 of 94
Not detected trains: 51


Unnamed: 0,n,number,ind,time,weight,train_id
0,1,1179,9859-266-2305,10:57,2583,200258764628.0
1,2,2049,9877-013-9200,11:11,1650,200258008316.0
2,3,1085,9369-529-1305,11:26,1481,210251533284.0
3,4,2481,9722-022-8623,11:33,2003,200258788958.0
4,5,3847,9300-755-9325,11:50,4437,


### Пробуем найти поезда по части индекса (без хвоста)

In [120]:
# Index without its last five characters (e.g. '9300-755-9325' -> '9300-755').
even['ind_part'] = even.ind.str[:-5]
train_info['ind_part'] = train_info.ind434.str[:-5]

# All train ids sharing an index prefix; computed once and reused for the lookup.
trains_by_part = train_info.groupby('ind_part').train.unique()
even['susp_trains'] = even.ind_part.map(trains_by_part)

# Fill still-unmatched rows with the candidate list. The result is assigned
# back explicitly: a chained `even.train_id.fillna(..., inplace=True)` only
# updates `even` when attribute access returns a view.
even['train_id'] = even.train_id.fillna(even.susp_trains)

found = even.train_id.notnull()
good = even[found].ind.count()
bad = even[~found].ind.count()
print('Search of part of index:')
print('Find indices: %d of %d\nNot detected trains: %d' % (good, total, bad))

Search of part of index:
Find indices: 55 of 94
Not detected trains: 39


### Из оставшихся выбираем поезда своего формирования (они будут созданы из ССП)

In [121]:
# First four characters of the index, stored as the start-station code.
even['start_esr'] = even.ind.str[:4]

# Start code treated as "own formation" in this analysis.
OWN_FORMATION_ESR = '9300'

# Build the two masks once and reuse them for the counts and the preview.
unmatched = even.train_id.isnull()
own_formation = even.start_esr == OWN_FORMATION_ESR

irk = even[unmatched & own_formation].ind.count()
bad = even[unmatched & ~own_formation].ind.count()
print('Search of Irk trains (SSP)')
print('Fing irk trains: %d' % irk)
print('Find trains: %d of %d\nNot detected trains: %d' % (total - bad, total, bad))
even[unmatched & own_formation].head()

Search of Irk trains (SSP)
Fing irk trains: 34
Find trains: 89 of 94
Not detected trains: 5


Unnamed: 0,n,number,ind,time,weight,train_id,ind_part,susp_trains,start_esr
4,5,3847,9300-755-9325,11:50,4437,,9300-755,,9300
5,6,3849,9300-756-9325,12:25,2303,,9300-756,,9300
8,9,3851,9300-757-9325,13:20,4437,,9300-757,,9300
14,15,3801,9300-526-9325,15:19,1506,,9300-526,,9300
15,16,3821,9300-758-9325,15:20,2303,,9300-758,,9300


### Смотрим, какие поезда остались ненайденными

In [122]:
# Key stations by the first four characters of their ESR code so they can be
# joined to the four-character codes taken from the train index.
stations['esr4'] = stations.esr.map(str).str[:4]
name_by_esr4 = stations.drop_duplicates('esr4').set_index('esr4').name
even['start_name'] = even.start_esr.map(name_by_esr4)
# Still-unmatched trains that do not start at '9300'.
even[even.train_id.isnull() & (even.start_esr != '9300')]

Unnamed: 0,n,number,ind,time,weight,train_id,ind_part,susp_trains,start_esr,start_name
65,66,1627,9328-071-9222,3:10,1533,,9328-071,,9328,КАЯ
66,67,1659,9331-345-9321,3:39,4183,,9331-345,,9331,СЛЮДЯНКА II
75,76,2011,9338-267-9322,6:20,2035,,9338-267,,9338,БАЙКАЛЬСК
85,86,2963,9687-793-8927,8:57,2032,,9687-793,,9687,ДЗЕМГИ
89,90,2963,9845-743-9200,9:31,2029,,9845-743,,9845,НАХОДКА


### Убираем поезда с близких станций формирования (начинающиеся на 93)

In [123]:
# Last four characters of the index, plus the matching station name.
even['end_esr'] = even.ind.str[-4:]
esr4_names = stations.drop_duplicates('esr4').set_index('esr4').name
even['end_name'] = even.end_esr.map(esr4_names)

# Keep unmatched trains whose start code does not begin with '93'.
starts_elsewhere = even.start_esr.str[:2] != '93'
not_found = even[even.train_id.isnull() & starts_elsewhere]
not_found

Unnamed: 0,n,number,ind,time,weight,train_id,ind_part,susp_trains,start_esr,start_name,end_esr,end_name
85,86,2963,9687-793-8927,8:57,2032,,9687-793,,9687,ДЗЕМГИ,8927,УЯР
89,90,2963,9845-743-9200,9:31,2029,,9845-743,,9845,НАХОДКА,9200,ТАЙШЕТ


### Загружаем csv с отсевами, ищем там поезда, не переданные в планировщик

In [124]:
def to_ind434(raw):
    """Convert a raw train index to the hyphenated form used in ``ind434``.

    Takes characters 0-3, 6-8 and 9 up to (not including) the last two and
    joins them with '-', e.g. '984502743920002' -> '9845-743-9200'.
    The value is passed through ``str`` first, so non-string input is accepted.
    """
    text = str(raw)
    return '-'.join((text[:4], text[6:9], text[9:-2]))


# Rejection log; ids are read as strings so they are not parsed as numbers.
otsev = pd.read_csv('otsev_detail.csv', sep=';', dtype={'train_index': str, 'train_id': str})
otsev['ind434'] = otsev['train_index'].apply(to_ind434)

# Log rows whose full index equals one of the unmatched trains and that have `out` set.
exact_match = otsev.ind434.isin(not_found.ind)
(otsev[exact_match]
    .sort_values('ind434')[['train_index', 'ind434', 'out', 'otsev_list']]
    .dropna(subset=['out']))

Unnamed: 0,train_index,ind434,out,otsev_list
17426,984502743920002,9845-743-9200,1,"t_out_departing_without_loco,out"


In [127]:
# Same prefix as `ind_part`: the index without its last five characters.
otsev['ind434_part'] = otsev.ind434.str[:-5]

# Log rows that share only the prefix with an unmatched train (full index differs).
exact_hit = otsev.ind434.isin(not_found.ind)
prefix_hit = otsev.ind434_part.isin(not_found.ind_part)
a = (otsev[~exact_hit & prefix_hit]
     .sort_values('ind434')[['train_id', 'train_index', 'ind434', 'out', 'otsev_list']]
     .dropna(subset=['out']))
a

Unnamed: 0,train_id,train_index,ind434,out,otsev_list


### Непереданные поезда, которых нет и в логах отсевов

In [128]:
# Unmatched trains whose index prefix does not occur in the rejection log at all.
absent_from_log = ~not_found.ind_part.isin(otsev.ind434_part)
not_found[absent_from_log]

Unnamed: 0,n,number,ind,time,weight,train_id,ind_part,susp_trains,start_esr,start_name,end_esr,end_name
85,86,2963,9687-793-8927,8:57,2032,,9687-793,,9687,ДЗЕМГИ,8927,УЯР
