In [1]:
import numpy as np
import pandas as pd
import pickle
import time
import datetime

### Helper Functions 

In [10]:
def convert_minutes(string_time):
    x = time.strptime(string_time, '%H:%M')
    return datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds() // 60

### Load in Initial Schedule

In [2]:
red1 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Red-1.csv')
red2 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Red-2.csv')
orange3 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Orange-3.csv')
orange4 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Orange-4.csv')
blue5 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Blue-5.csv')
blue6 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Blue-6.csv')
yellow7 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Yellow-7.csv')
yellow8 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Yellow-8.csv')
green9 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Green-9.csv')
green10 = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Green-10.csv')
combined = pd.read_csv('initial_schedule/2028_BART_Schedule_v1.2 - Combined.csv')


### Define Necessary Sets 

In [112]:
segment_departureYard = {1 : "R", 
                        2 : "M", 
                        3 : "R", 
                        4 : "W", 
                        5 : "D", 
                        6 : "E", 
                        7 : "P", 
                        8 : "S", 
                        9 : "D", 
                        10 : "W"}

segment_arrivalYard = {1 : "M", 
                        2 : "R", 
                        3 : "W", 
                        4 : "R", 
                        5 : "E", 
                        6 : "D", 
                        7 : "S", 
                        8 : "P", 
                        9 : "W", 
                        10 : "D"}


# Passenger Demand Intervals 
demand_intervals = np.arange(240, 1441, 60)

# 2 hour shifts according to a 10 hour day  
shift = np.arange(1, 11)



## Generate Alpha_ac & Beta_ac Sets

In [189]:
alpha = combined.drop(columns=["Time of Arrival (beta)", "Rush"])
beta = combined.drop(columns=["Time of Departure (alpha)", "Rush"])
alpha.to_pickle('alpha.pkl')
beta.to_pickle('beta.pkl')
                

## Generate Departure Index Set

In [203]:
segment_index = combined.drop(columns=["Rush", "Time of Departure (alpha)", "Time of Arrival (beta)"])
segment_index.rename(columns = {"Travel Segment (a)" : "segment"}, inplace=True)
segment_index.head()

Unnamed: 0,segment,Departure Index (c),yard
0,1,1,R
1,1,2,R
2,1,3,R
3,1,4,R
4,1,5,R


In [204]:
segment_index.to_pickle("segment_index.pkl")

In [208]:
# segment_index
np.max(segment_index[["Departure Index (c)"]].values)

109

## PreProcess Inventory Yard Event Table 

In [113]:
red1['segment'] = pd.DataFrame(np.array([int(1)] * red1.size))
red2['segment'] = pd.DataFrame(np.array([int(2)] * red2.size))
orange3['segment'] = pd.DataFrame(np.array([int(3)] * orange3.size))
orange4['segment'] = pd.DataFrame(np.array([int(4)] * orange4.size))
blue5['segment'] = pd.DataFrame(np.array([int(5)] * blue5.size))
blue6['segment'] = pd.DataFrame(np.array([int(6)] * blue6.size))
yellow7['segment'] = pd.DataFrame(np.array([int(7)] * yellow7.size))
yellow8['segment'] = pd.DataFrame(np.array([int(8)] * yellow8.size))
green9['segment'] = pd.DataFrame(np.array([int(9)] * green9.size))
green10['segment'] = pd.DataFrame(np.array([int(10)] * green10.size))

# Fix column name of green10
green10.rename(columns={"Departure time" : "Time of departure (2400)"}, inplace=True)


In [114]:
# Sorted Departure times for each yard 
R_d = pd.concat([red1, orange3], sort=True)
M_d = red2
W_d = pd.concat([orange4, green10], sort=True)
D_d = pd.concat([blue5, green9], sort=True)
E_d = blue6
P_d = yellow7
S_d = yellow8

R_d = R_d.drop(columns=["Rush", "Arrival Time"])
M_d = M_d.drop(columns=["Rush", "Arrival Time"])
W_d = W_d.drop(columns=["Rush", "Arrival Time"])
D_d = D_d.drop(columns=["Rush", "Arrival Time"])
E_d = E_d.drop(columns=["Rush", "Arrival Time"])
P_d = P_d.drop(columns=["Rush", "Arrival Time"])
S_d = S_d.drop(columns=["Unnamed: 3", "Arrival Time"])

R_d["Event"] = pd.DataFrame(-1*np.ones(R_d.size))
M_d["Event"] = pd.DataFrame(-1*np.ones(M_d.size))
W_d["Event"] = pd.DataFrame(-1*np.ones(W_d.size))
D_d["Event"] = pd.DataFrame(-1*np.ones(D_d.size))
E_d["Event"] = pd.DataFrame(-1*np.ones(E_d.size))
P_d["Event"] = pd.DataFrame(-1*np.ones(P_d.size))
S_d["Event"] = pd.DataFrame(-1*np.ones(S_d.size))

R_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
M_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
W_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
D_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
E_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
P_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)
S_d.rename(columns={"Time of departure (2400)" : "Event Time"}, inplace=True)



In [115]:
# Sorted Arrival times for each yard 
M_a = red1
R_a = pd.concat([red2, orange4], sort=True)
W_a = pd.concat([orange3, green9], sort=True)
E_a = blue5
D_a = blue6
S_a = yellow7
P_a = yellow8

R_a = R_a.drop(columns=["Rush", "Time of departure (2400)"])
M_a = M_a.drop(columns=["Rush", "Time of departure (2400)"])
W_a = W_a.drop(columns=["Rush", "Time of departure (2400)"])
D_a = D_a.drop(columns=["Rush", "Time of departure (2400)"])
E_a = E_a.drop(columns=["Rush", "Time of departure (2400)"])
P_a = P_a.drop(columns=["Unnamed: 3", "Time of departure (2400)"])
S_a = S_a.drop(columns=["Rush", "Time of departure (2400)"])

R_a["Event"] = pd.DataFrame(np.ones(R_a.size))
M_a["Event"] = pd.DataFrame(np.ones(M_a.size))
W_a["Event"] = pd.DataFrame(np.ones(W_a.size))
D_a["Event"] = pd.DataFrame(np.ones(D_a.size))
E_a["Event"] = pd.DataFrame(np.ones(E_a.size))
P_a["Event"] = pd.DataFrame(np.ones(P_a.size))
S_a["Event"] = pd.DataFrame(np.ones(S_a.size))

R_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True)
M_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True)
W_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True)
D_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True) 
E_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True) 
P_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True) 
S_a.rename(columns={"Arrival Time" : "Event Time"}, inplace=True)




# Find Corresponding Train segment a and index c for each yard 

# Iterate through each column. If 

In [116]:
S_a.head()

Unnamed: 0,Departure Index (c),Event Time,segment,Event
0,1,5:13,7,1.0
1,2,5:25,7,1.0
2,3,5:37,7,1.0
3,4,5:49,7,1.0
4,5,6:01,7,1.0


In [117]:
# All Events for each yard 

R = pd.concat([R_d, R_a], sort=True)
M = pd.concat([M_d, M_a], sort=True)
W = pd.concat([W_d, W_a], sort=True)
D = pd.concat([D_d, D_a], sort=True)
E = pd.concat([E_d, E_a], sort=True)
P = pd.concat([P_d, P_a], sort=True)
S = pd.concat([S_d, S_a], sort=True)

# listDFS = [R, M, W, D, E, P, S]
# for df in listDFS:
#     df = df.reset_index()
#     df = df.drop(columns=["Departure Index (c)", "index"])
#     df["index"] = pd.DataFrame(np.arange(1, df.size))


R = R.reset_index()
R = R.drop(columns=["Departure Index (c)", "index"])
R["yard event index"] = pd.DataFrame(np.arange(1, R.size))

M = M.reset_index()
M = M.drop(columns=["Departure Index (c)", "index"])
M["yard event index"] = pd.DataFrame(np.arange(1, M.size))

W = W.reset_index()
W = W.drop(columns=["Departure Index (c)", "index"])
W["yard event index"] = pd.DataFrame(np.arange(1, W.size))

D = D.reset_index()
D = D.drop(columns=["Departure Index (c)", "index"])
D["yard event index"] = pd.DataFrame(np.arange(1, D.size))

E = E.reset_index()
E = E.drop(columns=["Departure Index (c)", "index"])
E["yard event index"] = pd.DataFrame(np.arange(1, E.size))

P = P.reset_index()
P = P.drop(columns=["Departure Index (c)", "index"])
P["yard event index"] = pd.DataFrame(np.arange(1, P.size))

S = S.reset_index()
S = S.drop(columns=["Departure Index (c)", "index"])
S["yard event index"] = pd.DataFrame(np.arange(1, S.size))

R.head()

Unnamed: 0,Event,Event Time,segment,yard event index
0,-1.0,4:00,1,1
1,-1.0,4:12,1,2
2,-1.0,4:24,1,3
3,-1.0,4:36,1,4
4,-1.0,4:48,1,5


In [118]:
# Combine all event yard dataframes into one. 

R_list = ["R"] * R.size
M_list = ["M"] * M.size
W_list = ["W"] * W.size
D_list = ["D"] * D.size
E_list = ["E"] * E.size
P_list = ["P"] * P.size
S_list = ["S"] * S.size

R["yard"] = pd.DataFrame(np.array(R_list))
M["yard"] = pd.DataFrame(np.array(M_list))
W["yard"] = pd.DataFrame(np.array(W_list))
D["yard"] = pd.DataFrame(np.array(D_list))
E["yard"] = pd.DataFrame(np.array(E_list))
P["yard"] = pd.DataFrame(np.array(P_list))
S["yard"] = pd.DataFrame(np.array(S_list))

yardEvents = pd.concat([R, M, W, D, E, P, S])
yardEvents = yardEvents.sort_values(by=['Event Time'])

In [214]:
yardEvents.where(yardEvents['yard'] == "R").dropna().sort_values(by=["yard event index"])

Unnamed: 0,Event,Event Time,segment,yard event index,yard
0,-1.0,4:00,1.0,1.0,R
1,-1.0,4:12,1.0,2.0,R
2,-1.0,4:24,1.0,3.0,R
3,-1.0,4:36,1.0,4.0,R
4,-1.0,4:48,1.0,5.0,R
5,-1.0,5:00,1.0,6.0,R
6,-1.0,5:12,1.0,7.0,R
7,-1.0,5:24,1.0,8.0,R
8,-1.0,5:36,1.0,9.0,R
9,-1.0,5:48,1.0,10.0,R


In [210]:
yardEvents.to_pickle("yardEvents.pkl")

In [144]:
# All departure yard events 
departureYardEvents = yardEvents.where(yardEvents["Event"] == -1.0).dropna()


# All Arrival yard events 
arrivalYardEvents = yardEvents.where(yardEvents["Event"] == 1.0).dropna()

In [153]:
test = departureYardEvents.where(departureYardEvents["yard"] == "D").dropna()
# test.sort_values(by=["yard event index"])
# test

In [122]:
yardColumn = [];
for index, row in combined.iterrows():
    yard = segment_departureYard.get(int(row["Travel Segment (a)"]))
    yardColumn.append(yard)


In [123]:
combined['yard'] = pd.DataFrame(np.array(yardColumn))

In [140]:
segment_index = combined.drop(columns=["Rush", "Time of Departure (alpha)", "Time of Arrival (beta)"])
segment_index.rename(columns = {"Travel Segment (a)" : "segment"}, inplace=True)
segment_index.head()


Unnamed: 0,segment,Departure Index (c),yard
0,1,1,R
1,1,2,R
2,1,3,R
3,1,4,R
4,1,5,R


In [154]:
departureYardEvents.head()

Unnamed: 0,Event,Event Time,segment,yard event index,yard
98,-1.0,0:00,5.0,99.0,D
173,-1.0,0:00,3.0,174.0,R
108,-1.0,0:00,8.0,109.0,S
98,-1.0,0:00,6.0,99.0,E
108,-1.0,0:00,7.0,109.0,P


In [173]:
departureYardEvents.size
# segment_index.size 

4680

In [178]:
segmentIndex = []
for i, yardRow in departureYardEvents.iterrows():
    for j, segmentRow in segment_index.iterrows():
        if (yardRow.segment == segmentRow.segment and yardRow.yard == segmentRow.yard):
            print(segmentRow["Departure Index (c)"])
            segmentIndex.append(segmentRow["Departure Index (c)"])
            break
    break
#             segment_index.append(segmentRow["Departure Index (c)"])

1


In [180]:
test = pd.merge(segment_index, departureYardEvents, how="right", on=["segment", "yard"])
test.drop_duplicates("yard event index", inplace=True)
test

Unnamed: 0,segment,Departure Index (c),yard,Event,Event Time,yard event index
0,1,1,R,-1.0,10:00,34.0
78,1,1,R,-1.0,10:12,35.0
156,1,1,R,-1.0,10:24,36.0
234,1,1,R,-1.0,10:36,37.0
312,1,1,R,-1.0,10:48,38.0
390,1,1,R,-1.0,11:00,39.0
468,1,1,R,-1.0,11:12,40.0
546,1,1,R,-1.0,11:24,41.0
624,1,1,R,-1.0,11:36,42.0
702,1,1,R,-1.0,11:48,43.0


In [168]:
# >>> df.join(other, lsuffix='_caller', rsuffix='_other')
# new_df = pd.merge(A_df, B_df,  how='left', left_on=['A_c1','c2'], right_on = ['B_c1','c2'])

# joinedCombinedDepartures = pd.merge(departureYardEvents, segment_index, 
#                                     how="left", 
#                                     left_on=["segment", "yard"], 
#                                     right_on=["segment", "yard"])
# joinedCombinedDepartures.where(joinedCombinedDepartures["yard"] == "D").dropna()