In [81]:
import pandas as pd
from pandasql import sqldf
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from scipy import stats
import os

# Shutil is a recursive tool to delete file paths
# Note: Only needed if you plan to overwrite existing filepaths
import shutil

## Node Matching

In [130]:
match_list = pd.read_csv(os.path.join(os.path.join(os.getcwd(), "reso0.01"), 'Node_list_reso0.01_sort.csv'),index_col=0)
match_list

Unnamed: 0,Node,Lon,Lat
0,1,-89.02,41.85
1,2,-88.35,41.79
2,3,-88.35,42.10
3,4,-88.34,42.09
4,5,-88.32,42.02
...,...,...,...
81,82,-83.55,41.58
82,83,-83.55,42.23
83,84,-83.38,42.13
84,85,-83.35,42.37


In [83]:
# Getting the Trucks Data from Cummins Datasheet
dataSourcePath = 'given_sources//Batch_Div27_2021_03_months_Class_8_Results_metrics.xlsx'
df1 = pd.read_excel(dataSourcePath, index_col=None, usecols=['Vehicle Model-none',\
                      'Battery Energy Consumption per Mile-kWh/mi',\
                      'Fuel Consumption per 100 km-Diesel Equiv. l/100km',\
                      'Initial SOC-%',\
                      'Tire Crr-none',\
                      'Vehicle Static Mass-lbm',\
                      'Cycle Name-none'])

In [138]:
# Ruiting Checking method
# Logarithmic histogram plots
from cmath import nan


def checking_par(x):
    a = x.replace('p', '.')
    a = a.split('_')[1:5]
    a = [j for j in a]
    return (float(a[0]), float(a[1]), float(a[2]), float(a[3]))

def checking_rest(a):
    start_node_matched = False
    end_node_matched = False
    
    a = [float(x) for x in a]
    
    # Pre for loop arrangements
    node_col = match_list['Node'].values
    lon_lst = match_list['Lon'].values
    min_heap_i = {}
    min_heap_j = {}
    
    for idx, lat in enumerate(match_list['Lat'].values):
        lon = lon_lst[idx]

        # Calculate the distance between node in data and nodelist
        # NEED MODIFICATION
        # Distance calculation
        i_dist = np.sqrt((lat-a[0])**2 + (lon - a[1])**2) 
        j_dist = np.sqrt((lat-a[2])**2 + (lon - a[3])**2)
        
        # store distances in heap
        min_heap_i[i_dist] = node_col[idx]
        min_heap_j[j_dist] = node_col[idx]
        
    # sort the heap by distance
    a = sorted(min_heap_i)
    b = sorted(min_heap_j)
    i_min = a[0]
    j_min = b[0]
    
    # Checking if there are more than one points that match
#     if a[0] == 0 and a[1] == 0 :
#         print('multiple_mins')
#     if b[0] == 0 and b[1] == 0 :
#         print('multiple_mins')
        
    # Assign min_dist node
    i = min_heap_i[i_min]
    j = min_heap_j[j_min]

    return i, j

df2 = df1.copy()
print("Labelling...")
df2['temp'] = df2['Cycle Name-none'].apply(checking_par)
df2[['S_lat','S_lon','E_lat','E_lon']] = pd.DataFrame(df2.temp.tolist(), index= df2.index).div(2).round(2).multiply(2)
df3 = df2.merge(match_list,how='left',left_on=['S_lat','S_lon'],right_on=['Lat','Lon']).merge(match_list,how='left',left_on=['E_lat','E_lon'],right_on=['Lat','Lon'])

df2['(i, j)'] = df2['temp'].apply(checking_rest)
a = pd.DataFrame(df2['(i, j)'].tolist(), index=df2.index)
temp = df2.columns.get_loc('Cycle Name-none')
df2.insert(temp + 1, 'i', a[0])
df2.insert(temp + 2, 'j', a[1])


print("Done Labelling")
df2['Fuel Consumption per km-Diesel Equiv. l/km']= df2['Fuel Consumption per 100 km-Diesel Equiv. l/100km'] / 100
df2['Battery Energy Consumption per km-kWh/km']= df2['Battery Energy Consumption per Mile-kWh/mi']/1.60934
df2['Vehicle Static Mass-kg']= df2['Vehicle Static Mass-lbm']*0.453592
df3.head()

Labelling...
Done Labelling


Unnamed: 0,Vehicle Model-none,Cycle Name-none,Vehicle Static Mass-lbm,Tire Crr-none,Fuel Consumption per 100 km-Diesel Equiv. l/100km,Initial SOC-%,Battery Energy Consumption per Mile-kWh/mi,temp,S_lat,S_lon,E_lat,E_lon,Node_x,Lon_x,Lat_x,Node_y,Lon_y,Lat_y
0,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,20000,0.0061,20.071722,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,80.0,-84.24,39.38,,,
1,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,20000,0.00427,19.158632,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,80.0,-84.24,39.38,,,
2,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,32000,0.0061,22.468292,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,80.0,-84.24,39.38,,,
3,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,32000,0.00427,21.017119,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,80.0,-84.24,39.38,,,
4,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,44000,0.0061,25.056192,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,80.0,-84.24,39.38,,,


In [139]:
df2['i'] = df3['Node_x']#.astype(int)
df2['j'] = df3['Node_y']#.astype(int)
for ind,row in df2.iterrows():
    if pd.isnull(row.i):
        df2.i[ind] = row['(i, j)'][0]
    if pd.isnull(row.j):
        df2.j[ind]  = row['(i, j)'][1]

print("Dropping cols")
df2['i'] = df2['i'].astype(int)
df2['j'] = df2['j'].astype(int)
df2.to_csv('energy_w_n.csv')
# df2.drop(['temp','(i, j)', 'Fuel Consumption per 100 km-Diesel Equiv. l/100km', \
#           'Battery Energy Consumption per Mile-kWh/mi', \
#           'Vehicle Static Mass-lbm','S_lat','S_lon','E_lat','E_lon'], inplace=True, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.j[ind]  = row['(i, j)'][1]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.i[ind] = row['(i, j)'][0]


Dropping cols


In [140]:
print(set(np.arange(1, 85)) - set(df2.i.unique()).union(set(df2.j.unique()))) # Should be empty
df2.head(n = 10)

{38}


Unnamed: 0,Vehicle Model-none,Cycle Name-none,i,j,Vehicle Static Mass-lbm,Tire Crr-none,Fuel Consumption per 100 km-Diesel Equiv. l/100km,Initial SOC-%,Battery Energy Consumption per Mile-kWh/mi,temp,S_lat,S_lon,E_lat,E_lon,"(i, j)",Fuel Consumption per km-Diesel Equiv. l/km,Battery Energy Consumption per km-kWh/km,Vehicle Static Mass-kg
0,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,20000,0.0061,20.071722,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.200717,4e-06,9071.84
1,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,20000,0.00427,19.158632,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.191586,4e-06,9071.84
2,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,32000,0.0061,22.468292,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.224683,4e-06,14514.944
3,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,32000,0.00427,21.017119,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.210171,4e-06,14514.944
4,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,44000,0.0061,25.056192,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.250562,4e-06,19958.048
5,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,44000,0.00427,23.086852,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.230869,4e-06,19958.048
6,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,56000,0.0061,27.745914,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.277459,4e-06,25401.152
7,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,56000,0.00427,25.265434,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.252654,4e-06,25401.152
8,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,68000,0.0061,30.496375,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.304964,4e-06,30844.256
9,class8_conv_2021_low_sleeper_longhaul_FU19,Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8,80,50,68000,0.00427,27.50701,70.0,7e-06,"(39.379, -84.242, 41.687, -86.149)",39.38,-84.24,41.68,-86.14,"(80, 50)",0.27507,4e-06,30844.256


In [141]:
# 2) Edge List 
EdgeList = df2[['i', 'S_lat', 'S_lon', 'j', 'E_lat', 'E_lon']].copy().drop_duplicates(subset=['i', 'S_lat', 'S_lon', 'j', 'E_lat', 'E_lon'])
EdgeList.sort_values('i').reset_index(drop = True).to_csv('EdgeList_0.01.csv')

In [142]:
# 3) Edge difference (missing from current data, addr - df2)
# 'Load_mat_all_reso0.02_sort.csv'
address_edges = pd.read_csv(os.path.join(os.path.join(os.getcwd(), "reso0.01"), 'Load_mat_all_reso0.01_sort.csv'), \
                      usecols=['Start_Node', 'Lat_i', 'Long_i', 'End_Node', 'Lat_j', 'Long_j']).drop_duplicates(\
                      subset =['Start_Node', 'Lat_i', 'Long_i', 'End_Node', 'Lat_j', 'Long_j'])
address_edges = address_edges.rename(columns={"Start_Node": "i", "Lat_i": "S_lat", "Long_i": "S_lon", "End_Node":"j", "Lat_j":"E_lat", "Long_j":"E_lon"}, errors="raise")

Unnamed: 0,Start_Node,Lat_i,Long_i,End_Node,Lat_j,Long_j
0,11,41.5,-88.13,50,41.69,-86.15
1,50,41.69,-86.15,11,41.5,-88.13
2,50,41.69,-86.15,53,40.72,-86.03
3,50,41.69,-86.15,77,41.43,-85.03
5,4,42.09,-88.34,22,42.51,-87.91


In [145]:
EdgeList.dtypes

i          int64
S_lat    float64
S_lon    float64
j          int64
E_lat    float64
E_lon    float64
dtype: object

In [146]:
address_edges.dtypes

i          int64
S_lat    float64
S_lon    float64
j          int64
E_lat    float64
E_lon    float64
dtype: object

In [147]:
EdgeList.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 268 entries, 0 to 3252
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   i       268 non-null    int64  
 1   S_lat   268 non-null    float64
 2   S_lon   268 non-null    float64
 3   j       268 non-null    int64  
 4   E_lat   268 non-null    float64
 5   E_lon   268 non-null    float64
dtypes: float64(4), int64(2)
memory usage: 14.7 KB


In [148]:
address_edges.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 204 entries, 0 to 1416
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   i       204 non-null    int64  
 1   S_lat   204 non-null    float64
 2   S_lon   204 non-null    float64
 3   j       204 non-null    int64  
 4   E_lat   204 non-null    float64
 5   E_lon   204 non-null    float64
dtypes: float64(4), int64(2)
memory usage: 11.2 KB


In [149]:
# # # .drop_duplicates(\
# # #                       subset =['Start_Node', 'Lat_i', 'Long_i', 'End_Node', 'Lat_j', 'Long_j'])
# # # address = address.rename(columns={"Start_Node": "i", "Lat_i": "S_lat", "Long_i": "S_lon", "End_Node":"j", "Lat_j":"E_lat", "Long_j":"E_lon"}, errors="raise")
# # # from sqlite3 import connect
# # # conn = connect(':memory:')
# query2 = 'SELECT * FROM address as a EdgeList AS b WHERE a.i = b.i AND a.j = b.j'
# temp_table = sqldf(query2, globals())
# query1 = 'SELECT a.i, a.j FROM address AS a WHERE NOT EXIST temp_table'
# # BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# # db_path = os.path.join(BASE_DIR, "PupilPremiumTable.db")
# # with sqlite3.connect(db_path) as db:
# # query = 'select a.i, a.j from address a, EdgeList b where a.i != b.i and a.j != b.j'
# # query = 'select distinct * from address as a left outer join EdgeList b on a.i != b.i and a.j != b.j'
# sqldf(query, globals())
# # # pd.read_sql(query, conn)

# # rslt_df = address[(address['i'] == EdgeList['i'])&(address['j'] == EdgeList['i'])]
edge_diff = pd.merge(address_edges,EdgeList, indicator=True, how='outer')\
         .query('_merge=="left_only"')\
         .drop('_merge', axis=1).reset_index()
edge_diff.to_csv('Edge_diff_0.01.csv')
# pd.concat([EdgeList, address]).drop_duplicates(keep=False).reset_index()

In [150]:
df2.drop(['temp','(i, j)', 'Fuel Consumption per 100 km-Diesel Equiv. l/100km', \
          'Battery Energy Consumption per Mile-kWh/mi', \
          'Vehicle Static Mass-lbm','S_lat','S_lon','E_lat','E_lon'], inplace=True, axis=1)

# Performing OLS
Note:  -1 is a dummy value (for self-loops and array padding)

In [151]:
# Setup for csv files index and column labels
# df = df_conv
# df = df_fchev
# df = df_mild
# df = df_bev
# df = df_erev
df = df2.copy()

saveFolderName = "OLS_Data"
parent_dir = os.getcwd()
savePath = os.path.join(parent_dir, saveFolderName)

# If the folder already exists, remove the folder
if os.path.exists(savePath):
    shutil.rmtree(savePath)

vehicleTypes = pd.Series(df['Vehicle Model-none'].values).unique()
tireTypes = pd.Series(df['Tire Crr-none'].values).unique()

numNodes = len(set(df.i.unique()).union(set(df.j.unique())))
print("Number of detected Nodes: ", numNodes)
cols = ["i_" + str(i) for i in range(1, numNodes+1)]
rows = ["j_" + str(i) for i in range(1, numNodes+1)]

# Loop for Vehicle Types (k)
for tires in tireTypes:
    workFrame = df[df["Tire Crr-none"] == tires]
    tempPath1 = os.path.join(savePath, "Tire_cir_" + str(tires)) 
    os.makedirs(tempPath1) # Creating the directories

    for v in vehicleTypes:
        workFrame = df[df["Vehicle Model-none"] == v]
        need_elec = False
        need_fuel = False
        
        # Figuring out what type of vehicle is currently being processed and filtering out values that are not needed
        
        # Dropping Unneeded columns at this point
        workFrame = df.drop(['Tire Crr-none', 'Cycle Name-none', 'Vehicle Model-none'],axis= 1)
        
        if   v.find('conv') != -1 :
            v_type = 'C'
            need_fuel = True
        elif v.find('erev') != -1:
            v_type = 'E'
            need_elec = True
            need_fuel = True
        elif v.find('fchev') != -1:
            v_type = 'F'
            need_fuel = True
        elif v.find('mild') != -1:
            v_type = 'M'
            need_fuel = True
        elif v.find('bev') != -1:
            v_type = 'B'
            need_elec = True
        else:
            print('ERROR! Could not find vehicle Type')
            break
            
        print('\nFor Vehiecle: ', v, ', Tire: Cir_', tires)
        print('Detected type: ', v_type)
        
        
        # Create both files anyway, and then decide later when saving
        newFrame_elec = pd.DataFrame(columns = cols, index = rows)
        newFrame_fuel = pd.DataFrame(columns = cols, index = rows)
        
        # Iterate over node values i, j, and assign constants a, b, into "newFrame"
        for i in range(1, numNodes + 1):
            for j in range(1, numNodes + 1):

                data = workFrame.loc[((workFrame['i'] == i) & (workFrame['j'] == j))]

                # edge case if i = j or data Doesn't exist
                if i == j :
                    newFrame_elec.iat[i-1, j-1] = -1
                    newFrame_fuel.iat[i-1, j-1] = -1
                    continue

                elif data.size == 0:
                    newFrame_elec.iat[i-1, j-1] = 0 #(0.0, 0.0, 0.0)
                    newFrame_fuel.iat[i-1, j-1] = 0 #(0.0, 0.0, 0.0)
                    continue
                
                # Doing linear regression here
                if   v_type == 'C' : # type conv
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'B': # type bev
                    Y_e = data['Battery Energy Consumption per km-kWh/km']
                    X_e = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'E': # type erev
                    Y_e = data['Battery Energy Consumption per km-kWh/km']
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data[['Initial SOC-%', 'Vehicle Static Mass-kg']]
                    X_e = data[['Initial SOC-%', 'Vehicle Static Mass-kg']]
                elif v_type == 'F': # type fchev
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'M': # type mild
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
    
                # Select if 2 linear regressions is needed
                # creating train and test sets
                if (need_fuel):
                    X_train, _, y_train, _ = train_test_split(X_f, Y_f, test_size=0.000001)
                    LR = LinearRegression()  # create object for the class
                    LR.fit(X_train, y_train)  # perform linear regression
                    if (len(LR.coef_) == 2): # need to check if it works
                        newFrame_fuel.iat[j - 1, i - 1] = (LR.coef_.item(0), LR.coef_.item(1), LR.intercept_.item(0)) 
                    else:
                        newFrame_fuel.iat[j - 1, i - 1] = (0.0, LR.coef_.item(0), LR.intercept_.item(0))
                if (need_elec):
                    X_train, _, y_train, _ = train_test_split(X_e, Y_e, test_size=0.000001)
                    LR = LinearRegression()  # create object for the class
                    LR.fit(X_train, y_train)  # perform linear regression
                    if (len(LR.coef_) == 2): # need to check if it works
                        newFrame_elec.iat[j - 1, i - 1] = (LR.coef_.item(0), LR.coef_.item(1), LR.intercept_.item(0))
                    else:
                        newFrame_elec.iat[j - 1, i - 1] = (0.0, LR.coef_.item(0), LR.intercept_.item(0))

        # Create the filePath
#         newFrame_fuel.fillna(0)
#         newFrame_elec.fillna(0)
        if (need_fuel):
            tempPath2 = os.path.join(tempPath1, "Fuel_" + v + ".csv")
            tempPath3 = os.path.join(tempPath1, "Fuel_" + v + ".pkl")
            newFrame_fuel.to_csv(tempPath2)
            newFrame_fuel.to_pickle(tempPath3)
        if (need_elec):
            tempPath2 = os.path.join(tempPath1, "Elec_" + v + ".csv")
            tempPath3 = os.path.join(tempPath1, "Elec_" + v + ".pkl")
            newFrame_elec.to_csv(tempPath2)
            newFrame_elec.to_pickle(tempPath3)

Number of detected Nodes:  85

For Vehiecle:  class8_conv_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  C

For Vehiecle:  class8_mild48v_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  M

For Vehiecle:  class8_erev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  E

For Vehiecle:  class8_bev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  B

For Vehiecle:  class8_fchev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  F

For Vehiecle:  class8_conv_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  C

For Vehiecle:  class8_mild48v_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  M

For Vehiecle:  class8_erev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  E

For Vehiecle:  class8_bev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  B

For Vehiecle:  class8_fchev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  F


# Function: readData
 ### Reads the source folder generated and returns a, b, c as dictionaries.
 
### Note:  -1 is a dummy value (for self-loops and array padding)

In [None]:
# OUTDATED 
# =======================================================
# FUNCTION: Reader of pickle files for (a, b) coefficients
#   a, b = readDataCSV("<Folder Name>")
#
# Arguments:
#   saveFolderName    (str ) the foldername for the stored pickle files. Default :"OLS_Data"
#
# Returns:
#   a [ k ][ i ][ j ] (dict) k is the Vehiecle Type, i is the starting node, and j is the end node
#   b [ k ][ i ][ j ] (dict) k is the Vehiecle Type, i is the starting node, and j is the end node
# 
#   Notes:
#   - i and j are 1 indexed, for convenience. i = 0 or j = 0, holds garbage values.
#   - The name of vehicles, k, needs to be exactly the same as the 
#     column from the datafile used to generate the pickle files.
# =======================================================

def readDataPKL (saveFolderName = "OLS_Data"):
    
    # Initialize some folderpath
    path = os.path.join(os.getcwd(), saveFolderName)
    
    # If the folder doesn't exist, print error and return
    if not os.path.exists(path):
        print("Error, Source Folder Doesn't exist")
        return 
    
    TireTypes = [i[9:] for i in os.listdir(path)]
    
    # Get the number of nodes brute forcedly
    filePath = os.path.join(path, 'Tire_cir_' + TireTypes[0])
    VehiecleSamplePath = [ fi for fi in os.listdir(filePath) if fi.endswith(".pkl") ]
    filePath = os.path.join(filePath, VehiecleSamplePath[0])
    df = pd.read_pickle(filePath)
    numNodes = np.shape(df.iloc[:, 0].values)[0]
    
    # Initializing return dictionary
    a = {}
    b = {}
    c = {}
    
    fuelType = ['Fuel','Elec']
    
    for t in TireTypes:
        t_path = os.path.join(path, 'Tire_cir_' + t)
        a[t] = {}
        b[t] = {}
        c[t] = {}
        VehiecleTypesPath = [ fi for fi in os.listdir(t_path) if fi.endswith(".pkl") ]
        VehiecleTypes = [i[5:len(i) - 4] for i in VehiecleTypesPath] # change to use split or something else
        VehiecleTypes = list(set(VehiecleTypes))# remove duplicates
        
        for f in fuelType:
            a[t][f] = {}
            b[t][f] = {}
            c[t][f] = {}
            for ind, v in enumerate(VehiecleTypes):
                filePath = os.path.join(t_path, f + "_" + v + ".pkl")
                if not os.path.exists(filePath):
                    a[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    b[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    c[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    continue 
                df = pd.read_pickle(filePath)

                # initialize an empty array to store the days
                a[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)
                b[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)
                c[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)

                results = df.to_numpy().transpose()
                for i, temp in enumerate(results, 1):
                    for j, ab in enumerate(temp, 1):
                        if type(ab) == tuple:
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = ab[0], ab[1] , ab[2] 
                        else:
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = np.nan, np.nan, np.nan

    return a, b, c

In [None]:
a, b, c = readDataPKL()

In [None]:
# Example lookup
t = '0.00427'
f = 'Elec' # or 'Fuel'
# v = 'class8_conv_2021_daycab_regionalhaul_FU19'
# v = 'class8_bev_2021_daycab_regionalhaul_FU19'
v = 'class8_bev_2021_low_sleeper_longhaul_FU19'
i = 1
j = 2

a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] 