In [33]:
import pandas as pd
from pandasql import sqldf
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from scipy import stats
import os

# Shutil is a recursive tool to delete file paths
# Note: Only needed if you plan to overwrite existing filepaths
import shutil

## Node Matching

In [34]:
match_list = pd.read_csv(os.getcwd() + '/given_sources/node_list.csv')
match_list

Unnamed: 0.1,Unnamed: 0,Node,Lat,Lon
0,0,1,41.69,-86.15
1,1,2,41.67,-85.99
2,2,3,41.43,-85.27
3,3,4,40.40,-86.85
4,4,5,40.72,-86.03
...,...,...,...,...
77,77,78,42.18,-87.94
78,78,79,41.72,-85.81
79,79,80,41.62,-88.12
80,80,81,42.00,-88.01


In [35]:
len(set(match_list.Lat.unique()).union(set(match_list.Lon.unique())))

139

In [36]:
# Getting the Trucks Data from Cummins Datasheet
dataSourcePath = os.getcwd() + '/given_sources/Batch_Div27_2021_03_months_Class_8_Results_metrics.xlsx'
df1 = pd.read_excel(dataSourcePath, index_col=None, usecols=['Vehicle Model-none',\
                      'Battery Energy Consumption per Mile-kWh/mi',\
                      'Fuel Consumption per 100 km-Diesel Equiv. l/100km',\
                      'Initial SOC-%',\
                      'Tire Crr-none',\
                      'Vehicle Static Mass-lbm',\
                      'Cycle Name-none'])

In [37]:
# Checking Function that compares the cycle name to the node_matching_list to label node indices
# Into the original datasheet
def checking(x):
    
    start_node_matched = False
    end_node_matched = False
    thres = 0.01
    
    for idx, lat in enumerate(match_list['Lat'].values):
        lon = match_list['Lon'].values[idx]

        a = x.replace('p', '.')
        a = a.split('_')[1:5]
        a = [round(float(j), 2) for j in a]

        # If start node matched matches
        if (((a[0] <= round(lat+thres,2)) and (a[0] >= round(lat-thres, 2))) and ((a[1] <= round(lon+thres, 2)) and (a[1] >= round(lon-thres, 2))) and not start_node_matched):
            # get the start, and end node
            i = match_list['Node'].iloc[[idx]].values[0]
            start_node_matched = True
#             print('Start Matched')
            
            
        # If end node matched
        elif(((a[2] <= round(lat+thres, 2)) and (a[2] >= round(lat-thres, 2))) and ((a[3] <= round(lon+thres, 2)) and (a[3] >= round(lon-thres, 2))) and not end_node_matched):
            j = match_list['Node'].iloc[[idx]].values[0]
            end_node_matched = True
#             print('End Matched')
            
        # If both nodes are already matched, exit
        if(start_node_matched and end_node_matched):
            return (int(i), int(j))
        
    return np.nan

In [38]:
test_case = 'Edge_39p379_-84p242_41p687_-86p149_Raw_Class_8'
# test_case = 'Edge_40p765_-87p112_41p687_-86p149_Raw_Class_8'
test_case = 'Edge_41p687_-86p149_42p253_-85p550_Raw_Class_8' # 1, 
test_case = 'Edge_41p687_-86p149_42p249_-85p543_Raw_Class_8' # 1, 
test_case = 'Edge_41p687_-86p149_42p903_-85p535_Raw_Class_8' # 1, 21
test_case = 'Edge_42p094_-88p345_42p100_-88p347_Raw_Class_8'
test_case = 'Edge_42p817_-85p986_42p812_-86p001_Raw_Class_8'
test_case = 'Edge_42p094_-88p345_42p100_-88p347_Raw_Class_8'

print("(Start Node, End Node) =", checking(test_case))
a = test_case.replace('p', '.')
a = a.split('_')[1:5]
a = [round(float(j), 2) for j in a]
print("From Data: ", a)

test_case = 'Edge_42p094_-88p345_42p100_-88p347_Raw_Class_8'
# Node 9, lon, lat: -88.34	42.1  
# Node37, lon, lat: -88.34	42.08 

(Start Node, End Node) = (18, 38)
From Data:  [42.09, -88.34, 42.1, -88.35]


In [39]:
# Finding the smallest range between 2 columns

In [43]:
# This portion of code mainly runs conversion and relabling
df2 = df1.copy()
print("Done Copying")
print("Labelling...")
temp = df2.columns.get_loc('Cycle Name-none')
df2['temp'] = df2['Cycle Name-none'].apply(checking)

a = pd.DataFrame(df2['temp'].tolist(), index=df2.index)
df2.insert(temp + 1, 'i', a[0])
df2.insert(temp + 2, 'j', a[1])

print("Done Labelling")
df2['Fuel Consumption per km-Diesel Equiv. l/km']= df2['Fuel Consumption per 100 km-Diesel Equiv. l/100km'] / 100
df2['Battery Energy Consumption per km-kWh/km']= df2['Battery Energy Consumption per Mile-kWh/mi']/1.60934
df2['Vehicle Static Mass-kg']= df2['Vehicle Static Mass-lbm']*0.453592
print("Dropping cols")
df2.drop(['temp', 'Fuel Consumption per 100 km-Diesel Equiv. l/100km', \
          'Battery Energy Consumption per Mile-kWh/mi', \
          'Vehicle Static Mass-lbm'], inplace=True, axis=1)
df2.head(n = 10)

# Make File 
print("file made!")
df2.to_csv('given_sources/labelled_data.csv')

Done Copying
Labelling...
Done Labelling
Dropping cols
file made!


# Performing OLS
Note:  -1 is a dummy value (for self-loops and array padding)

In [48]:
# Setup for csv files index and column labels
# df = df_conv
# df = df_fchev
# df = df_mild
# df = df_bev
# df = df_erev
df = df2.copy()

saveFolderName = "OLS_Data"
parent_dir = os.getcwd()
savePath = os.path.join(parent_dir, saveFolderName)

# If the folder already exists, remove the folder
if os.path.exists(savePath):
    shutil.rmtree(savePath)

vehicleTypes = pd.Series(df['Vehicle Model-none'].values).unique()
tireTypes = pd.Series(df['Tire Crr-none'].values).unique()

numNodes = len(set(df.i.unique()).union(set(df.j.unique())))
print("Number of detected Nodes: ", numNodes)
cols = ["i_" + str(i) for i in range(1, numNodes+1)]
rows = ["j_" + str(i) for i in range(1, numNodes+1)]

# Loop for Vehicle Types (k)
for tires in tireTypes:
    workFrame = df[df["Tire Crr-none"] == tires]
    tempPath1 = os.path.join(savePath, "Tire_cir_" + str(tires)) 
    os.makedirs(tempPath1) # Creating the directories

    for v in vehicleTypes:
        workFrame = df[df["Vehicle Model-none"] == v]
        need_elec = False
        need_fuel = False
        
        # Figuring out what type of vehicle is currently being processed and filtering out values that are not needed
        
        # Dropping Unneeded columns at this point
        workFrame = df.drop(['Tire Crr-none', 'Cycle Name-none', 'Vehicle Model-none'],axis= 1)
        
        if   v.find('conv') != -1 :
            v_type = 'C'
            need_fuel = True
        elif v.find('erev') != -1:
            v_type = 'E'
            need_elec = True
            need_fuel = True
        elif v.find('fchev') != -1:
            v_type = 'F'
            need_fuel = True
        elif v.find('mild') != -1:
            v_type = 'M'
            need_fuel = True
        elif v.find('bev') != -1:
            v_type = 'B'
            need_elec = True
        else:
            print('ERROR! Could not find vehicle Type')
            break
            
        print('\nFor Vehiecle: ', v, ', Tire: Cir_', tires)
        print('Detected type: ', v_type)
        
        
        # Create both files anyway, and then decide later when saving
        newFrame_elec = pd.DataFrame(columns = cols, index = rows)
        newFrame_fuel = pd.DataFrame(columns = cols, index = rows)
        
        # Iterate over node values i, j, and assign constants a, b, into "newFrame"
        for i in range(1, numNodes + 1):
            for j in range(1, numNodes + 1):

                data = workFrame.loc[((workFrame['i'] == i) & (workFrame['j'] == j))]

                # edge case if i = j or data Doesn't exist
                if i == j :
                    newFrame_elec.iat[i-1, j-1] = -1
                    newFrame_fuel.iat[i-1, j-1] = -1
                    continue

                elif data.size == 0:
                    newFrame_elec.iat[i-1, j-1] = 0 #(0.0, 0.0, 0.0)
                    newFrame_fuel.iat[i-1, j-1] = 0 #(0.0, 0.0, 0.0)
                    continue
                
                # Doing linear regression here
                if   v_type == 'C' : # type conv
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'B': # type bev
                    Y_e = data['Battery Energy Consumption per km-kWh/km']
                    X_e = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'E': # type erev
                    Y_e = data['Battery Energy Consumption per km-kWh/km']
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data[['Initial SOC-%', 'Vehicle Static Mass-kg']]
                    X_e = data[['Initial SOC-%', 'Vehicle Static Mass-kg']]
                elif v_type == 'F': # type fchev
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
                elif v_type == 'M': # type mild
                    Y_f = data['Fuel Consumption per km-Diesel Equiv. l/km']
                    X_f = data['Vehicle Static Mass-kg'].to_frame()
    
                # Select if 2 linear regressions is needed
                # creating train and test sets
                if (need_fuel):
                    X_train, _, y_train, _ = train_test_split(X_f, Y_f, test_size=0.000001)
                    LR = LinearRegression()  # create object for the class
                    LR.fit(X_train, y_train)  # perform linear regression
                    if (len(LR.coef_) == 2): # need to check if it works
                        newFrame_fuel.iat[j - 1, i - 1] = (LR.coef_.item(0), LR.coef_.item(1), LR.intercept_.item(0)) 
                    else:
                        newFrame_fuel.iat[j - 1, i - 1] = (0.0, LR.coef_.item(0), LR.intercept_.item(0))
                if (need_elec):
                    X_train, _, y_train, _ = train_test_split(X_e, Y_e, test_size=0.000001)
                    LR = LinearRegression()  # create object for the class
                    LR.fit(X_train, y_train)  # perform linear regression
                    if (len(LR.coef_) == 2): # need to check if it works
                        newFrame_elec.iat[j - 1, i - 1] = (LR.coef_.item(0), LR.coef_.item(1), LR.intercept_.item(0))
                    else:
                        newFrame_elec.iat[j - 1, i - 1] = (0.0, LR.coef_.item(0), LR.intercept_.item(0))

        # Create the filePath
#         newFrame_fuel.fillna(0)
#         newFrame_elec.fillna(0)
        if (need_fuel):
            tempPath2 = os.path.join(tempPath1, "Fuel_" + v + ".csv")
            tempPath3 = os.path.join(tempPath1, "Fuel_" + v + ".pkl")
            newFrame_fuel.to_csv(tempPath2)
            newFrame_fuel.to_pickle(tempPath3)
        if (need_elec):
            tempPath2 = os.path.join(tempPath1, "Elec_" + v + ".csv")
            tempPath3 = os.path.join(tempPath1, "Elec_" + v + ".pkl")
            newFrame_elec.to_csv(tempPath2)
            newFrame_elec.to_pickle(tempPath3)

Number of detected Nodes:  82

For Vehiecle:  class8_conv_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  C

For Vehiecle:  class8_mild48v_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  M

For Vehiecle:  class8_erev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  E

For Vehiecle:  class8_bev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  B

For Vehiecle:  class8_fchev_2021_low_sleeper_longhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  F

For Vehiecle:  class8_conv_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  C

For Vehiecle:  class8_mild48v_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  M

For Vehiecle:  class8_erev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  E

For Vehiecle:  class8_bev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  B

For Vehiecle:  class8_fchev_2021_daycab_regionalhaul_FU19 , Tire: Cir_ 0.0061
Detected type:  F


# Function: readData
 ### Reads the source folder generated and returns a, b, c as dictionaries.
 
### Note:  -1 is a dummy value (for self-loops and array padding)

In [49]:
# OUTDATED 
# =======================================================
# FUNCTION: Reader of pickle files for (a, b) coefficients
#   a, b = readDataCSV("<Folder Name>")
#
# Arguments:
#   saveFolderName    (str ) the foldername for the stored pickle files. Default :"OLS_Data"
#
# Returns:
#   a [ k ][ i ][ j ] (dict) k is the Vehiecle Type, i is the starting node, and j is the end node
#   b [ k ][ i ][ j ] (dict) k is the Vehiecle Type, i is the starting node, and j is the end node
# 
#   Notes:
#   - i and j are 1 indexed, for convenience. i = 0 or j = 0, holds garbage values.
#   - The name of vehicles, k, needs to be exactly the same as the 
#     column from the datafile used to generate the pickle files.
# =======================================================

def readDataPKL (saveFolderName = "OLS_Data"):
    
    # Initialize some folderpath
    path = os.path.join(os.getcwd(), saveFolderName)
    
    # If the folder doesn't exist, print error and return
    if not os.path.exists(path):
        print("Error, Source Folder Doesn't exist")
        return 
    
    TireTypes = [i[9:] for i in os.listdir(path)]
    
    # Get the number of nodes brute forcedly
    filePath = os.path.join(path, 'Tire_cir_' + TireTypes[0])
    VehiecleSamplePath = [ fi for fi in os.listdir(filePath) if fi.endswith(".pkl") ]
    filePath = os.path.join(filePath, VehiecleSamplePath[0])
    df = pd.read_pickle(filePath)
    numNodes = np.shape(df.iloc[:, 0].values)[0]
    
    # Initializing return dictionary
    a = {}
    b = {}
    c = {}
    
    fuelType = ['Fuel','Elec']
    
    for t in TireTypes:
        t_path = os.path.join(path, 'Tire_cir_' + t)
        a[t] = {}
        b[t] = {}
        c[t] = {}
        VehiecleTypesPath = [ fi for fi in os.listdir(t_path) if fi.endswith(".pkl") ]
        VehiecleTypes = [i[5:len(i) - 4] for i in VehiecleTypesPath] # change to use split or something else
        VehiecleTypes = list(set(VehiecleTypes))# remove duplicates
        
        for f in fuelType:
            a[t][f] = {}
            b[t][f] = {}
            c[t][f] = {}
            for ind, v in enumerate(VehiecleTypes):
                filePath = os.path.join(t_path, f + "_" + v + ".pkl")
                if not os.path.exists(filePath):
                    a[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    b[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    c[t][f][v] = np.full((numNodes + 1, numNodes + 1), np.nan)
                    continue 
                df = pd.read_pickle(filePath)

                # initialize an empty array to store the days
                a[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)
                b[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)
                c[t][f][v] = np.full((numNodes + 1, numNodes + 1), -1, dtype = float)

                results = df.to_numpy().transpose()
                for i, temp in enumerate(results, 1):
                    for j, ab in enumerate(temp, 1):
                        if type(ab) == tuple:
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = ab[0], ab[1] , ab[2] 
                        else:
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = np.nan, np.nan, np.nan

    return a, b, c

In [50]:
a, b, c = readDataPKL()

In [51]:
# Example lookup
t = '0.00427'
f = 'Elec' # or 'Fuel'
# v = 'class8_conv_2021_daycab_regionalhaul_FU19'
# v = 'class8_bev_2021_daycab_regionalhaul_FU19'
v = 'class8_bev_2021_low_sleeper_longhaul_FU19'
i = 1
j = 2

a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] 

(0.0, 1.2455390960503074e-05, 0.2943450392769703)