In [2]:
import pandas as pd
# from pandasql import sqldf
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from scipy import stats
import os

# Shutil is a recursive tool to delete file paths
# Note: Only needed if you plan to overwrite existing filepaths
import shutil

## Node Matching

In [3]:
# Load the node list from the "reso0.02" folder under the current working directory;
# the first CSV column is used as the index.
node_list_path = os.path.join(os.getcwd(), "reso0.02", 'Node_list_reso0.02_sort.csv')
match_list = pd.read_csv(node_list_path, index_col=0)
match_list

Unnamed: 0,Node,Lon,Lat
0,1,-89.02,41.84
1,2,-88.36,41.80
2,3,-88.34,42.08
3,4,-88.34,42.10
4,5,-88.32,42.02
...,...,...,...
79,80,-83.56,42.24
80,81,-83.54,41.58
81,82,-83.38,42.14
82,83,-83.36,42.38


In [4]:
# Load only the columns the analysis needs from the Cummins results workbook.
dataSourcePath = 'given_sources//Batch_Div27_2021_03_months_Class_8_Results_metrics.xlsx'
truck_columns = [
    'Vehicle Model-none',
    'Battery Energy Consumption per Mile-kWh/mi',
    'Fuel Consumption per 100 km-Diesel Equiv. l/100km',
    'Initial SOC-%',
    'Tire Crr-none',
    'Vehicle Static Mass-lbm',
    'Cycle Name-none',
]
df1 = pd.read_excel(dataSourcePath, index_col=None, usecols=truck_columns)

In [16]:
# Ruiting Checking method
# Logarithmic histogram plots
from cmath import nan


def checking_par(x):
    """Extract four floats from an underscore-separated cycle name.

    Every 'p' in the name is read as a decimal point, the name is split on '_',
    and fields 1 through 4 (the first field is skipped) are converted to float.

    Returns a 4-tuple of floats. Raises ValueError if a field is not numeric
    and IndexError if fewer than four fields follow the first one.
    """
    fields = x.replace('p', '.').split('_')[1:5]
    values = [float(field) for field in fields]
    return (values[0], values[1], values[2], values[3])

def checking_rest(a, nodes=None, thres=0.02):
    """Match a route's start and end coordinates to node ids by proximity.

    The previous version referenced `i_dist` / `j_dist` that only existed inside
    a string literal, so every call raised NameError. The distances are now
    computed for all nodes at once.

    Parameters
    ----------
    a : sequence of 4 numbers
        (start_lat, start_lon, end_lat, end_lon), as produced by `checking_par`.
    nodes : pandas.DataFrame, optional
        Table with 'Node', 'Lat' and 'Lon' columns. Defaults to the notebook-level
        `match_list`.
    thres : float, optional
        Largest accepted distance between a coordinate and a node, in the same
        units as the coordinates.

    Returns
    -------
    tuple of (int, int) or float
        (start_node, end_node) when both endpoints have a node within `thres`;
        otherwise `np.nan`.
    """
    if nodes is None:
        nodes = match_list

    a = [float(x) for x in a]

    lat = nodes['Lat'].to_numpy(dtype=float)
    lon = nodes['Lon'].to_numpy(dtype=float)
    node_ids = nodes['Node'].to_numpy()

    # Nothing to match against.
    if node_ids.size == 0:
        return np.nan

    # Plain Euclidean distance in coordinate space.
    # NOTE(review): the original carried a "NEED MODIFICATION" remark on this
    # formula -- confirm whether a geodesic distance is wanted instead.
    i_dist = np.sqrt((lat - a[0]) ** 2 + (lon - a[1]) ** 2)
    j_dist = np.sqrt((lat - a[2]) ** 2 + (lon - a[3]) ** 2)

    # Take the closest node for each endpoint (ties resolve to the first row).
    i_idx = int(np.argmin(i_dist))
    j_idx = int(np.argmin(j_dist))

    if i_dist[i_idx] > thres or j_dist[j_idx] > thres:
        return np.nan

    return (int(node_ids[i_idx]), int(node_ids[j_idx]))

df2 = df1.copy()
print("Labelling...")

# Parse the four coordinates out of every cycle name.
df2['temp'] = df2['Cycle Name-none'].apply(checking_par)

# Spread the parsed tuples into columns; halve, round to 2 decimals, then double.
coords = pd.DataFrame(df2.temp.tolist(), index= df2.index)
df2[['S_lat','S_lon','E_lat','E_lon']] = coords.div(2).round(2).multiply(2)

# Exact coordinate join against the node list: first the start point, then the end point.
# The node-list columns of the two joins get the '_x' / '_y' suffixes respectively.
start_joined = df2.merge(match_list,how='left',left_on=['S_lat','S_lon'],right_on=['Lat','Lon'])
df3 = start_joined.merge(match_list,how='left',left_on=['E_lat','E_lon'],right_on=['Lat','Lon'])

# Threshold-based match for every row, stored as an (i, j) tuple or NaN.
df2['(i, j)'] = df2['temp'].apply(checking_rest)

# Placeholder node columns placed right after the cycle-name column.
temp = df2.columns.get_loc('Cycle Name-none')
df2.insert(temp + 1, 'i', 0)
df2.insert(temp + 2, 'j', 0)


print("Done Labelling")

# Derived per-km / kg columns computed from the source columns.
fuel_per_100km = df2['Fuel Consumption per 100 km-Diesel Equiv. l/100km']
df2['Fuel Consumption per km-Diesel Equiv. l/km']= fuel_per_100km / 100
battery_per_mile = df2['Battery Energy Consumption per Mile-kWh/mi']
df2['Battery Energy Consumption per km-kWh/km']= battery_per_mile/1.60934
mass_lbm = df2['Vehicle Static Mass-lbm']
df2['Vehicle Static Mass-kg']= mass_lbm*0.453592

NameError: name 'df1' is not defined

In [None]:
# Preview the first five labelled rows.
df2.head(5)

In [None]:
# Node ids from the exact coordinate merge (df3); NaN where the merge found no node.
df2['i'] = df3['Node_x']#.astype(int)
df2['j'] = df3['Node_y']#.astype(int)

# Fall back to the threshold-based match stored in '(i, j)' for rows the merge missed.
# This replaces a row loop that used chained assignment (df2.i[ind] = ...), which
# pandas does not guarantee writes back to df2, and that crashed with a TypeError
# whenever '(i, j)' held NaN instead of a tuple.
fallback_pairs = df2['(i, j)']
for col, pos in (('i', 0), ('j', 1)):
    fallback_ids = fallback_pairs.apply(
        lambda pair, pos=pos: pair[pos] if isinstance(pair, tuple) else np.nan
    )
    df2[col] = df2[col].fillna(fallback_ids)

# Fail with a clear message rather than letting astype(int) choke on NaN.
unmatched = df2.index[df2['i'].isna() | df2['j'].isna()]
if len(unmatched) > 0:
    raise ValueError(
        "Could not match start/end nodes for %d row(s); first few indices: %s"
        % (len(unmatched), list(unmatched[:10]))
    )

print("Dropping cols")
df2['i'] = df2['i'].astype(int)
df2['j'] = df2['j'].astype(int)
df2.to_csv('energy_w_n.csv')
# The helper/source columns are currently kept in the saved CSV; the drop below is disabled.
# df2.drop(['temp','(i, j)', 'Fuel Consumption per 100 km-Diesel Equiv. l/100km', \
#           'Battery Energy Consumption per Mile-kWh/mi', \
#           'Vehicle Static Mass-lbm','S_lat','S_lon','E_lat','E_lon'], inplace=True, axis=1)

In [None]:
# Every node in the node list should appear at least once as a start (i) or end (j).
# The expected ids come from match_list instead of a hard-coded 1..84 range, which
# does not follow the node list if its size differs.
expected_nodes = set(match_list['Node'])
seen_nodes = set(df2.i.unique()).union(set(df2.j.unique()))
print(expected_nodes - seen_nodes) # Should be empty
df2.head(n = 10)

# Performing OLS
Note: in the saved matrices, -1 is a dummy value for self-loops (i = j) and 0 marks node pairs with no data.

In [None]:
# Fit one OLS model per (tire, vehicle, start node i, end node j) and save the
# coefficients as node-by-node matrices under OLS_Data/Tire_cir_<tire>/.
# Other candidate inputs used during development: df_conv, df_fchev, df_mild, df_bev, df_erev
df = df2.copy()

saveFolderName = "OLS_Data"
parent_dir = os.getcwd()
savePath = os.path.join(parent_dir, saveFolderName)

# Start from a clean output folder: an existing one is deleted recursively.
if os.path.exists(savePath):
    shutil.rmtree(savePath)

vehicleTypes = pd.Series(df['Vehicle Model-none'].values).unique()
tireTypes = pd.Series(df['Tire Crr-none'].values).unique()

# NOTE(review): the matrix size is the number of distinct node ids, which assumes
# the ids are exactly 1..numNodes -- confirm against the node list.
numNodes = len(set(df.i.unique()).union(set(df.j.unique())))
print("Number of detected Nodes: ", numNodes)
cols = ["i_" + str(i) for i in range(1, numNodes+1)]
rows = ["j_" + str(i) for i in range(1, numNodes+1)]

FUEL_COL = 'Fuel Consumption per km-Diesel Equiv. l/km'
ELEC_COL = 'Battery Energy Consumption per km-kWh/km'
MASS_COL = 'Vehicle Static Mass-kg'
SOC_COL = 'Initial SOC-%'

# (substring of the model name, type code, needs fuel model, needs electric model),
# checked in this order.
_VEHICLE_KINDS = (
    ('conv',  'C', True,  False),
    ('erev',  'E', True,  True),
    ('fchev', 'F', True,  False),
    ('mild',  'M', True,  False),
    ('bev',   'B', False, True),
)


def _classify_vehicle(name):
    """Return (type code, need_fuel, need_elec) for a model name, or None if unrecognised."""
    for tag, code, fuel, elec in _VEHICLE_KINDS:
        if tag in name:
            return code, fuel, elec
    return None


def _empty_matrix():
    """Return a rows-by-cols object frame holding 0 everywhere and -1 on the diagonal."""
    frame = pd.DataFrame(0, index=rows, columns=cols, dtype=object)
    for k in range(numNodes):
        frame.iat[k, k] = -1
    return frame


def _fit_coefficients(data, x_cols, y_col):
    """Fit y ~ X by OLS and return (soc_coef, mass_coef, intercept); soc_coef is 0.0 for one regressor."""
    # Fit on every available sample. The earlier train_test_split(test_size=1e-6)
    # always held out one random, unseeded sample, which made the coefficients
    # vary between runs and raised an error for node pairs with a single sample.
    LR = LinearRegression()
    LR.fit(data[x_cols], data[y_col])
    coefs = np.ravel(LR.coef_)
    intercept = float(LR.intercept_)
    if len(coefs) == 2:
        return (float(coefs[0]), float(coefs[1]), intercept)
    return (0.0, float(coefs[0]), intercept)


# Outer loop: tire type; inner loop: vehicle model (k)
for tires in tireTypes:
    tempPath1 = os.path.join(savePath, "Tire_cir_" + str(tires))
    os.makedirs(tempPath1) # Creating the directories
    tireFrame = df[df["Tire Crr-none"] == tires]

    for v in vehicleTypes:
        kind = _classify_vehicle(v)
        if kind is None:
            # Skip only this model; a `break` here would silently drop every
            # model that comes after it in vehicleTypes.
            print('ERROR! Could not find vehicle Type')
            continue
        v_type, need_fuel, need_elec = kind

        print('\nFor Vehiecle: ', v, ', Tire: Cir_', tires)
        print('Detected type: ', v_type)

        # Keep the rows for this tire AND this vehicle. Previously the frame was
        # rebuilt from the full `df`, so both filters were lost.
        workFrame = tireFrame[tireFrame["Vehicle Model-none"] == v]

        # Only the 'E' (erev) models also regress on the initial state of charge.
        x_cols = [SOC_COL, MASS_COL] if v_type == 'E' else [MASS_COL]

        newFrame_fuel = _empty_matrix() if need_fuel else None
        newFrame_elec = _empty_matrix() if need_elec else None

        # One regression per node pair that actually has data. Results are stored
        # at [row j, column i], matching the "j_" row / "i_" column labels; pairs
        # without data keep the 0 they were initialised with.
        for (i, j), data in workFrame.groupby(['i', 'j']):
            i, j = int(i), int(j)
            if i == j or not (1 <= i <= numNodes and 1 <= j <= numNodes):
                continue
            if need_fuel:
                newFrame_fuel.iat[j - 1, i - 1] = _fit_coefficients(data, x_cols, FUEL_COL)
            if need_elec:
                newFrame_elec.iat[j - 1, i - 1] = _fit_coefficients(data, x_cols, ELEC_COL)

        # Save each required matrix as both CSV and pickle.
        if need_fuel:
            newFrame_fuel.to_csv(os.path.join(tempPath1, "Fuel_" + v + ".csv"))
            newFrame_fuel.to_pickle(os.path.join(tempPath1, "Fuel_" + v + ".pkl"))
        if need_elec:
            newFrame_elec.to_csv(os.path.join(tempPath1, "Elec_" + v + ".csv"))
            newFrame_elec.to_pickle(os.path.join(tempPath1, "Elec_" + v + ".pkl"))


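# Function: readDataPKL
### Reads the pickle files in the generated source folder and returns the coefficients a, b, c as nested dictionaries.

### Note: in the returned arrays, -1 only marks the padding row 0 and column 0; self-loops and node pairs without a fitted model are returned as NaN.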

In [None]:
# =======================================================
# FUNCTION: Reader of pickle files for the (a, b, c) coefficients
#   a, b, c = readDataPKL("<Folder Name>")
#
# Arguments:
#   saveFolderName    (str ) the foldername for the stored pickle files. Default :"OLS_Data"
#                            It is joined onto the current working directory.
#
# Returns:
#   a [ t ][ f ][ v ][ i ][ j ]   first element of each stored coefficient tuple
#   b [ t ][ f ][ v ][ i ][ j ]   second element
#   c [ t ][ f ][ v ][ i ][ j ]   third element
#     t (str) is the tire label taken from the "Tire_cir_<t>" folder name,
#     f (str) is 'Fuel' or 'Elec',
#     v (str) is the vehicle name taken from the "<f>_<v>.pkl" file name,
#     i, j (int) index a (numNodes + 1) x (numNodes + 1) float array.
#   Returns None (after printing an error) if the folder does not exist.
#
#   Notes:
#   - i and j are 1 indexed, for convenience. Row 0 and column 0 keep the -1 padding value.
#   - Cells that do not hold a tuple in the pickle are returned as NaN.
#   - A (f, v) combination with no pickle file gets arrays filled entirely with NaN.
#   - The name of vehicles, v, needs to be exactly the same as the
#     column from the datafile used to generate the pickle files.
# =======================================================

def readDataPKL (saveFolderName = "OLS_Data"):
    """Load the coefficient pickles under `saveFolderName` into nested dicts a, b, c."""
    prefix = 'Tire_cir_'
    fuelType = ['Fuel','Elec']

    # Initialize some folderpath
    path = os.path.join(os.getcwd(), saveFolderName)

    # If the folder doesn't exist, print error and return
    if not os.path.exists(path):
        print("Error, Source Folder Doesn't exist")
        return

    # Only sub-folders named "Tire_cir_<t>" count as tire folders; any other
    # entry used to be sliced into a bogus tire label and broke the reader.
    TireTypes = sorted(
        entry[len(prefix):]
        for entry in os.listdir(path)
        if entry.startswith(prefix) and os.path.isdir(os.path.join(path, entry))
    )

    # Pickle files per tire folder, restricted to the "Fuel_" / "Elec_" naming scheme.
    pickles = {}
    for t in TireTypes:
        t_path = os.path.join(path, prefix + t)
        pickles[t] = sorted(
            fi for fi in os.listdir(t_path)
            if fi.endswith(".pkl") and fi.startswith(tuple(f + "_" for f in fuelType))
        )

    # Get the number of nodes from the first pickle that exists.
    # SECURITY: unpickling can execute arbitrary code -- only point this at
    # folders produced by this notebook.
    numNodes = 0
    for t in TireTypes:
        if pickles[t]:
            sample = pd.read_pickle(os.path.join(path, prefix + t, pickles[t][0]))
            numNodes = sample.shape[0]
            break

    # Initializing return dictionary
    a = {}
    b = {}
    c = {}
    side = numNodes + 1

    for t in TireTypes:
        t_path = os.path.join(path, prefix + t)
        a[t] = {}
        b[t] = {}
        c[t] = {}

        # Strip the 5-character "Fuel_" / "Elec_" prefix and the ".pkl" suffix,
        # then drop duplicates (a vehicle can have both a Fuel and an Elec file).
        VehiecleTypes = sorted({fi[5:-4] for fi in pickles[t]})

        for f in fuelType:
            a[t][f] = {}
            b[t][f] = {}
            c[t][f] = {}
            for v in VehiecleTypes:
                filePath = os.path.join(t_path, f + "_" + v + ".pkl")
                if not os.path.exists(filePath):
                    a[t][f][v] = np.full((side, side), np.nan)
                    b[t][f][v] = np.full((side, side), np.nan)
                    c[t][f][v] = np.full((side, side), np.nan)
                    continue
                df = pd.read_pickle(filePath)

                # -1 everywhere to start with; row 0 / column 0 keep it as padding
                a[t][f][v] = np.full((side, side), -1, dtype = float)
                b[t][f][v] = np.full((side, side), -1, dtype = float)
                c[t][f][v] = np.full((side, side), -1, dtype = float)

                # The transpose puts the frame's columns first, so the first
                # array index walks the columns and the second the rows.
                results = df.to_numpy().transpose()
                for i, column in enumerate(results, 1):
                    for j, ab in enumerate(column, 1):
                        if isinstance(ab, tuple):
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = ab[0], ab[1], ab[2]
                        else:
                            a[t][f][v][i][j], b[t][f][v][i][j], c[t][f][v][i][j] = np.nan, np.nan, np.nan

    return a, b, c

In [None]:
# Load the coefficient dictionaries from the default output folder.
a, b, c = readDataPKL(saveFolderName = "OLS_Data")

In [None]:
# Example lookup: coefficients (a, b, c) for one tire / energy type / vehicle / node pair
t = '0.00427'
f = 'Elec' # or 'Fuel'
# Other vehicle names that can be used here:
# v = 'class8_conv_2021_daycab_regionalhaul_FU19'
# v = 'class8_bev_2021_daycab_regionalhaul_FU19'
v = 'class8_bev_2021_low_sleeper_longhaul_FU19'
i = 1
j = 2

tuple(coefficients[t][f][v][i][j] for coefficients in (a, b, c))