In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import os

# shutil provides high-level file operations; shutil.rmtree deletes a directory tree recursively.
# Note: It is only needed because generateData removes an existing output folder before regenerating it.
import shutil

# Function: generateData
### Creates the lognormal Sampled data and stores it in csv files. 

> (str) dataSourcePath : path of the source csv data file  
(str) saveFolderName : name of the folder to save the data in (an existing folder with this name is deleted and recreated)  
(int) days : number of days to generate  
(float) meanMult : multiplier applied to each edge mean to obtain the lognormal standard-deviation parameter  
(int) randomSeed : random seed for the lognormal sampling process


#### Note:  -1 is a placeholder for N/A values (self loops, or any node pair with no edge in the source data); -2 is the padding value added when the files are read back (for lookup convenience) 


In [2]:
# This function will store the sampled data into multiple csv files.
# To be accurate, the number of files generated will be:
# (Total CSV Files) = (# of Vehicle Types) * (# of days) * (# of states)

def generateData (dataSourcePath = 'edge_cost.csv', saveFolderName = "FreshData", days = 1, meanMult = 0.15, randomSeed = 1, numStates = 6):
    """Sample a lognormal cost for every edge and write one csv per vehicle type, day and state.

    Files are written below the current working directory as
    ``<saveFolderName>/<vehicle type>/d<day>/state<k>.csv``. Each file is a
    numNodes x numNodes table whose columns are labelled ``i_1..i_n`` and whose
    rows are labelled ``j_1..j_n``. A node pair that has no row in the source
    data (this is how self loops show up) is stored as -1.

    Parameters:
        dataSourcePath (str): csv file with the columns ``Type``, ``Start_Node``,
            ``End_Node`` and ``State<k>_kwh_mile`` for k = 1..numStates.
        saveFolderName (str): output folder name. WARNING: an existing folder
            with this name is deleted recursively before the new data is written.
        days (int): number of day folders (d1..d<days>) to generate per vehicle type.
        meanMult (float): the lognormal shape parameter is ``meanMult * <edge value>``.
        randomSeed (int): seed passed to ``np.random.seed`` before sampling.
        numStates (int): number of ``State<k>_kwh_mile`` columns to sample (default 6).

    Raises:
        KeyError: if the source file lacks a required column. This is checked
            before the existing output folder is deleted, so a malformed source
            file no longer destroys previously generated data.

    NOTE(review): ``stats.lognorm(s = stdv, scale = mean)`` uses scipy's
    parameterisation, where ``scale`` is exp(mu) of the underlying normal. The
    source value therefore acts as the median of the samples rather than their
    mean - confirm this is intended.

    NOTE(review): the sample for (Start_Node = a, End_Node = b) is written to
    row a / column b, i.e. Start_Node runs along the rows labelled ``j_`` and
    End_Node along the columns labelled ``i_`` - confirm this orientation
    matches what the consumers of the files expect.
    """
    np.random.seed(randomSeed)
    df = pd.read_csv(dataSourcePath)

    # Validate the schema up front, before anything on disk is touched.
    stateCols = ["State" + str(s) + "_kwh_mile" for s in range(1, numStates + 1)]
    missing = [c for c in ["Type", "Start_Node", "End_Node"] + stateCols if c not in df.columns]
    if missing:
        raise KeyError("Missing column(s) in " + str(dataSourcePath) + ": " + ", ".join(missing))

    # Getting information from the table
    vehicleTypes = df.Type.unique()
    numNodes = max(df.Start_Node.max(), df.End_Node.max())
    cols = ["i_" + str(i) for i in range(1, numNodes + 1)]
    index = ["j_" + str(i) for i in range(1, numNodes + 1)]

    # If the folder already exists, remove the folder
    savePath = os.path.join(os.getcwd(), saveFolderName)
    if os.path.exists(savePath):
        shutil.rmtree(savePath)

    for v in vehicleTypes:
        vehiclePath = os.path.join(savePath, v)

        # Build one {(start, end): value} lookup per state instead of filtering
        # the whole frame for every (i, j) cell. Only the first row of each
        # node pair is kept, matching the previous "first match wins" behaviour.
        edges = df[df["Type"] == v].drop_duplicates(subset = ["Start_Node", "End_Node"])
        edgeKeys = list(zip(edges["Start_Node"], edges["End_Node"]))
        meansByState = [dict(zip(edgeKeys, edges[c].to_numpy())) for c in stateCols]

        # Iterate over Days
        for d in range(1, days + 1):
            dayPath = os.path.join(vehiclePath, "d" + str(d))
            os.makedirs(dayPath)

            for s, edgeMeans in enumerate(meansByState, start = 1):
                # Fresh table per state so no value can leak between files.
                newFrame = pd.DataFrame(columns = cols, index = index)

                # Iterate over node values.
                for i in range(1, numNodes + 1):
                    for j in range(1, numNodes + 1):
                        mean = edgeMeans.get((i, j))

                        # No such edge in the source data (e.g. i == j)
                        if mean is None:
                            newFrame.iat[i - 1, j - 1] = -1
                            continue
                        stdv = meanMult * mean

                        # This draw is discarded on purpose: the original code drew
                        # (and ignored) one numpy sample per edge, and keeping it
                        # leaves the random stream - and therefore the files
                        # produced for any given seed - unchanged.
                        np.random.lognormal(mean, stdv)
                        newFrame.iat[i - 1, j - 1] = stats.lognorm(s = stdv, scale = mean).rvs()

                # Create the filePath
                newFrame.to_csv(os.path.join(dayPath, "state" + str(s) + ".csv"))

In [3]:
# Generate 5 days of sampled data using the default source file and output folder.
# Note: generateData deletes the output folder first if it already exists.
generateData (days = 5, randomSeed = 1)

# Function: readDataCSV
### Reads the source folder generated by the "generateData" function and stores it in a dictionary of 4-dimensional arrays (five lookup levels in total).
> Input:  
(str) folderName: Folder Name of the source folder
  
Output: 
> arr [ v ][ d ][ k ][ i ][ j ]

>v : Vehicle Type (string)  
d : Day   (int), index starts from 1  
k : State (int), index starts from 1  
i : Node  (int), index starts from 1  
j : Node  (int), index starts from 1  

#### Note:  -1 is a placeholder for N/A values (self loops) and -2 is used as padding at index 0 of every axis, so that day, state and node lookups can start from 1 

In [10]:
# Reads out the data and collects them into a dictionary.

def _listSubfolders (parent):
    """Return the sorted names of the visible (non-dot) sub-directories of `parent`."""
    return sorted(
        name for name in os.listdir(parent)
        if not name.startswith(".") and os.path.isdir(os.path.join(parent, name))
    )


def readDataCSV (folderName = "FreshData", numStates = 6):
    """Load a ``<folderName>/<vehicle type>/d<day>/state<k>.csv`` tree into a dictionary.

    Parameters:
        folderName (str): source folder, relative to the current working directory.
        numStates (int): number of ``state<k>.csv`` files per day folder (default 6).

    Returns:
        dict mapping each vehicle-type folder name to a float64 array of shape
        ``(days + 1, numStates + 1, numNodes + 1, numNodes + 1)``.
        ``arr[v][d][k][i][j]`` is the value found in data column ``i`` and data
        row ``j`` of ``<v>/d<d>/state<k>.csv`` (1-based, label column excluded).
        Index 0 along every axis is padding and holds -2.
        Returns None (after printing an error) when the folder does not exist
        or contains no vehicle / day sub-folders.

    The number of days and the number of nodes are taken from the first vehicle
    type and assumed to be the same for all of them. Only sub-directories are
    considered, and dot-entries such as ``.ipynb_checkpoints`` are ignored, so
    stray files no longer crash the reader or inflate the day count.
    """
    path = os.path.join(os.getcwd(), folderName)
    # If the folder doesn't exist there is nothing to read
    if not os.path.exists(path):
        print("Error, Source Folder Doesn't exist")
        return

    vehicleTypes = _listSubfolders(path)
    if not vehicleTypes:
        print("Error, Source Folder contains no vehicle type folders")
        return

    # Day folders are named d1, d2, ...; anything else is not counted.
    dayFolders = [
        name for name in _listSubfolders(os.path.join(path, vehicleTypes[0]))
        if name[:1] == "d" and name[1:].isdigit()
    ]
    if not dayFolders:
        print("Error, Source Folder contains no day folders")
        return
    days = len(dayFolders)

    # Get the number of nodes from the first state file (rows of the table)
    firstStatePath = os.path.join(path, vehicleTypes[0], dayFolders[0], "state1.csv")
    numNodes = pd.read_csv(firstStatePath).iloc[:, 1:].shape[0]

    arr = {}
    for v in vehicleTypes:
        # Everything starts as padding (-2); real data is written from index 1 on.
        arr[v] = np.full((days + 1, numStates + 1, numNodes + 1, numNodes + 1), -2, dtype = np.float64)

        # Iterating over the days
        for d in range(1, days + 1):
            # Iterate over all the states.
            for s in range(1, numStates + 1):
                statePath = os.path.join(path, v, "d" + str(d), "state" + str(s) + ".csv")

                # Remove the label column from the start
                values = pd.read_csv(statePath).iloc[:, 1:].to_numpy()

                # Transposed so that the first node index selects the csv column;
                # row 0 and column 0 keep their -2 padding.
                arr[v][d, s, 1:, 1:] = values.transpose()
    return arr

In [11]:
# Load the default "FreshData" folder into a dictionary of arrays keyed by vehicle type.
testarr = readDataCSV()


Curr Path:  C:\Users\admin\Documents\__SP22\DOETrucksMeeting\FreshData\daycab_regionalhaul\d1\state1.csv /n
[[-2.         -2.         -2.         -2.         -2.         -2.
  -2.        ]
 [-2.         -1.          1.34414673  1.26072118  1.50526378  1.48539383
   1.37452443]
 [-2.          1.75830858 -1.          1.45350336  1.34190621  1.58259595
   1.18780089]
 [-2.          0.90779785  1.20880398 -1.          1.55139745  1.9801727
   1.15680332]
 [-2.          1.7609275   1.1012993   1.05857506 -1.          1.62926965
   1.96160468]
 [-2.          2.17752755  1.67973819  1.47658497  1.18486434 -1.
   1.62025265]
 [-2.          1.11426269  2.06419107  1.30256139  1.86006468  1.10534788
  -1.        ]]

Curr Path:  C:\Users\admin\Documents\__SP22\DOETrucksMeeting\FreshData\daycab_regionalhaul\d1\state2.csv /n
[[-2.         -2.         -2.         -2.         -2.         -2.
  -2.        ]
 [-2.         -1.          1.64901626  1.45735481  1.35930856  1.04340632
   1.44166164]
 [-2.

[[-2.         -2.         -2.         -2.         -2.         -2.
  -2.        ]
 [-2.         -1.          2.87009346  1.64060212  2.67431771  1.90110959
   2.24551315]
 [-2.          2.23171117 -1.          2.12887422  2.05000123  3.50026516
   2.75495508]
 [-2.          2.53794264  2.26528711 -1.          1.30219746  1.36323606
   1.75693695]
 [-2.          1.57131605  2.50338197  1.94373887 -1.          2.3796998
   2.23269063]
 [-2.          1.70615893  2.26868851  1.56452076  2.72650678 -1.
   2.82653047]
 [-2.          1.85167562  1.46520676  1.66206362  1.76952477  1.42194367
  -1.        ]]

Curr Path:  C:\Users\admin\Documents\__SP22\DOETrucksMeeting\FreshData\low_sleeper_longhaul\d3\state5.csv /n
[[-2.         -2.         -2.         -2.         -2.         -2.
  -2.        ]
 [-2.         -1.          2.21447362  1.53276019  2.09305756  0.98157688
   1.41796004]
 [-2.          2.71264168 -1.          1.9094792   2.76707831  1.93152702
   3.21971853]
 [-2.          1.4031195

In [12]:
# Example lookup: vehicle type 'daycab_regionalhaul', day 4, state 5, node indices i = 2, j = 1
testarr['daycab_regionalhaul'][4][5][2][1]

2.04703264333656