# Truck 2 Analysis

#### Imports and global variables are defined here.

In [1]:
# Imports required for this notebook.
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt

# Path to the CSV file containing the data for truck two (2), given relative to the
# directory the notebook is run from.
truckTwoPath = "../data/trucks/truck2.csv"

# Path to the CSV column dictionary for truck two (2), given relative to the directory
# the notebook is run from. It is used below to build the column-rename mapping.
truckTwoDictPath = "../data/dictionary/truck2dict.csv"

#### Functions are defined here.
*NOTE: We need to discuss reusable code and how we're going to handle commonly-used functions.*

In [2]:
"""Uses Pandas's read_csv method to read a CSV file and returns a DataFrame of it to the notebook.
This function reads in all rows and defines the header row at index 0 by default."""
def readCsv(truck, records = None, headerIdx = 0):
    """Load a CSV file into a pandas DataFrame.

    Parameters:
        truck: Path or file-like object handed to ``pd.read_csv``.
        records: Number of data rows to read; ``None`` (the default) reads all rows.
        headerIdx: Zero-based index of the row that holds the column names (default 0).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    # The header position is passed to pandas as a one-element list.
    headerRows = [headerIdx]
    frame = pd.read_csv(truck, header = headerRows, nrows = records)
    return frame

"""Creates a Python dictionary from a pre-defined CSV dictionary.
This will only look at the first two columns of the given CSV file."""
def createPythonDictionary(dictionaryCSV):
    """Build a Python dictionary from a CSV "dictionary" file.

    Each row contributes one entry: the first cell is the key and the second cell is the
    value. Any further cells in a row are ignored. Rows with fewer than two cells (such as
    blank lines) are skipped instead of raising an IndexError. When a key appears more
    than once, the last occurrence wins.

    Parameters:
        dictionaryCSV: Path of the CSV file to read.

    Returns:
        A dict mapping first-column values to second-column values.
    """
    # newline='' leaves line-ending handling to the csv module, as its documentation
    # recommends, so quoted cells that contain line breaks are parsed correctly.
    with open(dictionaryCSV, 'r', newline = '') as f:
        return {row[0]: row[1] for row in csv.reader(f) if len(row) >= 2}

"""Returns a DataFrame without the UTC columns. Specific to the given Volvo truck data."""
def cullUtcCols(truckData):
    """Return a copy of ``truckData`` with the six "UTC ..." columns removed.

    Parameters:
        truckData: DataFrame that contains the columns "UTC hour", "UTC minute",
            "UTC second", "UTC month", "UTC day" and "UTC year".

    Returns:
        A new DataFrame without those columns; ``truckData`` itself is not modified.
    """
    utcColumns = [
        "UTC hour",
        "UTC minute",
        "UTC second",
        "UTC month",
        "UTC day",
        "UTC year",
    ]
    return truckData.drop(columns = utcColumns)

"""Takes a DataFrame and a dictionary as parameters; uses the dictionary to rename all matching columns then
returns the changed DataFrame."""
def renameColumns(dataFrame, dictionary):
    """Rename the columns of ``dataFrame`` according to ``dictionary``.

    Parameters:
        dataFrame: DataFrame whose columns should be renamed.
        dictionary: Mapping of current column name -> new column name.

    Returns:
        A new DataFrame with the matching columns renamed; ``dataFrame`` is left as is.
    """
    renamed = dataFrame.rename(dictionary, axis = "columns")
    return renamed

#### Notebook driver script from here on.

Reading in the truck data. To change how many records are read, pass `records = <#>` to the `readCsv` function; omit the argument entirely to read all records.

In [3]:
# Load at most the first 100,000 data rows of the truck two CSV; omit `records` to read
# the whole file.
truckTwoDf = readCsv(truckTwoPath, records = 100000)
# Preview the first five rows.
truckTwoDf.head()

Unnamed: 0,Time,C_1115_1_Alternator_250A,C_1115_2_BattOut_100A,C_1115_3_Fridge_20A,C_1115_4_Trailer_50A,C_1115_5_Inventer_100A,TotalVehicleDistanceHighRes,EngineSpeed,WheelBasedVehicleSpeed,asu_UreaTankTemp,...,tse_TurboSpeed,CPU load,UTC hour,UTC minute,UTC second,UTC month,UTC day,UTC year,GPS altitude,GPS speed
0,03/11/2019 14:49:09.038,-35.452125,4.949645,0.555047,-0.430114,-2.283131,,1096.375,106.886719,,...,,75.0,18.0,49.0,11.0,3.0,11.0,19.0,337.5,29.323334
1,03/11/2019 14:49:09.138,-35.480735,5.270085,0.626764,-0.420577,-2.294575,,1098.875,107.101562,,...,,75.0,18.0,49.0,11.0,3.0,11.0,19.0,337.5,29.323334
2,03/11/2019 14:49:09.238,-35.480735,5.197604,0.56878,-62.5,-2.271687,,1096.75,107.089844,,...,,75.0,18.0,49.0,11.0,3.0,11.0,19.0,337.5,29.323334
3,03/11/2019 14:49:09.338,-35.452125,5.41886,0.638209,-62.5,-2.290761,411617045.0,1098.375,106.921875,,...,,75.0,18.0,49.0,11.0,3.0,11.0,19.0,337.5,29.323334
4,03/11/2019 14:49:09.438,-35.471199,5.228122,0.590143,-62.5,-2.283131,411617045.0,1101.75,107.109375,,...,,75.0,18.0,49.0,11.0,3.0,11.0,19.0,337.5,29.323334


Dropping UTC columns.

In [4]:
# Replaces truckTwoDf with a copy that no longer has the UTC columns. Because the same
# name is reused, this cell cannot be run twice in a row: the second run would ask pandas
# to drop columns that are already gone (KeyError). Re-run the load cell first.
truckTwoDf = cullUtcCols(truckTwoDf)

Renaming the columns for better readability.

In [5]:
# Create a Python dictionary out of the CSV dictionary.
truckTwoDict = createPythonDictionary(truckTwoDictPath)

truckTwoDf = renameColumns(truckTwoDf, truckTwoDict)
truckTwoDf.head()

Unnamed: 0,Time (DateTime),1115 Alternator (A),1115 Battery Out (A),1115 Fridge (A),1115 Trailer (A),1115 Inverter (A),Total Distance (m),Engine Speed (rpm),Vehicle Speed (Wheel-Based; km/hr),Urea Tank Temperature (C),...,Road Inclination (%),Vehicle Weight (kg),Outside Air Temperature (C),Cruise Control Speed (km/hr),Vehicle Speed (km/hr),Boost Pressure (kPa),Turbo Speed (rpm),CPU Load (%),Altitude (m),Speed (km/hr)
0,03/11/2019 14:49:09.038,-35.452125,4.949645,0.555047,-0.430114,-2.283131,,1096.375,106.886719,,...,2.4,18000.0,16.9375,,106.886719,196.0,,75.0,337.5,29.323334
1,03/11/2019 14:49:09.138,-35.480735,5.270085,0.626764,-0.420577,-2.294575,,1098.875,107.101562,,...,2.4,18000.0,16.9375,,107.101562,198.0,,75.0,337.5,29.323334
2,03/11/2019 14:49:09.238,-35.480735,5.197604,0.56878,-62.5,-2.271687,,1096.75,107.089844,,...,2.4,18000.0,16.9375,,107.089844,198.0,,75.0,337.5,29.323334
3,03/11/2019 14:49:09.338,-35.452125,5.41886,0.638209,-62.5,-2.290761,411617045.0,1098.375,106.921875,,...,2.4,18000.0,16.9375,,106.921875,200.0,,75.0,337.5,29.323334
4,03/11/2019 14:49:09.438,-35.471199,5.228122,0.590143,-62.5,-2.283131,411617045.0,1101.75,107.109375,,...,2.4,18000.0,16.9375,,107.109375,200.0,,75.0,337.5,29.323334


##### GPS Speed vs. Wheel-Based Speed
Selecting the GPS Speed and Wheel-Based Speed columns.

In [6]:
columns = ['Time (DateTime)', 'Speed (km/hr)', 'Vehicle Speed (Wheel-Based; km/hr)']
# Keep every row of the selected columns. Only the preview below is limited to five rows,
# so later analysis or plotting is not silently restricted to five records.
gpsVsWheel = truckTwoDf[columns]
gpsVsWheel.head()
# Stopping here; plotting this data would be much more effective if it were per day. We should discuss this point before proceeding.
# NOTE(review): in the preview the GPS speed (~29.3) is roughly the wheel-based speed
# (~106.9 km/hr) divided by 3.6, which suggests the GPS column may be in m/s rather than
# km/hr - confirm against the data dictionary before comparing the two.

Unnamed: 0,Time (DateTime),Speed (km/hr),Vehicle Speed (Wheel-Based; km/hr)
0,03/11/2019 14:49:09.038,29.323334,106.886719
1,03/11/2019 14:49:09.138,29.323334,107.101562
2,03/11/2019 14:49:09.238,29.323334,107.089844
3,03/11/2019 14:49:09.338,29.323334,106.921875
4,03/11/2019 14:49:09.438,29.323334,107.109375
