**Import modules**

In [2]:
import pandas as pd
import numpy as np
from csv import reader

**Load Data**

In [3]:
def load_csv(fileName):
    """
    Load data from a csv file

    Parameters
    ----------
    fileName: str
      Complete path of input dataset

    Returns
    -------
    List
      List of lists where each element represents a single record.
      Fields are returned as strings exactly as the csv reader yields them,
      and empty rows (blank lines) are skipped.
    """
    # newline="" passes line endings to the csv module untranslated, as the
    # csv documentation requires; otherwise "\r\n" embedded in a quoted field
    # is silently rewritten to "\n" by universal-newline handling.
    with open(fileName, "r", newline="") as f:
        # An empty list is what the reader yields for a blank line; drop those.
        return [row for row in reader(f) if row]

**Import raw data from local disk**

In [7]:
# Read the raw CSV into a list of rows and report its dimensions.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists - consider a configurable data dir.
fileName = '/Users/ranjeetsandhu/Documents/DataScience/Python/Projects_2021/shipFuelConsumption/predict-fuel-consumption/data/raw/ship_data.csv'
dataset = load_csv(fileName)
# Column count is taken from the first row, so an empty file fails here.
print(
    "Loaded data with {0} rows and {1} columns".format(
        len(dataset),
        len(dataset[0]),
    )
)

Loaded data with 16249 rows and 26 columns


**Convert to a pandas data frame**

In [8]:
# The first row returned by load_csv supplies the column labels; every
# remaining row becomes a record. Values are still the strings read from the
# csv file, so no column is numeric at this point.
colNames = dataset[0]
df = pd.DataFrame(data=dataset[1:], columns=colNames)

**Helper functions: convert the Unix timestamp column to a datetime index, and rename the DataFrame columns to more readable names for easy referencing**

In [25]:
def time_index(data, col_ts, ts_units):
    """
    Convert unix ts to date-time and re-index data-frame

    Parameters
    ----------
    data: Pandas DataFrame
      Input pandas data frame object. It is modified in place.
    col_ts: str
      Column name with timestamp data. Values may be numbers or numeric
      strings (e.g. as read by csv.reader).
    ts_units: str
      Units of the timestamp. This could be 's' as seconds, refer to the
      pandas.to_datetime documentation for more options.

    Returns
    -------
    DataFrame
      The same DataFrame object that was passed in, with ``col_ts`` converted
      to date-time and moved from the columns to the index.
    """
    ts_values = data[col_ts]
    # Text timestamps are cast to numbers explicitly: letting
    # to_datetime(..., unit=...) interpret numeric strings is deprecated in
    # recent pandas releases, which ask for a numeric cast beforehand.
    if pd.api.types.is_object_dtype(ts_values) or pd.api.types.is_string_dtype(ts_values):
        ts_values = pd.to_numeric(ts_values)
    data[col_ts] = pd.to_datetime(ts_values, unit=ts_units)
    # set_index drops col_ts from the columns, so calling this twice on the
    # same frame raises KeyError on the second call.
    data.set_index(col_ts, inplace=True)
    return data

def rename_cols(data, new_col_names):
    """
    Replace every column label of a data frame, by position

    Parameters
    ----------
    data: Pandas DataFrame
      Frame whose columns are relabelled; it is modified in place
    new_col_names: list
      New labels, one per existing column, in column order

    Returns
    -------
    DataFrame
      The same object that was passed in, now carrying the new labels
    """
    relabelled = data
    # Assigning to .columns swaps the labels on the caller's frame itself.
    relabelled.columns = new_col_names
    return relabelled

In [None]:
# Turn the 'Time' column into a date-time index (timestamps are given in seconds).
# time_index removes 'Time' from the columns once it is the index, so the guard
# lets this cell be re-run without raising KeyError on the missing column.
if df.index.name != 'Time':
    df = time_index(df, 'Time', 's')

In [30]:
# Readable labels for the 25 non-index columns, applied purely by position.
# NOTE(review): correctness relies on the column order of the source csv -
# verify against the raw header if the file layout ever changes.
new_col_names = [
    'fuelConsumption', 'HFO', 'MGO', 'draftForward', 'draftAft',
    'draftMid1', 'draftMid2', 'shaftSpeed', 'shaftTorque', 'shaftPower',
    'speedGround', 'speedWater', 'heading', 'rudderAngle', 'AWS',
    'AWD', 'TWS', 'TWD', 'temp', 'currentDirection',
    'currentSpeed', 'waterDepth', 'waveHeight', 'wavePeriod', 'waveDirection',
]

df = rename_cols(df, new_col_names)

In [37]:
# Optional export of the time-indexed frame; left disabled - uncomment to write the file.
# df.to_csv("indexed-dataFrame.csv")