**Import modules**

In [2]:
import pandas as pd
from csv import reader

**Load Data**

In [4]:
def load_csv(fileName):
    """
    Load data from a csv file

    Parameters
    ----------
    fileName: str
      Complete path of input dataset

    Returns
    -------
    List
      List of lists where each element represents a single record.
      Rows are returned in file order and every value is left as the
      string produced by the csv reader. Rows that parse as empty
      (for example blank lines) are skipped. No row is treated
      specially, so a header line, if the file has one, is the first
      element of the result.
    """
    # newline="" hands newline handling over to the csv module, as its
    # documentation requires; without it, line breaks embedded inside
    # quoted fields are rewritten by universal-newline translation.
    with open(fileName, "r", newline="") as f:
        return [row for row in reader(f) if row]

**Import raw data from local disk**

In [5]:
# Location of the raw ship data, relative to the notebook's working directory
fileName = '../data/raw/ship_data.csv'

# Parse the file into a list of rows
dataset = load_csv(fileName)

print(
    "Loaded data with {0} rows and {1} columns".format(
        len(dataset),     # every parsed row is counted
        len(dataset[0]),  # width is taken from the first row
    )
)

Loaded data with 16249 rows and 26 columns


**Convert to a pandas data frame**

In [4]:
# The first parsed row supplies the column labels; every following row is a record.
colNames = dataset[0]
df = pd.DataFrame(data=dataset[1:], columns=colNames)

In [5]:
# Preview: first 6 rows, first 10 columns
df.head(6).iloc[:, :10]

Unnamed: 0,Time,Main Engine Fuel Consumption (MT/day),Main Engine Using HFO (bool),Main Engine Using MGO (bool),Draft Forward (meters),Draft Aft (meters),Draft Mid-1 (meters),Draft Mid-2 (meters),Shaft Speed (RPM),Shaft Torque (kNm)
0,1499346900,0.0,0.0,0.0,0.0,0.0,0.0,11.49769530408434,84.87883673940372,1013.631638959456
1,1499349600,0.0,0.0,0.0,0.0,0.0,0.0,11.5093517372048,84.86830012600724,1004.2445586049872
2,1499353200,0.0,0.0,0.0,0.0,0.0,0.0,11.531678099329117,84.87497204746015,1003.7516479131426
3,1499356800,0.0,0.0,0.0,0.0,0.0,0.0,11.54562421229114,84.8674458516784,1010.7165453939892
4,1499360400,0.0,0.0,0.0,0.0,0.0,0.0,11.549795456125144,84.86453453842921,1013.1796958998166
5,1499364000,0.0,0.0,0.0,0.0,0.0,0.0,11.550793619597425,84.848977308173,1009.4980609983792


**Helper functions: convert the Unix timestamp column to date-time objects and use it as the data frame index; rename the data frame columns to shorter, more readable names for easy referencing**

In [6]:
def time_index(data, col_ts, ts_units):
    """
    Convert unix ts to date-time and re-index data-frame

    The input frame is left untouched; the conversion is done on a copy,
    so calling this function never changes the object passed in.

    Parameters
    ----------
    data: Pandas DataFrame
      Input pandas data frame object
    col_ts: str
      Column name with timestamp data. The values may be numeric or
      numeric text (e.g. as read by the csv module).
    ts_units: str
      Units of the timestamp. This could be 's' as seconds, refer to the
      pandas.to_datetime documentation for more options.

    Returns
    -------
    DataFrame
      New pandas DataFrame object indexed by the converted date-time
      values; `col_ts` becomes the index and is no longer a column.

    Raises
    ------
    KeyError
      If `col_ts` is not a column of `data`.
    ValueError
      If text values in `col_ts` cannot be interpreted as numbers.
    """
    raw_ts = data[col_ts]

    # Text timestamps are cast to numbers explicitly: relying on
    # to_datetime(..., unit=...) to interpret strings as epoch values is
    # not something to depend on across pandas versions.
    is_text = (pd.api.types.is_object_dtype(raw_ts)
               or pd.api.types.is_string_dtype(raw_ts))
    if is_text:
        raw_ts = pd.to_numeric(raw_ts)

    stamps = pd.to_datetime(raw_ts, unit=ts_units)

    # Work on a copy so the caller's frame keeps its original column and index.
    indexed = data.copy()
    indexed[col_ts] = stamps
    return indexed.set_index(col_ts)

def rename_cols(data, new_col_names):
    """
    Rename columns

    The new labels are applied positionally to a copy of the input, so
    the frame passed in keeps its original column names.

    Parameters
    ----------
    data: Pandas DataFrame
      Input pandas data frame object
    new_col_names: list
      New column labels, one per existing column, in column order

    Returns
    -------
    DataFrame
      New pandas DataFrame object with the same values and the new
      column labels

    Raises
    ------
    ValueError
      If the number of new labels differs from the number of columns
    """
    renamed = data.copy()
    renamed.columns = new_col_names
    return renamed

In [7]:
# Replace the 'Time' column (timestamps in seconds) with a date-time index
df = time_index(df, col_ts='Time', ts_units='s')

In [8]:
# Short column labels; rename_cols applies them positionally, so the order
# here must match the current column order of df.
new_col_names = [
    'fuelConsumption', 'HFO', 'MGO',
    'draftForward', 'draftAft', 'draftMid1', 'draftMid2',
    'shaftSpeed', 'shaftTorque', 'shaftPower',
    'speedGround', 'speedWater',
    'heading', 'rudderAngle',
    'AWS', 'AWD', 'TWS', 'TWD',
    'temp',
    'currentDirection', 'currentSpeed',
    'waterDepth',
    'waveHeight', 'wavePeriod', 'waveDirection',
]

df = rename_cols(data=df, new_col_names=new_col_names)

In [9]:
# Preview: first 6 rows, first 9 columns of the re-indexed, renamed frame
df.head(6).iloc[:, :9]

Unnamed: 0_level_0,fuelConsumption,HFO,MGO,draftForward,draftAft,draftMid1,draftMid2,shaftSpeed,shaftTorque
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-07-06 13:15:00,0.0,0.0,0.0,0.0,0.0,0.0,11.49769530408434,84.87883673940372,1013.631638959456
2017-07-06 14:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.5093517372048,84.86830012600724,1004.2445586049872
2017-07-06 15:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.531678099329117,84.87497204746015,1003.7516479131426
2017-07-06 16:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.54562421229114,84.8674458516784,1010.7165453939892
2017-07-06 17:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.549795456125144,84.86453453842921,1013.1796958998166
2017-07-06 18:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.550793619597425,84.848977308173,1009.4980609983792


In [10]:
# df.to_csv("indexed-dataFrame.csv")