**Import modules**

In [1]:
import pandas as pd
from csv import reader

**Load Data**

In [2]:
def load_csv(fileName):
    """
    Load data from a csv file

    Parameters
    ----------
    fileName: str
      Complete path of input dataset

    Returns
    -------
    List
      List of lists where each element represents a single record.
      Every field is returned as a string, the header line (if the
      file has one) is simply the first record, and blank lines are
      skipped.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are rewritten by Python's universal-newline translation.
    with open(fileName, "r", newline="") as f:
        # The reader yields an empty list for a blank line; drop those.
        return [row for row in reader(f) if row]

**Import raw data from local disk**

In [3]:
# NOTE(review): this is a machine-specific absolute path; the cell only runs
# where this exact directory layout exists.
fileName = '/Users/ranjeetsandhu/Documents/DataScience/Python/Projects_2021/shipFuelConsumption/predict-fuel-consumption/data/raw/ship_data.csv'
dataset = load_csv(fileName)

# The row count includes the header record, which is still dataset[0] here.
row_count = len(dataset)
column_count = len(dataset[0])
print("Loaded data with {0} rows and {1} columns".format(row_count, column_count))

Loaded data with 16249 rows and 26 columns


**Convert to a pandas data frame**

In [4]:
# The first record is the header; every remaining record is a data row.
# All cells are still strings at this point, exactly as csv.reader produced them.
colNames = dataset[0]
df = pd.DataFrame(
    data=dataset[1:],
    columns=colNames,
)

In [5]:
# Preview the first rows of the raw frame (default integer index, original column names).
df.head()

Unnamed: 0,Time,Main Engine Fuel Consumption (MT/day),Main Engine Using HFO (bool),Main Engine Using MGO (bool),Draft Forward (meters),Draft Aft (meters),Draft Mid-1 (meters),Draft Mid-2 (meters),Shaft Speed (RPM),Shaft Torque (kNm),...,Weather Service Apparent Wind Direction (degrees from bow),Weather Service True Wind Speed (knots),Weather Service True Wind Direction (degrees from north),Weather Service Temperature (celsius),Weather Service Sea Current Direction (degrees from north),Weather Service Sea Current Speed (knots),Water Depth (meters),Weather Service Wind Wave Significant Height (meters),Weather Service Wind Wave Period (seconds),Weather Service Wind Wave Direction (degrees from north)
0,1499346900,0.0,0.0,0.0,0.0,0.0,0.0,11.49769530408434,84.87883673940372,1013.631638959456,...,20.42531505218065,6.675341148577384,314.22443378866336,20.28307101801556,254.03353255734427,0.070895904469751,0.0,0.0,0.0,0.0
1,1499349600,0.0,0.0,0.0,0.0,0.0,0.0,11.5093517372048,84.86830012600724,1004.2445586049872,...,23.426521938546927,7.034934072846166,308.61547709871866,20.36086463719164,323.89057481675064,0.0065964576441157,0.0,0.0,0.0,0.0
2,1499353200,0.0,0.0,0.0,0.0,0.0,0.0,11.531678099329117,84.87497204746015,1003.7516479131426,...,22.39643244067517,7.778713684573773,303.56322147423003,20.53443683808393,22.70305925684949,0.0270874495638674,0.0,0.0,0.0,0.0
3,1499356800,0.0,0.0,0.0,0.0,0.0,0.0,11.54562421229114,84.8674458516784,1010.7165453939892,...,8.588757045913667,8.508142760078867,300.737464493011,20.641539404342492,347.9482557147905,0.0345547924080713,0.0,0.0,0.0,0.0
4,1499360400,0.0,0.0,0.0,0.0,0.0,0.0,11.549795456125144,84.86453453842921,1013.1796958998166,...,-15.548512726704374,8.541663901920678,302.9034057677886,20.66217594432146,217.49511697863136,0.0295014217201107,0.0,0.0,0.0,0.0


**Helper functions: convert the Unix timestamp column to a datetime index, and rename the DataFrame columns to shorter, more readable names**

In [6]:
def time_index(data, col_ts, ts_units):
    """
    Convert unix ts to date-time and re-index data-frame

    The input frame is left untouched; a re-indexed copy is returned.

    Parameters
    ----------
    data: Pandas DataFrame
      Input pandas data frame object
    col_ts: str
      Column name with timestamp data. Values may be numeric or numeric
      text (e.g. as read by csv.reader).
    ts_units: str
      Units of the timestamp. This could be 's' as seconds, refer to the
      pandas.to_datetime documentation for more options.

    Returns
    -------
    DataFrame
      New pandas DataFrame object indexed by the converted date-time
      values; the timestamp column is moved into the index.

    Raises
    ------
    KeyError
      If col_ts is not a column of data.
    """
    timestamps = data[col_ts]

    # Epoch values loaded as text must be made numeric first: combining
    # `unit` with string input is deprecated in recent pandas releases.
    if pd.api.types.is_object_dtype(timestamps) or pd.api.types.is_string_dtype(timestamps):
        try:
            timestamps = pd.to_numeric(timestamps)
        except (ValueError, TypeError):
            # Not numeric text; hand the values to to_datetime unchanged so
            # it applies (or rejects) them exactly as it did before.
            pass

    # Work on a copy so the caller's frame is not modified as a side effect.
    indexed = data.copy()
    indexed[col_ts] = pd.to_datetime(timestamps, unit=ts_units)
    return indexed.set_index(col_ts)

def rename_cols(data, new_col_names):
    """
    Rename columns

    Names are applied by position: the i-th entry of new_col_names
    replaces the i-th existing column label. The input frame is left
    untouched; a relabelled copy is returned.

    Parameters
    ----------
    data: Pandas DataFrame
      Input pandas data frame object
    new_col_names: list
      New column labels, one per existing column and in the same order

    Returns
    -------
    DataFrame
      New pandas DataFrame object carrying the new column labels

    Raises
    ------
    ValueError
      Raised by pandas when the number of names does not match the
      number of columns.
    """
    # Copy first so the caller's frame keeps its original labels.
    renamed = data.copy()
    renamed.columns = new_col_names
    return renamed

In [7]:
# Swap the default integer index for a datetime index built from the
# epoch-seconds 'Time' column.
df = time_index(df, col_ts='Time', ts_units='s')

In [8]:
# Short labels, applied by position: the order must match the frame's
# current column order (every column except the 'Time' index).
new_col_names = [
    'fuelConsumption', 'HFO', 'MGO',
    'draftForward', 'draftAft', 'draftMid1', 'draftMid2',
    'shaftSpeed', 'shaftTorque', 'shaftPower',
    'speedGround', 'speedWater', 'heading', 'rudderAngle',
    'AWS', 'AWD', 'TWS', 'TWD', 'temp',
    'currentDirection', 'currentSpeed', 'waterDepth',
    'waveHeight', 'wavePeriod', 'waveDirection',
]

df = rename_cols(df, new_col_names)

In [9]:
# Preview again to check the datetime index and the renamed columns.
df.head()

Unnamed: 0_level_0,fuelConsumption,HFO,MGO,draftForward,draftAft,draftMid1,draftMid2,shaftSpeed,shaftTorque,shaftPower,...,AWD,TWS,TWD,temp,currentDirection,currentSpeed,waterDepth,waveHeight,wavePeriod,waveDirection
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-07-06 13:15:00,0.0,0.0,0.0,0.0,0.0,0.0,11.49769530408434,84.87883673940372,1013.631638959456,0.0,...,20.42531505218065,6.675341148577384,314.22443378866336,20.28307101801556,254.03353255734427,0.070895904469751,0.0,0.0,0.0,0.0
2017-07-06 14:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.5093517372048,84.86830012600724,1004.2445586049872,0.0,...,23.426521938546927,7.034934072846166,308.61547709871866,20.36086463719164,323.89057481675064,0.0065964576441157,0.0,0.0,0.0,0.0
2017-07-06 15:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.531678099329117,84.87497204746015,1003.7516479131426,0.0,...,22.39643244067517,7.778713684573773,303.56322147423003,20.53443683808393,22.70305925684949,0.0270874495638674,0.0,0.0,0.0,0.0
2017-07-06 16:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.54562421229114,84.8674458516784,1010.7165453939892,0.0,...,8.588757045913667,8.508142760078867,300.737464493011,20.641539404342492,347.9482557147905,0.0345547924080713,0.0,0.0,0.0,0.0
2017-07-06 17:00:00,0.0,0.0,0.0,0.0,0.0,0.0,11.549795456125144,84.86453453842921,1013.1796958998166,0.0,...,-15.548512726704374,8.541663901920678,302.9034057677886,20.66217594432146,217.49511697863136,0.0295014217201107,0.0,0.0,0.0,0.0


In [10]:
# Optional export of the re-indexed frame; left disabled.
# df.to_csv("indexed-dataFrame.csv")