In [2]:
import pandas as pd
from pathlib import Path
from pprint import pprint

In [3]:

raw_data = Path('rawdata')
tables = {}
for i, file in enumerate(raw_data.iterdir()):
    # if i > 10:
    #     break
    name = file.name.split('.')[0]
    tables[f"{name}"] = pd.read_csv(f"rawdata/{file.name}", header = None, names=('ID', name))


In [4]:
columns = [i for i in tables.keys()] 
columns.sort()
pprint(columns)

['fuelDensity',
 'fuelTemp',
 'fuelVolumeFlowRate',
 'inclinometer-raw',
 'latitude',
 'level1median',
 'level2median',
 'longitude',
 'longitudinalWaterSpeed',
 'portPitch',
 'portRudder',
 'speedKmh',
 'speedKnots',
 'starboardPitch',
 'starboardRudder',
 'trackDegreeMagnetic',
 'trackDegreeTrue',
 'trueHeading',
 'windAngle',
 'windSpeed']


### Aggregting date for mass flow of fuel

In [5]:
df = pd.DataFrame(tables["fuelDensity"].ID.tolist(), columns = ('ID',))
df = df.merge(tables["fuelDensity"], on='ID',how='inner')
df = df.merge(tables["fuelVolumeFlowRate"], on='ID',how='inner')
df

Unnamed: 0,ID,fuelDensity,fuelVolumeFlowRate
0,634019142119225390,0.947109,0.000000
1,634019142129597610,0.947110,0.000000
2,634019142139821660,0.947114,0.000000
3,634019142150036040,0.947104,0.000000
4,634019212935606850,0.938305,0.283816
...,...,...,...
1627319,634067079121428610,0.930151,0.203576
1627320,634067079131642650,0.930103,0.205217
1627321,634067079141866640,0.930075,0.212853
1627322,634067079152080710,0.930082,0.224660


### Date transformation functions

In [21]:
def ticks_to_iso(ticks):
    """Convert .NET DateTime.Ticks to an ISO 8601 formatted string."""
    ticks_epoch = datetime(1, 1, 1)  # .NET starts from year 0001-01-01
    date_time = ticks_epoch + timedelta(microseconds=ticks / 10)  # Convert ticks to microseconds
    return date_time.isoformat()

def ticks_to_ymd(ticks):
    """Convert .NET DateTime.Ticks to an ISO 8601 formatted string."""
    ticks_epoch = datetime(1, 1, 1)  # .NET starts from year 0001-01-01
    date_time = ticks_epoch + timedelta(microseconds=ticks / 10)  # Convert ticks to microseconds
    return date_time.strftime("%Y-%m-%d")


In [22]:
df["date"] = df.ID.apply(ticks_to_ymd)
df["iso_date"] = df.ID.apply(ticks_to_iso)

In [8]:
df

Unnamed: 0,ID,fuelDensity,fuelVolumeFlowRate,date,iso_date
0,634019142119225390,0.947109,0.000000,2010-02-16,2010-02-16T10:50:11.922536
1,634019142129597610,0.947110,0.000000,2010-02-16,2010-02-16T10:50:12.959760
2,634019142139821660,0.947114,0.000000,2010-02-16,2010-02-16T10:50:13.982168
3,634019142150036040,0.947104,0.000000,2010-02-16,2010-02-16T10:50:15.003600
4,634019212935606850,0.938305,0.283816,2010-02-16,2010-02-16T12:48:13.560688
...,...,...,...,...,...
1627319,634067079121428610,0.930151,0.203576,2010-04-12,2010-04-12T22:25:12.142864
1627320,634067079131642650,0.930103,0.205217,2010-04-12,2010-04-12T22:25:13.164264
1627321,634067079141866640,0.930075,0.212853,2010-04-12,2010-04-12T22:25:14.186664
1627322,634067079152080710,0.930082,0.224660,2010-04-12,2010-04-12T22:25:15.208072


In [9]:
display(df[(df["fuelVolumeFlowRate"] > 0) & (df["date"] == '2010-02-16')])
display(df[(df["fuelVolumeFlowRate"] > 0) &  (df["date"] == '2010-02-17')])

Unnamed: 0,ID,fuelDensity,fuelVolumeFlowRate,date,iso_date
4,634019212935606850,0.938305,0.283816,2010-02-16,2010-02-16T12:48:13.560688
5,634019212945820680,0.938173,0.279202,2010-02-16,2010-02-16T12:48:14.582064
6,634019212956034910,0.938036,0.282821,2010-02-16,2010-02-16T12:48:15.603488
7,634019212966268860,0.937983,0.280900,2010-02-16,2010-02-16T12:48:16.626888
8,634019212976492840,0.937767,0.279850,2010-02-16,2010-02-16T12:48:17.649280
...,...,...,...,...,...
8634,634019301152989210,0.926645,0.185059,2010-02-16,2010-02-16T15:15:15.298920
8635,634019301163232170,0.926666,0.185255,2010-02-16,2010-02-16T15:15:16.323216
8636,634019301173466570,0.926686,0.187334,2010-02-16,2010-02-16T15:15:17.346656
8637,634019301183680670,0.926721,0.185752,2010-02-16,2010-02-16T15:15:18.368064


Unnamed: 0,ID,fuelDensity,fuelVolumeFlowRate,date,iso_date
8639,634019858471552070,0.935706,0.394521,2010-02-17,2010-02-17T06:44:07.155208
8640,634019858481776080,0.935774,0.347631,2010-02-17,2010-02-17T06:44:08.177608
8641,634019858492000120,0.935693,0.327961,2010-02-17,2010-02-17T06:44:09.200016
8642,634019858502214030,0.935598,0.317626,2010-02-17,2010-02-17T06:44:10.221400
8643,634019858512448020,0.935773,0.311582,2010-02-17,2010-02-17T06:44:11.244800
...,...,...,...,...,...
36613,634020383895512390,0.925759,0.221838,2010-02-17,2010-02-17T21:19:49.551240
36614,634020383905736690,0.925743,0.221843,2010-02-17,2010-02-17T21:19:50.573672
36615,634020383915950290,0.925741,0.215218,2010-02-17,2010-02-17T21:19:51.595032
36616,634020383926174300,0.925739,0.216107,2010-02-17,2010-02-17T21:19:52.617432


#### Adding data about motion
Since fuel density is implied by fuel temperature. These variables are correlated and adding tempereture would not add any information. I will rather add location as it can be more related to the weather conditions, that can have influence on fuel mass flow. Since joining on time stamp is not an option in this case, I will transform it to human readable format and 

In [10]:
def add_dates(table_name): 
    tables[table_name]["date"] = tables[table_name].ID.apply(ticks_to_ymd)
    tables[table_name]["iso_date"] = tables[table_name].ID.apply(ticks_to_iso)

In [11]:
add_dates("longitude")
add_dates("latitude")

In [12]:
df_longitude = tables["longitude"]
df_longitude["iso_date_min"] = df_longitude.iso_date.apply(lambda x: x[:16])
df_longitude

Unnamed: 0,ID,longitude,date,iso_date,iso_date_min
0,634018095137704470,00649.1143W,2010-02-15,2010-02-15T05:45:13.770448,2010-02-15T05:45
1,634018095137898920,00649.1143W,2010-02-15,2010-02-15T05:45:13.789888,2010-02-15T05:45
2,634018095146534160,00649.1143W,2010-02-15,2010-02-15T05:45:14.653416,2010-02-15T05:45
3,634018095157587450,00649.1144W,2010-02-15,2010-02-15T05:45:15.758744,2010-02-15T05:45
4,634018095158588070,00649.1144W,2010-02-15,2010-02-15T05:45:15.858808,2010-02-15T05:45
...,...,...,...,...,...
2677691,634067079142236760,00649.1087W,2010-04-12,2010-04-12T22:25:14.223680,2010-04-12T22:25
2677692,634067079143136790,00649.1087W,2010-04-12,2010-04-12T22:25:14.313680,2010-04-12T22:25
2677693,634067079152991490,00649.1088W,2010-04-12,2010-04-12T22:25:15.299152,2010-04-12T22:25
2677694,634067079154070470,00649.1088W,2010-04-12,2010-04-12T22:25:15.407048,2010-04-12T22:25


In [13]:
print("First fuel consumption timestemp 2010-02-16T12:48:13.560688")
print("Longitude")
display(df_longitude[df_longitude.iso_date_min == '2010-02-16T12:48'])
print("Latitude")
display(tables["latitude"])

First fuel consumption timestemp 2010-02-16T12:48:13.560688
Longitude


Unnamed: 0,ID,longitude,date,iso_date,iso_date_min
81290,634019212953686680,00645.9397W,2010-02-16,2010-02-16T12:48:15.368672,2010-02-16T12:48
81291,634019212954187230,00645.9397W,2010-02-16,2010-02-16T12:48:15.418720,2010-02-16T12:48
81292,634019212954766060,00645.9397W,2010-02-16,2010-02-16T12:48:15.476608,2010-02-16T12:48
81293,634019212972795710,00645.9408W,2010-02-16,2010-02-16T12:48:17.279568,2010-02-16T12:48
81294,634019212973845470,00645.9408W,2010-02-16,2010-02-16T12:48:17.384544,2010-02-16T12:48
...,...,...,...,...,...
81356,634019213372280600,00645.9626W,2010-02-16,2010-02-16T12:48:57.228064,2010-02-16T12:48
81357,634019213373190630,00645.9626W,2010-02-16,2010-02-16T12:48:57.319064,2010-02-16T12:48
81358,634019213384673970,00645.9632W,2010-02-16,2010-02-16T12:48:58.467400,2010-02-16T12:48
81359,634019213392539370,00645.9636W,2010-02-16,2010-02-16T12:48:59.253936,2010-02-16T12:48


Latitude


Unnamed: 0,ID,latitude,date,iso_date
0,634018095137704470,6132.9236N,2010-02-15,2010-02-15T05:45:13.770448
1,634018095137898920,6132.9236N,2010-02-15,2010-02-15T05:45:13.789888
2,634018095146534160,6132.9240N,2010-02-15,2010-02-15T05:45:14.653416
3,634018095157587450,6132.9243N,2010-02-15,2010-02-15T05:45:15.758744
4,634018095158588070,6132.9243N,2010-02-15,2010-02-15T05:45:15.858808
...,...,...,...,...
2677708,634067079142236760,6132.9260N,2010-04-12,2010-04-12T22:25:14.223680
2677709,634067079143136790,6132.9260N,2010-04-12,2010-04-12T22:25:14.313680
2677710,634067079152991490,6132.9255N,2010-04-12,2010-04-12T22:25:15.299152
2677711,634067079154070470,6132.9255N,2010-04-12,2010-04-12T22:25:15.407048


In [17]:
# I cannot joinon date ofc
'''
df_gps = pd.DataFrame(tables["latitude"].date.tolist(), columns = ('date',))
df_gps = df_gps.merge(tables["latitude"].drop("ID", axis=1), on='date',how='inner')
df_gps = df_gps.merge(tables["longitude"].drop("ID", axis=1), on='date',how='inner')
df_gps
'''

'\ndf_gps = pd.DataFrame(tables["latitude"].date.tolist(), columns = (\'date\',))\ndf_gps = df_gps.merge(tables["latitude"].drop("ID", axis=1), on=\'date\',how=\'inner\')\ndf_gps = df_gps.merge(tables["longitude"].drop("ID", axis=1), on=\'date\',how=\'inner\')\ndf_gps\n'

In [15]:
display(tables["latitude"].drop("ID", axis=1))

Unnamed: 0,latitude,date,iso_date
0,6132.9236N,2010-02-15,2010-02-15T05:45:13.770448
1,6132.9236N,2010-02-15,2010-02-15T05:45:13.789888
2,6132.9240N,2010-02-15,2010-02-15T05:45:14.653416
3,6132.9243N,2010-02-15,2010-02-15T05:45:15.758744
4,6132.9243N,2010-02-15,2010-02-15T05:45:15.858808
...,...,...,...
2677708,6132.9260N,2010-04-12,2010-04-12T22:25:14.223680
2677709,6132.9260N,2010-04-12,2010-04-12T22:25:14.313680
2677710,6132.9255N,2010-04-12,2010-04-12T22:25:15.299152
2677711,6132.9255N,2010-04-12,2010-04-12T22:25:15.407048


In [16]:
tables["trueHeading"].ID.apply

Unnamed: 0,ID,trueHeading
0,634018095140997220,317.93
1,634018095152989600,318.32
2,634018095164992880,318.83
3,634018095176985300,319.33
4,634018095188988420,319.77
...,...,...
1439534,634067079106147630,318.35
1439535,634067079118151620,318.19
1439536,634067079130153750,318.02
1439537,634067079142146670,317.93
