# Parse data from ebike datalogger

In [None]:
import xgboost as xgb
from xgboost import plot_importance, plot_tree, to_graphviz
from sklearn.datasets import load_boston

import sklearn
print('The scikit-learn version is {}.'.format(sklearn.__version__))
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, accuracy_score

import glob
import os
import numpy as np
import pandas as pd
import scipy as sp
import scipy.signal as sg


from filter import signal_filter
from gen_plots import display_interesting_variables, display_all_variables
from Battery_Kalman.soc_estimator import SocEstimator



import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default = 'browser'


# DANGEROUS DONT DO
pd.options.mode.chained_assignment = None  # default='warn'

#px.set_mapbox_access_token(open(".mapbox_token").read())

BATTERY_ENERGY_CAPACITY = 752.4 # Battery energy capacity in kilojoules

# Folder holding the raw datalogger CSV files (the commented line is an
# alternative location on another machine).
#raw_data_path = "D:/OneDrive - Imperial College London/University Storage/Masters project/data_storage/"
raw_data_path = "/home/medad/Downloads/MastersProject/Bike_logger/Data_analysis/data_storage/data_storage/"

# Number of PAS magnets; used to scale the PAS pulse rate to a rotation rate.
N_PAS_MAGNETS = 12

# Pressure at sea level where the readings are being taken (QNH).
# NOTE(review): must be in the same unit as the barometer 'Pressure' column,
# presumably hPa - confirm.
qnh=1032.57

# read raw data
def read_file(filepath, min_datetime='2020-03-12 18:46:00'):
    """
    Read one raw datalogger CSV file into a DataFrame.

    The file has no header. Up to 16 columns are read and labelled 0..15;
    column 0 is renamed to 'Datetime' and column 1 to 'sensor'. Timestamps
    are parsed as UTC using the format "%Y-%m-%dT%H:%M:%S.%fZ"; rows whose
    timestamp cannot be parsed are dropped and the result is sorted by
    'Datetime'. The original row index is kept (it is not reset).

    Parameters
    ----------
    filepath : path of the CSV file to read.
    min_datetime : rows with a timestamp earlier than this are discarded.
        A timezone-naive value is interpreted as UTC. Pass None to keep
        every row (e.g. for logs recorded without a valid clock).

    Returns
    -------
    pandas.DataFrame sorted by 'Datetime'.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    # Column 0 is read as text and converted afterwards. The `date_parser`
    # argument of read_csv is deprecated since pandas 2.0, so the parsing is
    # done explicitly with pd.to_datetime instead.
    df = pd.read_csv(filepath,
                     names=range(16),
                     engine='c',
                     dtype={0: str})
    df[0] = pd.to_datetime(df[0], errors="coerce", format=timestamp_format, utc=True)

    df = df.rename(columns={0: 'Datetime', 1: 'sensor'})
    df = df.dropna(subset=['Datetime'])
    df = df.sort_values(by='Datetime')

    if min_datetime is not None:
        cutoff = pd.Timestamp(min_datetime)
        if cutoff.tzinfo is None:
            cutoff = cutoff.tz_localize('UTC')
        else:
            cutoff = cutoff.tz_convert('UTC')
        df = df[~(df['Datetime'] < cutoff)]

    return df


def filter_df_signal(df, input_name, output_name, highcut_f):
    """
    Filter column `input_name` of `df` and store the result in `output_name`.

    The column is passed to signal_filter with method 'butterworth_ba',
    order 5 and `highcut_f` as the `highcut` argument. `df` is modified in
    place and also returned.
    """
    raw_signal = df[input_name]
    filtered_signal = signal_filter(raw_signal, highcut=highcut_f, method='butterworth_ba', order=5)
    df[output_name] = filtered_signal
    return df


def energy_from_power_time(datetime_series, power_series):
    """
    Return the total energy obtained by integrating power over time.

    Each power sample is multiplied by the number of seconds elapsed since
    the previous sample (0 for the first sample), the products are summed
    and the sum is divided by 1,000,000.
    NOTE(review): the result is in kilojoules if the power is in milliwatts,
    as the plot labels in this notebook suggest - confirm.
    """
    elapsed_seconds = datetime_series.diff().dt.total_seconds().fillna(0)
    energy_per_sample = power_series*elapsed_seconds
    total = energy_per_sample.sum()
    return total/1000000

def pulse_width_pas_to_rpm(pulse_width):
    """
    Convert a PAS pulse period in microseconds to a crank rotation rate.

    Computes (1,000,000 / pulse_width) / N_PAS_MAGNETS.
    NOTE(review): despite the name, no factor of 60 is applied, so the
    value looks like revolutions per second rather than per minute - confirm.
    """
    pulses_per_second = 1000000/pulse_width
    return pulses_per_second/N_PAS_MAGNETS

def pulse_width_to_rpm(pulse_width):
    """
    Convert a pulse period in microseconds to a pulse rate.

    Returns 1,000,000 / pulse_width, i.e. pulses per second.
    """
    microseconds_per_second = 1000000
    return microseconds_per_second/pulse_width

def get_altitude(pressure,temperature):
    """
    Estimate altitude from barometric pressure and temperature.

    Uses the module-level sea-level pressure `qnh`. `temperature` is taken
    in degrees Celsius (273.15 is added to it) and should be the outdoor
    temperature. `pressure` must be in the same unit as `qnh`.
    """
    pressure_ratio = qnh / pressure
    temperature_kelvin = temperature + 273.15
    return ((pow(pressure_ratio, (1.0 / 5.257)) - 1) * temperature_kelvin) / 0.0065

def insert_time(row):
    """
    Return row['Datetime'] with its minute, second and microsecond replaced
    by the row's 'minute', 'second' and 'millisecond' fields.

    The hour (and date) of row['Datetime'] is kept as is; milliseconds are
    converted to microseconds.
    """
    logged_stamp = row['Datetime']
    minute = int(row['minute'])
    second = int(row['second'])
    microsecond = int(row['millisecond']*1000)
    return logged_stamp.replace(minute=minute, second=second, microsecond=microsecond)

def process_gps(df):
    """
    Extract and process the GPS rows of the combined raw log.

    Steps:
      * keep rows whose 'sensor' is 'gps' and name the generic columns 2..12;
      * rebuild 'Datetime' from the logger timestamp plus the GPS-reported
        minute/second/millisecond (see insert_time), then sort by it;
      * keep only rows with gnssFixOK == 1;
      * shift the timestamps back by a fixed 9.5 s offset;
      * add 'gps_acceleration' (change in 'GPS Speed' per second) and
        'heading' (angle of the filtered longitude/latitude differences);
      * drop columns that are entirely empty.

    Returns a new DataFrame; the input frame is not modified.
    """
    gps_columns = {2: 'hour',
                   3: 'minute',
                   4: 'second',
                   5: 'millisecond',
                   6: 'latitude',
                   7: 'longitude',
                   8: 'altitude',
                   9: 'GPS Speed',
                   10: 'sats',
                   11: 'gnssFixOK',
                   12: 'fix_type',
                  }

    # rename() returns an independent frame, so the assignments below do not
    # rely on the chained-assignment warning being switched off.
    df_gps = df[df["sensor"] == 'gps'].rename(columns=gps_columns)

    df_gps['Datetime'] = df_gps.apply(insert_time, axis=1)
    df_gps = df_gps.sort_values(by='Datetime')

    df_gps = df_gps[df_gps['gnssFixOK'] == 1].copy()

    offset = 9.5 # seconds
    df_gps["Datetime"] = df_gps["Datetime"] - pd.Timedelta(offset, unit='s')

    # The first sample has a time delta of 0, so its acceleration is NaN.
    time_delta = df_gps["Datetime"].diff().dt.total_seconds().fillna(0)
    df_gps['gps_acceleration'] = df_gps["GPS Speed"].diff()/time_delta

    x = df_gps["longitude"].diff().fillna(0)
    y = df_gps["latitude"].diff().fillna(0)

    x = signal_filter(x, highcut=100, method='butterworth_ba', order=5)
    y = signal_filter(y, highcut=100, method='butterworth_ba', order=5)

    # cart2pol returns (radius, angle); only the angle is used as heading.
    _, df_gps['heading'] = cart2pol(x, y)

    df_gps = df_gps.dropna(axis=1, how='all')

    return df_gps

def cart2pol(x, y):
    """
    Convert Cartesian coordinates to polar coordinates.

    Returns a tuple (radius, angle) where radius = sqrt(x**2 + y**2) and
    angle = arctan2(y, x) in radians.
    """
    radius = np.sqrt(x**2 + y**2)
    angle = np.arctan2(y, x)
    return radius, angle

def process_imu(df):
    """
    Extract and process the IMU rows of the combined raw log.

    Keeps rows whose 'sensor' is 'imu', names columns 2..7 as the
    acceleration and gyro axes, drops columns that are entirely empty and
    adds filtered copies of 'gyro_x' and 'acceleration_x'
    ('gyro_x_filtered', 'acceleration_x_filtered', highcut 10).

    Returns a new DataFrame; the input frame is not modified.
    """
    imu_columns = {2: 'acceleration_x',
                   3: 'acceleration_y',
                   4: 'acceleration_z',
                   5: 'gyro_x',
                   6: 'gyro_y',
                   7: 'gyro_z',
                  }

    # rename()/dropna() return an independent frame, so the column
    # assignments made by filter_df_signal never touch a slice of `df`.
    df_imu = (df[df["sensor"] == 'imu']
              .rename(columns=imu_columns)
              .dropna(axis=1, how='all'))

    df_imu = filter_df_signal(df_imu, "gyro_x", "gyro_x_filtered", 10)
    df_imu = filter_df_signal(df_imu, "acceleration_x", "acceleration_x_filtered", 10)

    return df_imu

def process_brake(df):
    """
    Extract the brake-state rows of the combined raw log.

    Keeps rows whose 'sensor' is 'brake_state', names column 2
    'brake_state' and drops columns that are entirely empty.

    Returns a new DataFrame; the input frame is not modified.
    """
    # rename()/dropna() return new frames instead of mutating a slice of `df`.
    df_brake = (df[df["sensor"] == 'brake_state']
                .rename(columns={2: 'brake_state'})
                .dropna(axis=1, how='all'))
    return df_brake


def process_pas(df):
    """
    Extract and process the pedal-assist-sensor (PAS) rows of the raw log.

    Keeps rows whose 'sensor' is 'pas', names column 2 'pulse_delay_us',
    drops columns that are entirely empty, discards pulses of 4000 us or
    shorter and adds 'pas_rpm' computed by pulse_width_pas_to_rpm.

    Returns a new DataFrame; the input frame is not modified.
    """
    df_pas = (df[df["sensor"] == 'pas']
              .rename(columns={2: 'pulse_delay_us'})
              .dropna(axis=1, how='all'))

    # Explicit copy so the new column is not written to a slice.
    df_pas = df_pas[df_pas['pulse_delay_us'] > 4000].copy()

    # The conversion is plain arithmetic, so it is applied to the whole
    # column at once instead of row by row (faster, and also works when no
    # PAS rows are present).
    df_pas['pas_rpm'] = pulse_width_pas_to_rpm(df_pas['pulse_delay_us'].astype(float))

    return df_pas
    
def process_motor_speed(df, df_gps):
    """
    Extract and process the motor-speed rows of the combined raw log.

    Keeps rows whose 'sensor' is 'motor_speed', names column 2
    'pulse_delay_us', drops empty columns and discards pulses of 15000 us
    or shorter. The pulse rate is then scaled by the mean ratio of
    'GPS Speed' to pulse rate (GPS samples matched by nearest timestamp),
    so 'motor_rpm' ends up in the same unit as 'GPS Speed'. Also adds
    'filtered_motor_rpm' and 'motor_acceleration' (change of the filtered
    value per second).

    Parameters
    ----------
    df : combined raw log, sorted by 'Datetime'.
    df_gps : processed GPS frame (see process_gps), sorted by 'Datetime'.

    Returns a new DataFrame; the input frames are not modified.
    """
    df_ms = (df[df["sensor"] == 'motor_speed']
             .rename(columns={2: 'pulse_delay_us'})
             .dropna(axis=1, how='all'))

    # Explicit copy so the new columns are not written to a slice.
    df_ms = df_ms[df_ms['pulse_delay_us'] > 15000].copy()

    # Plain arithmetic: convert the whole column at once.
    df_ms['motor_rpm'] = pulse_width_to_rpm(df_ms['pulse_delay_us'].astype(float))

    # Calibrate against GPS: one global scale factor for the whole log.
    df_ms_merged = pd.merge_asof(df_ms, df_gps, on='Datetime', direction='nearest')
    multiplier = df_ms_merged['GPS Speed'].div(df_ms_merged['motor_rpm'], axis=0).mean()
    df_ms['motor_rpm'] = df_ms['motor_rpm'] * multiplier

    # The first sample has a time delta of 0, so its acceleration is NaN.
    time_delta = df_ms["Datetime"].diff().dt.total_seconds().fillna(0)
    df_ms['filtered_motor_rpm'] = signal_filter(df_ms['motor_rpm'], highcut=100, method='butterworth_ba', order=5)
    df_ms['motor_acceleration'] = df_ms["filtered_motor_rpm"].diff()/time_delta

    return df_ms

def process_ina(df):
    """
    Extract and process the INA226 power-monitor rows of the raw log.

    Keeps rows whose 'sensor' is 'ina226', names columns 2..6, drops empty
    columns and rows with a battery voltage of exactly 0. 'Current' is
    derived from the shunt voltage and the shunt resistance, 'Power' is
    current times battery voltage, and filtered versions of power, current
    and voltage are added as '*_averaged' columns. The total energy of the
    log is printed.
    NOTE(review): the plots in this notebook label current in mA and power
    in mW, which implies 'V_shunt' is logged in mV - confirm.

    Returns a new DataFrame; the input frame is not modified.
    """
    SHUNT_RESISTANCE = 0.00215 # ohms

    ina_columns = {2: 'INA226 ID',
                   3: 'Battery_Voltage',
                   4: 'V_shunt',
                   5: 'Current_uncalibrated',
                   6: 'Power_uncalibrated',
                  }

    df_ina = (df[df["sensor"] == 'ina226']
              .rename(columns=ina_columns)
              .dropna(axis=1, how='all'))

    # Explicit copy so the new columns are not written to a slice.
    df_ina = df_ina[df_ina["Battery_Voltage"] != 0].copy()

    df_ina["Current"] = df_ina["V_shunt"] / SHUNT_RESISTANCE
    df_ina["Power"] = df_ina["Current"] * df_ina["Battery_Voltage"]

    df_ina["Power_averaged"] = signal_filter(df_ina['Power'], highcut=6, method='butterworth_ba', order=2)
    df_ina["Current_averaged"] = signal_filter(df_ina['Current'], highcut=6, method='butterworth_ba', order=2)
    df_ina["Battery_Voltage_averaged"] = signal_filter(df_ina['Battery_Voltage'], highcut=6, method='butterworth_ba', order=2)

    print("Total Energy Consumption[KiloJoules]",energy_from_power_time(df_ina["Datetime"],df_ina["Power"]))

    return df_ina

def process_baro(df,df_ms, df_gps):
    """
    Extract and process the barometer rows of the combined raw log.

    Keeps rows whose 'sensor' is 'baro' and names columns 2..4
    'temperature', 'Pressure' and 'humidity'. Adds 'Baro_Altitude'
    (see get_altitude) and a filtered version of it, merges in the next
    motor-speed sample (forward match on 'Datetime') and computes 'slope'
    as the change in filtered altitude divided by
    (time delta * filtered_motor_rpm). Both altitude columns are then
    shifted by the mean difference between barometric and GPS altitude
    (GPS samples matched by nearest timestamp). Columns that are entirely
    empty are dropped.

    Parameters
    ----------
    df : combined raw log, sorted by 'Datetime'.
    df_ms : processed motor-speed frame (see process_motor_speed).
    df_gps : processed GPS frame (see process_gps).

    Returns a new DataFrame; the input frames are not modified.
    """
    baro_columns = {2: 'temperature',
                    3: 'Pressure',
                    4: 'humidity',
                   }

    # rename() returns an independent frame, so the assignments below do
    # not write to a slice of `df`.
    df_baro = df[df["sensor"] == 'baro'].rename(columns=baro_columns)

    # get_altitude is plain arithmetic, so it is evaluated on whole columns
    # instead of row by row (faster, and also works on an empty frame).
    df_baro['Baro_Altitude'] = get_altitude(df_baro['Pressure'].astype(float),
                                            df_baro['temperature'].astype(float))
    df_baro['filtered_Baro_Altitude'] = signal_filter(df_baro['Baro_Altitude'], highcut=10, method='butterworth_ba', order=2)

    df_baro = pd.merge_asof(df_baro, df_ms, on = 'Datetime', direction = 'forward')
    df_baro_merged = pd.merge_asof(df_baro, df_gps, on = 'Datetime', direction = 'nearest')

    # Mean difference between barometric and GPS altitude.
    offset = df_baro_merged['Baro_Altitude'].sub(df_baro_merged['altitude'], axis = 0).mean()

    # The slope uses altitude differences, so it is unaffected by the offset
    # that is subtracted afterwards.
    time_delta = df_baro["Datetime"].diff().dt.total_seconds().fillna(0)
    df_baro['slope'] = df_baro["filtered_Baro_Altitude"].diff()/(time_delta*df_baro["filtered_motor_rpm"])

    df_baro['Baro_Altitude'] = df_baro['Baro_Altitude'] - offset
    df_baro['filtered_Baro_Altitude'] = df_baro['filtered_Baro_Altitude'] - offset

    df_baro = df_baro.dropna(axis=1, how='all')

    return df_baro

def process(df):
    """
    Run every per-sensor processing step on the combined raw log.

    GPS is processed first because the motor-speed and barometer steps use
    its result; the barometer step also uses the motor-speed result.
    Progress is printed after each step.

    Returns the tuple
    (df_ina, df_gps, df_baro, df_pas, df_ms, df_imu, df_brake);
    other cells index into this tuple by position.
    """
    print("Start GPS process")
    df_gps = process_gps(df)
    print("GPS process DONE....")
    
    df_pas = process_pas(df)
    print("PAS process DONE....")
    
    # Needs df_gps to calibrate the motor pulse rate against GPS speed.
    df_ms = process_motor_speed(df, df_gps)
    print("Motor Speed process DONE....")

    df_ina = process_ina(df)
    print("INA226 process DONE....")

    # Needs both the motor-speed and the GPS frames.
    df_baro = process_baro(df,df_ms,df_gps)
    print("Barometer process DONE....")

    df_imu = process_imu(df)
    print("IMU process DONE....")

    df_brake = process_brake(df)
    print("Brake process DONE....")


    return df_ina, df_gps, df_baro, df_pas, df_ms, df_imu, df_brake

def display_gps_positions(df_gps):
    """
    Draw the GPS track on an OpenStreetMap background.

    Uses the 'latitude' and 'longitude' columns of `df_gps`, writes the
    figure to output/GPS_track.html and shows it.
    """
    # Colouring, hover data and marker size are deliberately not set.
    track_map = px.line_mapbox(df_gps, lat="latitude", lon="longitude", zoom=14)

    track_map.update_layout(mapbox_style="open-street-map",
                            margin={"r": 0, "t": 0, "l": 0, "b": 0})
    track_map.write_html("output/GPS_track.html")
    track_map.show()

def process_charge_data(fps):
    """
    Read the charging logs listed in `fps`, concatenate them and return the
    processed INA226 (voltage/current/power) frame.
    """
    frames = []
    for fp in fps:
        frames.append(read_file(fp))
    combined = pd.concat(frames, ignore_index=True)
    return process_ina(combined)

def display_charge_data(df_ina):
    """
    Plot averaged battery voltage, current and power in stacked subplots.

    The frame's index is used as the shared x axis. The figure is written
    to output/Charging_Power_variables.html and shown.
    """
    # (column to plot, y-axis title) for each subplot, top to bottom.
    panels = [("Battery_Voltage_averaged", "Battery Voltage[V]"),
              ("Current_averaged", "Current[mA]"),
              ("Power_averaged", "Power[mW]")]

    fig = make_subplots(rows=len(panels), cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.01)

    fig.update_layout(hovermode="x unified")

    for row_number, (feature, axis_title) in enumerate(panels, start=1):
        trace = go.Scatter(x=df_ina.index,
                           y=df_ina[feature],
                           name=feature,
                           hoverinfo='y')
        fig.add_trace(trace, row=row_number, col=1)
        fig.update_yaxes(title_text=axis_title, row=row_number, col=1)

    fig.update_layout(title_text="Power Parameters")

    fig.write_html("output/Charging_Power_variables.html")

    fig.show()

# params_x_for_ml = ['Battery_Voltage', 'V_shunt', 'Current', 'temperature', 'Pressure', 'humidity',
#                   'Baro_Altitude', 'filtered_Baro_Altitude', 'motor_rpm_x', 'motor_acceleration_x', 'filtered_motor_rpm_x', 'slope',
#                   'latitude', 'longitude', 'altitude', 'GPS Speed', 'sats', 'gnssFixOK', 'fix_type', 'gps_acceleration',
#                   'pas_rpm', 'motor_rpm_y', 'motor_acceleration_y', 'filtered_motor_rpm_y','acceleration_x','acceleration_y','acceleration_z',
#                    'gyro_x','gyro_y','gyro_z', 'brake_state']


# params_x_for_ml = ['temperature', 'Pressure', 'humidity','filtered_Baro_Altitude', 'filtered_motor_rpm_x', 'slope',
#                   'latitude', 'longitude', 'altitude', 'GPS Speed','acceleration_x','acceleration_y',
#                     'acceleration_z','gyro_x','gyro_y','gyro_z'
#                  ]

# Input features of the power-prediction model.
params_x_for_ml = ['temperature', 'Pressure', 'humidity',
                 'filtered_Baro_Altitude', 'slope',
                 'latitude', 'longitude', 'altitude',"heading","SOC"
                 ]

# Target of the power-prediction model.
params_y_for_ml = "Power"

# Input features and target of the state-of-charge (SOC) model.
params_x_for_ml_soc = ['temperature', "Battery_Voltage", "Current", "Power"]
params_y_for_ml_soc = "SOC"


def gen_ml_data(dataframes):
    """
    Merge the per-sensor frames into one table keyed on 'Datetime'.

    The first frame is the base; every other frame is joined to it with a
    forward as-of merge, i.e. each base row gets the next sample at or
    after its timestamp. The first frame should therefore be the one with
    the highest data rate.
    """
    print("Merging Dataframes....")
    merged = dataframes[0]
    for other in dataframes[1:]:
        merged = pd.merge_asof(merged, other, on='Datetime', direction='forward')
    print("Done merging Dataframes....")

    # Resampling to a fixed rate is intentionally not done here.
    return merged

def split_x_y(df_ml, params_x_for_ml, params_y_for_ml):
    """
    Return (features, target) selected from `df_ml` using the given list of
    feature columns and the target column name.
    """
    features = df_ml[params_x_for_ml]
    target = df_ml[params_y_for_ml]
    return features, target
    

def process_for_ml(fps):
    """
    Read, combine and process a list of raw log files for machine learning.

    Parameters
    ----------
    fps : list of file paths of raw datalogger CSV files.

    Returns
    -------
    (df_ml, raw_dfs) where raw_dfs is the tuple of per-sensor frames
    returned by process() and df_ml is those frames merged on 'Datetime'
    with every row containing a missing value removed.

    Raises
    ------
    ValueError if `fps` is empty.
    """
    dfs = [read_file(fp) for fp in fps]
    if not dfs:
        raise ValueError("process_for_ml needs at least one file path")

    df = pd.concat(dfs, ignore_index=True)

    # Each file is sorted on its own, but the as-of merges downstream need
    # the combined log sorted too. A stable sort leaves the data untouched
    # when the files were already listed in chronological order.
    df = df.sort_values(by='Datetime', kind='mergesort', ignore_index=True)

    raw_dfs = process(df)

    # Highest datarate must be in the start of the list
    df_ml = gen_ml_data(raw_dfs).dropna()

    return df_ml, raw_dfs

def print_power_consumption_score(timestamps, ytest, ypred):
    """
    Print the actual and predicted energy over `timestamps` and the
    relative error of the prediction in percent.
    """
    actual_energy = energy_from_power_time(timestamps, ytest)
    predicted_energy = energy_from_power_time(timestamps, ypred)
    error_percent = 100 * (predicted_energy - actual_energy)/actual_energy

    print("Actual energy:",actual_energy, "Predicted Energy:", predicted_energy, "Error[%](ideal should be 0%):", error_percent, "%")

def print_test_results(xgbr, df_ml):
    """
    Predict power for every row of `df_ml` with `xgbr` and print the energy
    comparison between measured and predicted power.
    """
    features, measured_power = split_x_y(df_ml, params_x_for_ml, params_y_for_ml)
    predicted_power = xgbr.predict(features)

    print("Test Results :: ")
    print_power_consumption_score(df_ml["Datetime"], measured_power, predicted_power)

def do_ml(x,y):
    """
    Fit an XGBoost regressor on (x, y) and report its quality.

    The data is split with shuffling into 85 % training and 15 % test
    rows. Prints the model, the training and test scores, the MSE and RMSE
    on the test rows, and plots the feature importances.

    Returns the fitted regressor.
    """
    x_fit, x_holdout, y_fit, y_holdout = train_test_split(x, y, test_size=0.15, shuffle=True)

    model = xgb.XGBRegressor(verbosity=1)
    print(model)

    model.fit(x_fit, y_fit)

    print("Training score: ", model.score(x_fit, y_fit))
    print("Test score: ", model.score(x_holdout, y_holdout))

    holdout_prediction = model.predict(x_holdout)
    mse = mean_squared_error(y_holdout, holdout_prediction)
    print("MSE: %.2f" % mse)
    print("RMSE: %.2f" % (mse**(1/2.0)))

    plot_importance(model, height=0.9)

    return model


## SOC Calculations

def coulomb_counting(df):
    """
    Return the state-of-charge (SOC) series obtained by coulomb counting.

    The charge drawn up to each sample is the cumulative sum of
    current * time step; SOC is the remaining fraction of a fixed capacity
    of 8.708 Ah. The first sample has a time step of 0, so SOC starts at 1.
    """
    capacity_as = 8.708 * 3600 # capacity in ampere-seconds
    step_seconds = df["Datetime"].diff().dt.total_seconds().fillna(0)
    # /1000: presumably converts the current from mA to A - confirm units.
    charge_drawn = (df["Current"]/1000) * step_seconds
    charge_left = capacity_as - charge_drawn.cumsum()
    return charge_left / capacity_as

def thevenin_model(df):
    """
    Return a state-of-charge (SOC) series computed from 'Current' and
    'Datetime'.

    NOTE(review): the computation is currently the same coulomb counting as
    coulomb_counting(); no equivalent-circuit (Thevenin) model and no
    voltage term are implemented here.
    """
    capacity_as = 8.708 * 3600 # capacity in ampere-seconds
    step_seconds = df["Datetime"].diff().dt.total_seconds().fillna(0)
    # /1000: presumably converts the current from mA to A - confirm units.
    charge_drawn = (df["Current"]/1000) * step_seconds
    charge_left = capacity_as - charge_drawn.cumsum()
    return charge_left / capacity_as

def ML_trained_by_coulomb_counting(df):
    """
    Return SOC predicted by the global model `xgbr_SOC`.

    The model is trained on coulomb-counted SOC and uses the columns listed
    in `params_x_for_ml_soc` (temperature, voltage, current, power).
    WARNING: the ride used to train `xgbr_SOC` must be a full discharge of
    the battery from full to empty.
    """
    soc_features = df[params_x_for_ml_soc]
    return xgbr_SOC.predict(soc_features)

def add_soc_feature(df, method):
    """
    Add an 'SOC' column to `df` in place.

    `method` is called with `df` and must return the SOC values, one per
    row. Nothing is returned.
    """
    estimated_soc = method(df)
    df.loc[:,"SOC"] = estimated_soc

### Process and Display Charging data

In [None]:
# WARNING: this charging log is not timestamped with real time (its timestamps
# start at 1970), so the minimum-datetime filter in read_file must be removed
# before running this cell, otherwise every row is discarded.
df_ina = process_charge_data([raw_data_path+"data_charge_14-5-21.csv"])

In [None]:
def resample_ina_df(df_ina):
    """
    Average the INA226 data into 1-second bins and plot the result.

    Only numeric columns are averaged; text columns such as 'sensor' cannot
    be averaged and are left out. The resampled frame is passed to
    display_charge_data; nothing is returned.
    """
    resampled = (df_ina.set_index("Datetime")
                 .resample('1s')
                 .mean(numeric_only=True))

    display_charge_data(resampled)

# Limit the plot to the first 2,500,000 samples of the charging log.
df_ina_subset = df_ina.head(2500000)
# Display charging profile
resample_ina_df(df_ina_subset)

# Do machine learning

## Generate training data

In [None]:
# Build the training set from the ride logs listed below. Commented-out
# entries are other logs that are currently excluded.
df_ml_train, raw_dfs_train = process_for_ml([
# raw_data_path+"hampsted_trip_1-4-2021.csv",
# raw_data_path+"data_19-4-21.csv",
# raw_data_path+"data_icah_20-4-21.csv",
# raw_data_path+"data_27-4-21.csv",
# raw_data_path+"data_28-4-21.csv",
# raw_data_path+"data_6-5-21.csv",
# raw_data_path+"data_8-5-2021.csv",
# raw_data_path+"data_10-5-21.csv",
# raw_data_path+"data_10-5-21-v2.csv",
#raw_data_path+"data_11-5-21.csv",
raw_data_path+"data_12-5-21-v1.csv",
raw_data_path+"data_12-5-21-v2.csv",
raw_data_path+"data_13-5-21-clean.csv",
# raw_data_path+"data_14-5-21-putney-heath-circuit.csv",
# raw_data_path+"data_15-5-21.csv",
# raw_data_path+"data_17-5-21.csv",
# raw_data_path+"data_18-5-21_enoch.csv"
])

In [None]:
# Separate ride log used as the test set for the power model.
df_ml_test, raw_dfs_test = process_for_ml([raw_data_path+"data_11-5-21.csv"])

In [None]:
# Ride log used to train the SOC model; the per-sensor frames are not needed.
df_ml_soc_trainer, _ = process_for_ml([raw_data_path+"data_17-5-21.csv"])

### Train ML model to predict SOC from voltage and Current

In [None]:
# Label the SOC training ride with coulomb-counted SOC, then fit a regressor
# that predicts SOC from temperature, voltage, current and power.
add_soc_feature(df_ml_soc_trainer, coulomb_counting)
x, y = split_x_y(df_ml_soc_trainer, params_x_for_ml_soc, params_y_for_ml_soc) # WARNING: df_ml_soc_trainer must be a full discharge of battery from full to empty
xgbr_SOC = do_ml(x, y)

#### Add SOC as a feature

In [None]:
# Add the model-predicted SOC as a feature of both the training and test sets.
add_soc_feature(df_ml_train, ML_trained_by_coulomb_counting)
add_soc_feature(df_ml_test, ML_trained_by_coulomb_counting)

## Display variables for initial viewing

In [None]:
# Plot every per-sensor frame of the training rides.
display_all_variables(*raw_dfs_train)

In [None]:
# Plot every per-sensor frame of the test ride.
display_all_variables(*raw_dfs_test)

In [None]:
display_gps_positions(raw_dfs_train[1]) # df_gps is index 1. TODO: don't use indexes; use labels for readability

In [None]:
display_gps_positions(raw_dfs_test[1]) # df_gps is index 1. TODO: don't use indexes; use labels for readability

### Test Train Split

In [None]:
# Features and target of the power model for the training rides.
xtrain, ytrain = split_x_y(df_ml_train, params_x_for_ml, params_y_for_ml)

In [None]:
# Features and target of the power model for the test ride.
xtest, ytest = split_x_y(df_ml_test, params_x_for_ml, params_y_for_ml)

## Export dataframe to pickle

In [None]:
def export_to_pickle(df, path="df_ml_train.pkl"):
    """
    Print the first rows of `df` and save the frame as a pickle file.

    Parameters
    ----------
    df : DataFrame to export.
    path : destination file; defaults to "df_ml_train.pkl" so existing
        calls keep writing to the same file.
    """
    print(df.head())
    df.to_pickle(path)

# Save the merged training set for use outside this notebook.
export_to_pickle(df_ml_train)

### Display SOC vs power

In [None]:
# Power against state of charge for the training rides. The x axis is
# reversed so it reads from high to low SOC.
fig =  px.line(df_ml_train, x="SOC", y="Power", title='SOC vs Power')
fig.update_xaxes(autorange="reversed")
fig.update_yaxes(title = "Power[mW]")
fig.write_html("output/soc_vs_power.html")
fig.show()

### Display SOC vs Time

In [None]:
# State of charge of the test ride against time. The y axis shows SOC (not
# power) and the figure gets its own file so that it does not overwrite the
# SOC-vs-power plot.
fig = px.line(df_ml_test, x="Datetime", y="SOC", title='SOC over Time')
fig.update_yaxes(title_text="SOC")
fig.write_html("output/soc_vs_time.html")
fig.show()

## Group power into grid squares of longitude/latitude

In [None]:
def group_data_location(df):
    """
    Average the numeric columns of `df` over a latitude/longitude grid.

    Every row is assigned to a grid square of 0.0002 degrees per side
    (coordinates rounded down to a multiple of the step). The bin columns
    'latbin' and 'lonbin' are added to `df` itself. Text columns cannot be
    averaged and are left out of the result.

    Returns a DataFrame indexed by (latbin, lonbin) holding the per-square
    means.
    """
    step = 0.0002
    df["latbin"] = np.floor(df.latitude / step) * step
    df["lonbin"] = np.floor(df.longitude / step) * step
    # numeric_only: mean() raises on text columns in recent pandas versions.
    groups = df.groupby(["latbin", "lonbin"]).mean(numeric_only=True)
    return groups

def group_data_time_interval(df):
    """
    Average the numeric columns of `df` over 1-second intervals.

    Rows are grouped on the 'Datetime' column. Text columns cannot be
    averaged and are left out; intervals with missing values (including
    intervals without any sample) are dropped.

    Returns a DataFrame indexed by the start of each interval.
    """
    per_second = pd.Grouper(key="Datetime", freq="1s")
    # numeric_only: mean() raises on text columns in recent pandas versions.
    groups = df.groupby(per_second).mean(numeric_only=True)
    return groups.dropna()

# Average the training data per second and show it on the map.
# NOTE(review): display_gps_positions_bins is neither defined nor imported in
# the visible part of this notebook - confirm where it comes from, otherwise
# this cell raises a NameError.
df_ml_train_grouped = group_data_time_interval(df_ml_train)
display_gps_positions_bins(df_ml_train_grouped)
# Alternative: average per grid square instead of per second.
# df_ml_train_grouped = group_data_location(df_ml_train)
# display_gps_positions_bins(df_ml_train_grouped)

## Display distribution of power parameters

In [None]:
import plotly.express as px

# Histograms of power, battery voltage and current of the training rides
# (raw_dfs_train[0] is the INA226 frame).
for column, plot_title, axis_label in (
        ("Power", 'Power Distribution', "Power[mW]"),
        ("Battery_Voltage", 'Battery Voltage Distribution', "Voltage[V]"),
        ("Current", 'Current Distribution', "Current[mA]"),
):
    fig = px.histogram(raw_dfs_train[0], x=column, title=plot_title)
    fig.update_xaxes(title_text=axis_label)
    fig.show()

### Display speed vs power

In [None]:
# Filtered and unfiltered speed over time (raw_dfs_train[4] is the
# motor-speed frame). show() returns None, so `fig` is None afterwards.
for speed_column in ("filtered_motor_rpm", "motor_rpm"):
    fig = px.line(raw_dfs_train[4], x="Datetime", y=speed_column, title='Speed vs Time').show()

### Display Lat/Long vs power(unbinned)

In [None]:
#latitude_start_pt, longitude_start_pt = 51.45282, -0.2275045
from scipy.signal import find_peaks

# Latitude and longitude of the start point of the circuit.
L1 = [51.45282, -0.2275045]
def plot_proximity_to_start_point(df):
    """
    Split a ride into loops around the circuit and plot the power per loop.

    Adds two columns to `df`:
      * 'distance'    - straight-line distance from the start point `L1`,
                        computed directly on latitude/longitude (degrees);
      * 'loop_number' - 0 before the first pass of the start point, then
                        1, 2, ... for each loop between consecutive passes.
                        Rows after the last detected pass keep 0.

    A pass of the start point is a local minimum of the distance that is
    closer than 0.0005 and at least 2000 samples after the previous one.
    Shows the distance over time, then writes the per-loop power profile
    to output/Power_profile_on_each_loop.html and shows it.
    """
    df['distance'] = df[['latitude', 'longitude']].sub(np.array(L1)).pow(2).sum(1).pow(0.5)

    px.line(df, x="Datetime", y="distance", title='Distance from start point').show()

    # Minima of the distance are peaks of its negative.
    indices = find_peaks(-df['distance'].to_numpy(), distance=2000, height=-0.0005)[0]

    df["loop_number"] = 0
    # find_peaks returns row positions, not index labels, so the rows are
    # addressed by position; the index of `df` may have gaps.
    loop_column = df.columns.get_loc("loop_number")
    for loop_number, (start, stop) in enumerate(zip(indices[:-1], indices[1:]), start=1):
        df.iloc[start:stop, loop_column] = loop_number

    fig = px.line(df, x="latitude", y="Power", color='loop_number', title= "Power profile on each loop")
    fig.write_html("output/Power_profile_on_each_loop.html")

    fig.show()


plot_proximity_to_start_point(df_ml_train)

### Calculate Energy Consumption each loop

In [None]:
def calculate_energy_consumption_for_each_loop(df_ml):
    """
    Return the energy used in each loop of the ride.

    Loops are identified by the 'loop_number' column. The result has one
    row per loop number, in order of first appearance, with the columns
    'Loop_number' and 'Energy_Kilo_Joules'.
    """
    def loop_energy(loop_id):
        lap = df_ml[df_ml["loop_number"] == loop_id]
        return energy_from_power_time(lap["Datetime"], lap["Power"])

    records = [(loop_id, loop_energy(loop_id)) for loop_id in df_ml['loop_number'].unique()]

    return pd.DataFrame(records, columns=['Loop_number', 'Energy_Kilo_Joules'])

def plot_energy_consumption_per_loop(df_ml):
    """
    Plot the energy used in each loop of the ride.

    The y axis is fixed to the range 48..65. The figure is written to
    output/Energy_consumption_per_loop.html and shown.
    """
    loop_energies = calculate_energy_consumption_for_each_loop(df_ml)
    chart = px.line(loop_energies, x="Loop_number", y="Energy_Kilo_Joules", title='Energy consumption of each loop around Putney Heath')
    chart.update_layout(yaxis_range=[48,65])
    chart.write_html("output/Energy_consumption_per_loop.html")
    chart.show()

plot_energy_consumption_per_loop(df_ml_train)

### Display Lat/Long vs power(binned)

In [None]:
def plot_proximity_to_start_point_binned(df):
    """
    Split a ride into loops using index labels and plot the power per loop.

    Adds 'distance' (distance from the start point `L1`), 'ts' (a copy of
    the index) and 'loop_number' to `df`. Loops are delimited by the minima
    of the distance that are closer than 0.0005 and at least 2000 samples
    apart. Rows are assigned by index label, from the label at one pass of
    the start point up to and including the label at the next one. The
    figure is written to output/Power_profile_on_each_loop_binned.html and
    shown.
    """
    start_point = np.array(L1)
    df['distance'] = df[['latitude', 'longitude']].sub(start_point).pow(2).sum(1).pow(0.5)

    # Minima of the distance are peaks of its negative.
    peak_positions = find_peaks(-df['distance'],
                                distance = 2000,
                                height=-0.0005)[0]

    df["ts"] = df.index.values
    df["loop_number"] = 0
    for loop_id, (first, last) in enumerate(zip(peak_positions[:-1], peak_positions[1:]), start=1):
        start_time = df["ts"].iloc[first]
        end_time = df["ts"].iloc[last]
        df.loc[start_time:end_time, "loop_number"] = loop_id

    chart = px.line(df, x="latitude", y="Power", color='loop_number', title= "Power profile on each loop")
    chart.write_html("output/Power_profile_on_each_loop_binned.html")

    chart.show()

plot_proximity_to_start_point_binned(df_ml_train)

In [None]:
# Power against latitude for the training rides. show() returns None, so
# `fig` is None afterwards.
fig =  px.scatter(df_ml_train, x="latitude", y="Power", title='Latitude vs Power').show()

### Display IMU data

In [None]:

def display_imu_plots(df_imu):
    """
    Scatter-plot x acceleration against x angular velocity.

    Draws the raw and the filtered IMU data as two marker traces in one
    figure and shows it.
    """
    # (gyro column, acceleration column, legend name) for each trace.
    series_to_plot = (("gyro_x", "acceleration_x", 'Raw data'),
                      ("gyro_x_filtered", "acceleration_x_filtered", 'Filtered data'))

    fig = go.Figure()
    for gyro_column, accel_column, label in series_to_plot:
        fig.add_trace(go.Scatter(x=df_imu[gyro_column],
                                 y=df_imu[accel_column],
                                 mode='markers',
                                 marker=dict(size=1),
                                 name=label))

    fig.update_layout(
        title="IMU Acceleration vs Angular velocity(gyro)",
        xaxis_title="Angular Velocity[rad/s]",
        yaxis_title="Acceleration[m/s^2]",
    )
    fig.show()

display_imu_plots(raw_dfs_train[5])

In [None]:
# Raw IMU x acceleration against x angular velocity, drawn with small markers
# (raw_dfs_train[5] is the IMU frame).
fig = px.scatter(raw_dfs_train[5], x="gyro_x", y="acceleration_x", title='Acceleration[m/s^2] vs Angular velocity[rad/s]')
fig.update_traces(marker=dict(size=1))
fig.show()

In [None]:
# Raw and filtered x acceleration over time. show() returns None, so `fig`
# is None afterwards.
fig = px.scatter(raw_dfs_train[5], x="Datetime", y=["acceleration_x","acceleration_x_filtered"], title='Acceleration over Time').show()

In [None]:
# Raw and averaged power over time for the training rides. show() returns
# None, so `fig` is None afterwards.
fig = px.line(df_ml_train, x="Datetime", y=["Power","Power_averaged"], title='Power over Time').show()

## Train ML model

In [None]:
# Train the power-prediction model on the training rides.
x, y = split_x_y(df_ml_train, params_x_for_ml, params_y_for_ml)
xgbr = do_ml(x, y)

## Plot predicted and actual data

In [None]:
def plot_predicted_data(xgbr, timestamps, ytest, ypred):
    """
    Plot measured and predicted power over time in one figure.

    `xgbr` is accepted for interface compatibility but is not used. The
    figure is written to output/Predicted_plot.html and shown.
    """
    fig = go.Figure()
    for values, label in ((ytest, 'Actual data'), (ypred, 'Regression Fit')):
        fig.add_traces(go.Scatter(x=timestamps, y=values, name=label))

    fig.update_layout(
        title="Power consumption, predicted",
        xaxis_title="Time(UTC)",
        yaxis_title="Power[mW]",
    )
    fig.write_html("output/Predicted_plot.html")

    fig.show()

# Predict power for the test ride, print the measured vs. predicted energy
# and plot both power series.
x, y  = split_x_y(df_ml_test, params_x_for_ml, params_y_for_ml)
ypred = xgbr.predict(x)

print_power_consumption_score(df_ml_test["Datetime"], y, ypred)
plot_predicted_data(xgbr, df_ml_test["Datetime"], y, ypred)

## Display interesting variables

In [None]:
# Plot selected variables of the test ride together with the trained model.
display_interesting_variables(df_ml_test, xgbr)

## Use Neural Network with multidimensional input to do regression
Currently, we pass individual rows into the XGBoost model. What if we could instead feed in a snapshot of 10 seconds of data containing all the features, and calculate the energy consumption of that snapshot? It would be like a photograph used in deep neural networks: a two-dimensional input.