In [3]:
import pandas as pd
import numpy as np
from random import shuffle

# Load the training and validation sets from pickle files (paths are relative
# to the notebook's working directory).
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
train = pd.read_pickle("datas/training_set.pckl")
validation = pd.read_pickle("datas/validation_set.pckl")

def get_features_charge_item(battery_charge_df):
    """Summarise the readings of one cycle as per-column means.

    Args:
        battery_charge_df: Column-indexable container (``process_data`` passes
            a DataFrame slice holding the rows of a single cycle) exposing
            'voltage_measured', 'current_measured', 'temperature_measured',
            'current_charge' and 'voltage_charge'.

    Returns:
        dict mapping each of those five column names to ``np.mean`` of that
        column, in the order listed above.
    """
    feature_columns = (
        'voltage_measured',
        'current_measured',
        'temperature_measured',
        'current_charge',
        'voltage_charge',
    )
    return {
        column: np.mean(battery_charge_df[column])
        for column in feature_columns
    }

def _cycle_features(cycle_df):
    """Return the feature dict of one cycle with its rounded mean quality as 'label'."""
    # int() raises ValueError when every 'quality' value in the cycle is NaN.
    label = int(round(np.nanmean(cycle_df['quality']), 0))
    features = get_features_charge_item(cycle_df)
    features['label'] = label
    return features


def process_data(df, seed=None):
    """Build one feature row per charge cycle and per discharge cycle.

    Rows are grouped by 'battery_nb'; within each battery, every distinct
    non-NaN 'charge_nb' and every distinct non-NaN 'discharge_nb' produces one
    row made of the means returned by ``get_features_charge_item`` plus a
    'label' column (mean 'quality' of the cycle, rounded to an int).

    Cycles are taken from the battery's own rows. Discharge cycles are
    enumerated directly from their ids, so they are kept even when the ids do
    not start at 1, are not contiguous, or outnumber the charge cycles.

    Args:
        df: DataFrame with 'battery_nb', 'charge_nb', 'discharge_nb',
            'quality' and the columns read by ``get_features_charge_item``.
        seed: Optional seed for the final shuffle. ``None`` (default) shuffles
            with the global ``random`` state, exactly as before; any other
            value makes the row order reproducible.

    Returns:
        DataFrame with one shuffled row per cycle (empty if ``df`` has no
        cycles).

    Raises:
        ValueError: if a cycle has no non-NaN 'quality' value.
    """
    features_list = []
    # groupby skips NaN keys, so rows that belong to no cycle of the given
    # kind are ignored; sort=False keeps first-appearance order.
    for _, battery_df in df.groupby('battery_nb', sort=False):
        for cycle_column in ('charge_nb', 'discharge_nb'):
            for _, cycle_df in battery_df.groupby(cycle_column, sort=False):
                features_list.append(_cycle_features(cycle_df))

    if seed is None:
        shuffle(features_list)
    else:
        # Local import: the file only imports `shuffle` from `random`.
        from random import Random
        Random(seed).shuffle(features_list)

    return pd.DataFrame(features_list)

# Turn the raw training rows into the shuffled per-cycle feature/label table.
pd_train = process_data(train)

In [4]:
# Preview only the first rows instead of rendering the whole raw frame.
train.head(10)

Unnamed: 0,battery_nb,datetime,charge_nb,voltage_measured,current_measured,temperature_measured,current_charge,voltage_charge,ambiant_temp,discharge_nb,capacity,quality
208240,27,2009-02-13 19:35:35.093,1.0,3.286155,-0.000266,26.737494,-0.0006,0.002,24,,,1
208241,27,2009-02-13 19:35:37.609,1.0,2.919267,-2.258031,26.737649,-2.2691,1.828,24,,,1
208242,27,2009-02-13 19:35:40.297,1.0,3.499919,1.490316,26.748312,1.4995,4.238,24,,,1
208243,27,2009-02-13 19:35:42.765,1.0,3.518579,1.492735,26.774100,1.4995,4.263,24,,,1
208244,27,2009-02-13 19:35:45.234,1.0,3.531412,1.491124,26.801882,1.4995,4.288,24,,,1
208245,27,2009-02-13 19:35:47.765,1.0,3.542920,1.491289,26.814379,1.4995,4.301,24,,,1
208246,27,2009-02-13 19:35:50.281,1.0,3.553293,1.491827,26.841875,1.4995,4.301,24,,,1
208247,27,2009-02-13 19:35:52.734,1.0,3.562766,1.493195,26.871709,1.4995,4.314,24,,,1
208248,27,2009-02-13 19:35:55.203,1.0,3.571612,1.490848,26.891278,1.4995,4.326,24,,,1
208249,27,2009-02-13 19:35:57.672,1.0,3.580154,1.490725,26.913060,1.4995,4.339,24,,,1
