In [None]:
import scipy.io
import numpy as np
import pandas as pd
from datetime import datetime

This function shows a breakdown of the battery data.

In [None]:
def Battery_data_breakdown(Battery_name,Battery):
  """Print a structural summary of one battery record.

  Args:
    Battery_name: Label printed on the first line of the report.
    Battery: Structured array with a "cycle" field, indexed as
      ``Battery["cycle"][0,0][0,i]`` (presumably the array obtained from
      ``scipy.io.loadmat`` for this battery -- confirm against the caller).

  Returns:
    None. The summary is written to stdout.

  Note:
    The report inspects entries 1 and 2 of the cycle array, so the array
    must contain at least three entries.
  """
  # Work on the argument itself rather than on a module-level variable.
  cycles = Battery["cycle"][0,0]

  print(f"Battery name: {Battery_name}")
  print(f"Battery data is now in {type(Battery)} format.")
  print(f"cycle has a shape of {cycles.shape}")

  # Values are bound to names first so that no f-string has to nest the
  # same quote character (a syntax error before Python 3.12).
  cycle_field_names = cycles[0,1].dtype.names
  print(f"Each column in Cycle has {len(cycle_field_names)} field names")
  print(cycle_field_names)

  data_field_names = cycles[0,2]["data"].dtype.names
  print(f"The 'data' field name shows the following field names {data_field_names} within the multi nested array.")


We want to confirm that the measurement entries are stored one after the other in time. To do so, we sort the entries with sort_values and check that each entry's index position aligns with its battery test timestamp.

We partition the rows after sorting the dataframe based on the battery timestamp. <br>

We compare the columns "Measurement Data Entry Number" and "Battery Measurement Partitioning" to confirm that the timestamps are in chronological order.

This is a data validation check to ensure that each battery measurement will align with the correct cycle.

In [None]:

def Validation_check_battery_measurement_order(Battery_name,battery):
  """Check that a battery's measurement entries are stored chronologically.

  Every entry of the cycle array is numbered by its storage position
  (1-based). The entries are then sorted by timestamp and numbered again;
  if the two numberings disagree anywhere, the stored order is not
  chronological.

  Args:
    Battery_name: Label printed before the verdict and stored on each row.
    battery: Structured array with a "cycle" field whose entries expose
      "type" and "time"; the first six values of "time" are read as year,
      month, day, hour, minute, second (fractional parts are truncated).

  Returns:
    None. Prints the battery name followed by either "Validation passed"
    or "Measurement entries not in chronological order".
  """
  column_names = ['Type of measurement','Battery cycle time','Measurement Data Entry Number','Battery name']
  cycles = battery["cycle"][0,0]
  print(Battery_name)

  rows = []
  for position in range(cycles.size):
    entry = cycles[0,position]
    type_of_measurement = str(entry["type"][0])
    year, month, day, hour, minute, second = (int(part) for part in entry["time"][0].tolist()[:6])
    rows.append([type_of_measurement,
                 datetime(year, month, day, hour, minute, second),
                 position + 1,
                 Battery_name])

  # Passing the column names to the constructor also works with zero rows.
  df_Battery_measurements = pd.DataFrame(rows, columns=column_names)

  # A stable sort keeps entries with identical timestamps in their stored
  # order, so ties are never reported as a chronology violation.
  df_Battery_measurements_sorted = df_Battery_measurements.sort_values(by='Battery cycle time', ascending=True, kind='mergesort')
  df_Battery_measurements_sorted['Battery Measurement Partitioning'] = df_Battery_measurements_sorted.groupby('Battery name').cumcount() + 1
  out_of_order = df_Battery_measurements_sorted['Measurement Data Entry Number'] != df_Battery_measurements_sorted['Battery Measurement Partitioning']

  if out_of_order.any():
    print("Measurement entries not in chronological order")
  else:
    print("Validation passed")

We need to get the initial capacity of the battery to determine the SOH value after each cycle. Some batteries were not at full capacity when the experiment was started. This can lead to an incorrect initial capacity value. For example, this happens when the initial measured battery capacity is taken while the battery was only 75% full at the start of the experiment.

The function below waits for the battery to be in a charge state (fully charged) before it calculates the initial capacity (after the 1st charge state).

In [None]:
def get_inital_capacity(Battery_name,battery):
  """Return the capacity of the first discharge that follows a charge.

  Discharges recorded before any charge are skipped because the battery may
  not have been at full capacity at that point.

  Args:
    Battery_name: Battery label; only used in the error message.
    battery: Structured array with a "cycle" field whose entries expose
      "type" and, for discharge entries, ``["data"][0,0]["Capacity"]``
      holding a single value.

  Returns:
    The capacity of the first qualifying discharge as a Python float.

  Raises:
    ValueError: If no discharge entry follows a charge entry.
  """
  cycles = battery["cycle"][0,0]
  charged = False
  for position in range(cycles.size):
    entry = cycles[0,position]
    type_of_measurement = str(entry["type"][0])
    if type_of_measurement == 'charge':
      charged = True
    elif type_of_measurement == 'discharge' and charged:
      # Only the first qualifying discharge matters, so stop scanning here.
      # .item() extracts the single stored value without relying on the
      # deprecated float() conversion of an array with ndim > 0.
      return float(np.asarray(entry["data"][0,0]["Capacity"]).item())
  raise ValueError(f"No discharge following a charge was found for battery {Battery_name}")


We extract features from charge and discharge.

We do not extract any impedance values because impedance was not measured in every cycle. Impedance was also measured inconsistently; sometimes it was measured twice per cycle.  

We increment the cycle only when there is a new charge state.
We put the features into a dictionary then later convert the dictionary into a dataframe.

In [None]:
def input_feature_extraction(Battery_name,battery):
  """Build one feature row per charge/discharge cycle of a battery.

  The cycle counter advances on every 'charge' entry. A 'discharge' entry
  seen after at least one charge is combined with the most recent charge
  features and stored under the key ``cycle - 1``; if several discharges
  follow the same charge, the last one overwrites the earlier ones.
  Entries of any other type are ignored.

  SOH is the discharge capacity as a percentage of the initial capacity,
  i.e. the capacity of the first discharge that follows a charge. This is
  the same rule get_inital_capacity applies, but the value is captured
  during this single pass instead of rescanning the battery for every row.

  Args:
    Battery_name: Label stored in the 'Battery name' column.
    battery: Structured array with a "cycle" field whose entries expose
      "type", "time" and "data"; "data" must provide "Time",
      "Voltage_measured", "Current_measured", "Temperature_measured" and,
      for discharge entries, a single-valued "Capacity".

  Returns:
    DataFrame indexed by ``cycle - 1`` with the 17 feature columns listed
    below. It is empty (columns only) when no discharge follows a charge.
  """
  column_names = ['Battery_measurement_time','Battery name','Cycle'
                  ,'Charge_time_measured','Charge_min_Voltage','Charge_max_Voltage'
                  ,'Charge_min_Current_measured','Charge_max_Current_measured','Charge_Temp_measured'
                  ,'Discharge_time_measured','Discharge_min_Voltage_measured','Discharge_max_Voltage_measured'
                  ,'Discharge_min_Current_measured','Discharge_max_Current_measured','Discharge_Temp_measured','Capacity','SOH']

  cycles = battery["cycle"][0,0]
  cycle = 0
  initial_capacity = None  # set by the first discharge that follows a charge
  charge_features = None   # features of the most recent charge entry
  input_features = {}

  for position in range(cycles.size):
    entry = cycles[0,position]
    type_of_measurement = str(entry["type"][0])

    if type_of_measurement == 'charge':
      cycle = cycle + 1
      time = entry["time"][0].tolist()
      # The row timestamp is the start time of the charge entry.
      Battery_measurement_time = datetime(year = int(time[0]),month = int(time[1]), day = int(time[2]),hour = int(time[3]), minute = int(time[4]),second = int(time[5]))
      measured = entry["data"][0,0]
      charge_features = [Battery_measurement_time, Battery_name, cycle - 1
                         ,np.max(measured["Time"])
                         ,np.min(measured["Voltage_measured"]),np.max(measured["Voltage_measured"])
                         ,np.min(measured["Current_measured"]),np.max(measured["Current_measured"])
                         ,np.mean(measured["Temperature_measured"])]

    elif type_of_measurement == 'discharge' and cycle > 0:
      measured = entry["data"][0,0]
      discharge_features = [np.max(measured["Time"])
                            ,np.min(measured["Voltage_measured"]),np.max(measured["Voltage_measured"])
                            ,np.min(measured["Current_measured"]),np.max(measured["Current_measured"])
                            ,np.mean(measured["Temperature_measured"])]

      # .item() extracts the single stored value without relying on the
      # deprecated float() conversion of an array with ndim > 0.
      Capacity = float(np.asarray(measured["Capacity"]).item())
      if initial_capacity is None:
        initial_capacity = Capacity

      #Derived Target Variable
      SOH = (Capacity/initial_capacity)*100

      input_features[cycle - 1] = charge_features + discharge_features + [Capacity, SOH]

  # Supplying the column names here keeps the empty case valid as well.
  return pd.DataFrame.from_dict(input_features, orient='index', columns=column_names)

In [None]:
# Batteries to process; each name is passed to scipy.io.loadmat and is also
# the key of the battery record inside the loaded file.
Battery_list = ["B0005","B0006","B0018","B0026","B0027","B0028"
                ,"B0030","B0031","B0032","B0033","B0034","B0036","B0038","B0039"
                ,"B0040","B0041","B0042","B0043","B0044","B0045","B0046","B0047"
                ,"B0048","B0049","B0051","B0053","B0054","B0055"
                ,"B0056"]

# Collect one feature table per battery and concatenate them once at the end;
# concatenating inside the loop would copy the growing frame on every pass.
feature_frames = []
for b in Battery_list:
  Battery_name = b
  mat = scipy.io.loadmat(b)
  battery = mat[Battery_name]

  Battery_data_breakdown(Battery_name,battery)
  Validation_check_battery_measurement_order(Battery_name,battery)

  feature_frames.append(input_feature_extraction(Battery_name,battery))

# Each battery keeps its own per-cycle index, so index values repeat in df.
df = pd.concat(feature_frames)

Battery name: B0005
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 616)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0005
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0006
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 616)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0006
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0018
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 319)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_load', 'Voltage_load', 'Time', 'Capacity') within the multi nested array.
B0018
Validation passed
Battery name: B0026
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 80)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Sense_current', 'Battery_current', 'Current_ratio', 'Battery_impedance', 'Rectified_Impedance', 'Re', 'Rct') within the multi nested array.
B0026
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0027
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 80)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Sense_current', 'Battery_current', 'Current_ratio', 'Battery_impedance', 'Rectified_Impedance', 'Re', 'Rct') within the multi nested array.
B0027
Validation passed
Battery name: B0028
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 80)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Sense_current', 'Battery_current', 'Current_ratio', 'Battery_impedance', 'Rectified_Impedance', 'Re', 'Rct') within the multi nested array.
B0028
Validation passed
Battery name: B0030
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 97)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'ti

  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0033
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 486)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0033
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0034
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 486)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0034
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0036
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 486)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0036
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0038
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 122)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0038
Validation passed
Battery name: B0039
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 122)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0039
Validation passed
Battery name: B0040
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 122)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'tim

  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0041
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 163)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Sense_current', 'Battery_current', 'Current_ratio', 'Battery_impedance', 'Rectified_Impedance', 'Re', 'Rct') within the multi nested array.
B0041
Validation passed
Battery name: B0042
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 275)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_load', 'Voltage_load', 'Time', 'Capacity') within the multi nested array.
B0042
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0043
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 275)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_load', 'Voltage_load', 'Time', 'Capacity') within the multi nested array.
B0043
Validation passed
Battery name: B0044
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 275)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_load', 'Voltage_load', 'Time', 'Capacity') within the multi nested array.
B0044
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0045
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 184)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0045
Validation passed
Battery name: B0046
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 184)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0046
Validation passed
Battery name: B0047
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 184)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'tim

  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0048
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 184)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0048
Validation passed
Battery name: B0049
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 62)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0049
Validation passed
Battery name: B0051
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 62)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time'

  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0054
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 253)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0054
Validation passed
Battery name: B0055
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 252)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0055
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))
  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


Battery name: B0056
Battery data is now in <class 'numpy.ndarray'> format.
cycle has a shape of (1, 252)
Each column in Cycle has 4 field names
('type', 'ambient_temperature', 'time', 'data')
The 'data' field name shows the following field names ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Current_charge', 'Voltage_charge', 'Time') within the multi nested array.
B0056
Validation passed


  Capacity = float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])
  capacity_values.append((float(battery["cycle"][0,0][0,i]["data"][0,0]["Capacity"])))


In [None]:
# Display the combined per-cycle feature table of all processed batteries.
df

Unnamed: 0,Battery_measurement_time,Battery name,Cycle,Charge_time_measured,Charge_min_Voltage,Charge_max_Voltage,Charge_min_Current_measured,Charge_max_Current_measured,Charge_Temp_measured,Discharge_time_measured,Discharge_min_Voltage_measured,Discharge_max_Voltage_measured,Discharge_min_Current_measured,Discharge_max_Current_measured,Discharge_Temp_measured,Capacity,SOH
0,2008-04-02 13:08:17,B0005,0,7597.875,3.479394,4.209949,-4.030268,1.514393,25.324079,3690.234,2.612467,4.191492,-2.018015,0.000729,32.572328,1.856487,100.000000
1,2008-04-02 16:37:51,B0005,1,10516.000,3.001951,4.213016,-3.361983,1.515178,26.635623,3672.344,2.587209,4.189773,-2.016821,0.002927,32.725235,1.846327,99.452721
2,2008-04-02 20:55:40,B0005,2,10484.547,3.035879,4.212788,-3.384408,1.516894,26.778176,3651.641,2.651917,4.188187,-2.016574,0.001484,32.642862,1.835349,98.861386
3,2008-04-03 01:12:38,B0005,3,10397.890,3.066145,4.212924,-3.412263,1.517503,26.703204,3631.563,2.592948,4.188461,-2.015936,0.001547,32.514876,1.835263,98.856718
4,2008-04-03 05:27:49,B0005,4,10495.203,3.063766,4.212874,-3.403625,1.516949,26.617004,3629.172,2.547420,4.188299,-2.017426,0.001701,32.382349,1.834646,98.823482
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,2010-09-29 16:49:33,B0056,96,10808.032,3.728099,4.200081,-0.000898,1.501263,6.255303,2345.000,2.687277,4.181822,-1.996556,0.003793,10.116013,1.130219,84.071445
97,2010-09-29 20:31:38,B0056,97,10803.219,3.731536,4.200058,-0.001947,1.501476,6.281539,2363.047,2.698012,4.181950,-1.996388,0.003859,10.332244,1.125872,83.748078
98,2010-09-30 00:13:58,B0056,98,10803.406,3.733652,4.200285,0.000647,1.501921,6.234004,2316.687,2.692736,4.181791,-1.996668,0.003194,10.278021,1.143011,85.022956
99,2010-09-30 04:31:22,B0056,99,10802.032,3.771261,4.200213,0.000622,1.501952,6.129910,2322.000,2.689910,4.173014,-1.996444,0.002816,10.263503,1.137273,84.596160


In [None]:
# Summarize column dtypes and non-null counts of the combined feature table.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2486 entries, 0 to 100
Data columns (total 17 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   Battery_measurement_time        2486 non-null   datetime64[ns]
 1   Battery name                    2486 non-null   object        
 2   Cycle                           2486 non-null   int64         
 3   Charge_time_measured            2486 non-null   float64       
 4   Charge_min_Voltage              2475 non-null   float64       
 5   Charge_max_Voltage              2475 non-null   float64       
 6   Charge_min_Current_measured     2475 non-null   float64       
 7   Charge_max_Current_measured     2475 non-null   float64       
 8   Charge_Temp_measured            2475 non-null   float64       
 9   Discharge_time_measured         2486 non-null   float64       
 10  Discharge_min_Voltage_measured  2486 non-null   float64       
 11  Discharge_

In [None]:
# Show only the feature rows that belong to battery B0030.
Condition = df["Battery name"] == "B0030"
df[Condition]

Unnamed: 0,Battery_measurement_time,Battery name,Cycle,Charge_time_measured,Charge_min_Voltage,Charge_max_Voltage,Charge_min_Current_measured,Charge_max_Current_measured,Charge_Temp_measured,Discharge_time_measured,Discharge_min_Voltage_measured,Discharge_max_Voltage_measured,Discharge_min_Current_measured,Discharge_max_Current_measured,Discharge_Temp_measured,Capacity,SOH
0,2009-04-07 16:58:34,B0030,0,9888.75,3.02182,4.201827,-3.290196,1.518307,45.423717,1723.281,2.140106,4.191572,-4.02766,-0.000787,54.113241,1.781555,100.0
1,2009-04-07 20:14:29,B0030,1,9764.735,3.125027,4.20182,-3.434808,1.518813,45.575756,1705.906,2.165724,4.191315,-4.0264,0.000598,54.157828,1.761969,98.900643
2,2009-04-07 23:28:03,B0030,2,9742.438,3.161116,4.202479,-3.478684,1.518526,45.632809,1697.688,2.095376,4.19151,-4.027128,-0.001595,54.212019,1.751755,98.327313
3,2009-04-08 02:41:07,B0030,3,9674.14,3.17468,4.201855,-3.506427,1.519407,45.658192,1686.094,2.125247,4.191406,-4.027089,0.000209,54.198377,1.738169,97.564708
4,2009-04-08 06:28:36,B0030,4,9801.422,3.317394,4.202138,-3.702089,1.518731,44.936663,1678.563,2.075963,4.180936,-4.026802,-0.000197,54.141617,1.750675,98.266663
5,2009-04-09 10:54:17,B0030,5,9517.672,3.104378,4.201831,-3.392315,1.519537,45.637743,1677.469,2.044007,4.191417,-4.026685,-0.000396,54.211679,1.749105,98.178551
6,2009-04-09 14:03:18,B0030,6,9500.375,3.160019,4.201819,-3.483081,1.519084,45.64791,1666.093,2.173949,4.191679,-4.027008,0.001229,54.186349,1.725254,96.839813
7,2009-04-09 17:11:50,B0030,7,9474.375,3.207455,4.202865,-3.551811,1.518392,45.72364,1654.328,2.171263,4.191661,-4.026836,-0.000719,54.153601,1.711262,96.054416
8,2009-04-09 20:19:45,B0030,8,9446.969,3.224505,4.202787,-3.568337,1.52013,45.731068,1643.812,2.172279,4.190518,-4.027531,6.5e-05,54.168189,1.699042,95.368482
9,2009-04-10 00:02:46,B0030,9,9648.516,3.353701,4.202632,-3.766742,1.51968,45.030225,1652.266,2.139914,4.184948,-4.026805,0.000259,54.220571,1.718577,96.465005


In [None]:
# Save the combined feature table as CSV; with the default settings the
# DataFrame index is written as the first (unnamed) column.
df.to_csv("Battery_input_features_updated.csv")

In [None]:
# Count the missing values in each column of the combined feature table.
df.isnull().sum()

Unnamed: 0,0
Battery_measurement_time,0
Battery name,0
Cycle,0
Charge_time_measured,0
Charge_min_Voltage,11
Charge_max_Voltage,11
Charge_min_Current_measured,11
Charge_max_Current_measured,11
Charge_Temp_measured,11
Discharge_time_measured,0
