In [2]:
import pandas as pd

def inspect_csv(path, n=5, **read_csv_kwargs):
    """Load a CSV into a DataFrame and print a quick structural summary.

    Prints a banner containing the file path, followed by the column
    names, the first ``n`` rows, the per-column null counts and the total
    row count.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.
    n : int, default 5
        Number of leading rows shown in the "[Head]" section.
    **read_csv_kwargs
        Extra keyword arguments forwarded to ``pandas.read_csv``
        (for example ``index_col=0`` for a file that was saved together
        with its index column).

    Returns
    -------
    pandas.DataFrame
        The loaded frame, exactly as returned by ``pandas.read_csv``.
    """
    # IPython exposes ``display`` as a builtin inside a notebook kernel.
    # Outside of one the name does not exist, so fall back to ``print``
    # rather than failing with NameError halfway through the summary.
    try:
        show = display
    except NameError:
        show = print

    banner = "=" * 80
    print(banner)
    print(f"FILE: {path}")
    print(banner)

    df = pd.read_csv(path, **read_csv_kwargs)

    print("\n[Columns]")
    print(df.columns.tolist())

    print("\n[Head]")
    show(df.head(n))

    print("\n[Null count]")
    print(df.isna().sum())

    print("\n[Row count]")
    print(len(df))

    return df

 Stage A: NASA

In [3]:
# NASA metadata (loaded to get a sense of its structure)
nasa_meta = inspect_csv("../data_csv/NASAmetadata.csv")

FILE: ../data_csv/NASAmetadata.csv

[Columns]
['type', 'start_time', 'ambient_temperature', 'battery_id', 'test_id', 'uid', 'filename', 'Capacity', 'Re', 'Rct']

[Head]


Unnamed: 0,type,start_time,ambient_temperature,battery_id,test_id,uid,filename,Capacity,Re,Rct
0,discharge,[2010. 7. 21. 15. 0. ...,4,B0047,0,1,00001.csv,1.6743047446975208,,
1,impedance,[2010. 7. 21. 16. 53. ...,24,B0047,1,2,00002.csv,,0.0560578334388809,0.2009701658445833
2,charge,[2010. 7. 21. 17. 25. ...,4,B0047,2,3,00003.csv,,,
3,impedance,[2010 7 21 20 31 5],24,B0047,3,4,00004.csv,,0.053191858509211,0.1647339991486473
4,discharge,[2.0100e+03 7.0000e+00 2.1000e+01 2.1000e+01 2...,4,B0047,4,5,00005.csv,1.5243662105099025,,



[Null count]
type                      0
start_time                0
ambient_temperature       0
battery_id                0
test_id                   0
uid                       0
filename                  0
Capacity               4771
Re                     5609
Rct                    5609
dtype: int64

[Row count]
7565


Stage B: liBattery

In [4]:
# liBattery_Data_Cleaned.csv: load it and print the columns / head / null-count / row-count summary.
li_df = inspect_csv("../data_csv/liBattery_Data_Cleaned.csv")

FILE: ../data_csv/liBattery_Data_Cleaned.csv

[Columns]
['type', 'ambient_temperature', 'battery_id', 'test_id', 'uid', 'filename', 'Capacity', 'Re', 'Rct']

[Head]


Unnamed: 0,type,ambient_temperature,battery_id,test_id,uid,filename,Capacity,Re,Rct
0,-1,4,47,0,1,00001.csv,0.983689,0.054543,0.18313
1,0,24,47,1,2,00002.csv,0.983689,0.054543,0.18313
2,1,4,47,2,3,00003.csv,0.983689,0.054543,0.18313
3,0,24,47,3,4,00004.csv,0.983689,0.051825,0.152493
4,-1,4,47,4,5,00005.csv,0.92599,0.051825,0.152493



[Null count]
type                   0
ambient_temperature    0
battery_id             0
test_id                0
uid                    0
filename               0
Capacity               0
Re                     0
Rct                    0
dtype: int64

[Row count]
7368


Stage C-1: Synthetic degradation

In [5]:
# battery_degradation.csv: load it and print the columns / head / null-count / row-count summary.
# NOTE(review): the recorded output lists an 'Unnamed: 0' column — presumably an index
# that was written out when the CSV was saved; confirm before using it as a feature.
synth_df = inspect_csv("../data_csv/battery_degradation.csv")

FILE: ../data_csv/battery_degradation.csv

[Columns]
['Unnamed: 0', 'battery_id', 'voltage', 'current', 'temp', 'time', 'rul']

[Head]


Unnamed: 0.1,Unnamed: 0,battery_id,voltage,current,temp,time,rul
0,0,B0005,4.201969,-0.000857,25.093297,0.0,2820.39
1,1,B0005,4.200942,-0.001128,25.104378,9.328,2811.062
2,2,B0005,3.98226,-2.009929,25.106192,19.515,2800.875
3,3,B0005,3.959736,-2.01445,25.178775,28.937,2791.453
4,4,B0005,3.942182,-2.015093,25.276002,38.312,2782.078



[Null count]
Unnamed: 0    0
battery_id    0
voltage       0
current       0
temp          0
time          0
rul           0
dtype: int64

[Row count]
8434


Stage C-2: EV synthetic

In [6]:
# ev_battery_synth.csv (EV synthetic data): load it and print the structural summary.
ev_synth_df = inspect_csv("../data_csv/ev_battery_synth.csv")

FILE: ../data_csv/ev_battery_synth.csv

[Columns]
['battery_id', 'manufacturer', 'chemistry', 'capacity_kWh', 'charge_cycles', 'avg_temp_celsius', 'discharge_rate_c', 'charge_rate_c', 'avg_soc_percent', 'storage_time_months', 'fast_charge_ratio', 'calendar_age_years', 'capacity_retained_percent']

[Head]


Unnamed: 0,battery_id,manufacturer,chemistry,capacity_kWh,charge_cycles,avg_temp_celsius,discharge_rate_c,charge_rate_c,avg_soc_percent,storage_time_months,fast_charge_ratio,calendar_age_years,capacity_retained_percent
0,BAT000001,Panasonic,NMC,65.54,1524.0,26.61,1.507,0.5,51.96,0.1,0.155,1.58,81.75
1,BAT000002,Samsung SDI,LFP,,1072.0,16.82,0.85,0.927,80.76,11.0,0.466,3.44,81.93
2,BAT000003,BYD,LFP,99.99,807.0,6.27,0.751,1.232,74.73,3.5,0.058,0.23,89.75
3,BAT000004,CATL,NMC,85.96,681.0,28.65,0.788,1.07,30.95,0.2,0.332,3.43,84.41
4,BAT000005,Tesla,NMC,69.85,476.0,41.57,0.683,0.607,59.67,3.6,0.307,0.34,87.04



[Null count]
battery_id                     0
manufacturer                 685
chemistry                    744
capacity_kWh                 732
charge_cycles                689
avg_temp_celsius             718
discharge_rate_c             720
charge_rate_c                711
avg_soc_percent              759
storage_time_months          741
fast_charge_ratio            746
calendar_age_years           797
capacity_retained_percent    751
dtype: int64

[Row count]
15000


In [7]:
# health_timeseries_core_state.csv: load it and print the structural summary.
# NOTE(review): in the recorded output, mean_hr and hr_std are null in 935 of 943 rows.
health_df = inspect_csv("../data_csv/health_timeseries_core_state.csv")

FILE: ../data_csv/health_timeseries_core_state.csv

[Columns]
['user_id', 'date', 'mean_hr', 'hr_std', 'steps', 'calories', 'sleep_minutes', 'health_state_level', 'health_state_speed', 'health_state_index']

[Head]


Unnamed: 0,user_id,date,mean_hr,hr_std,steps,calories,sleep_minutes,health_state_level,health_state_speed,health_state_index
0,1503960366,2016-04-12,,,13162,1985,327.0,-0.002802,0.0,-0.002802
1,1503960366,2016-04-13,,,10735,1797,384.0,0.054147,0.056949,0.45279
2,1503960366,2016-04-14,,,10460,1776,,,,
3,1503960366,2016-04-15,,,9762,1745,412.0,0.064168,,
4,1503960366,2016-04-16,,,12669,1863,340.0,0.005229,,



[Null count]
user_id                 0
date                    0
mean_hr               935
hr_std                935
steps                   0
calories                0
sleep_minutes         530
health_state_level    273
health_state_speed    453
health_state_index    463
dtype: int64

[Row count]
943
