In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

In [3]:
# Fix the random seed (0) up front so that the stochastic steps later in the
# notebook are repeatable when the notebook is re-run from a fresh kernel.
pyro.set_rng_seed(0)

In [4]:
# Load the 'Cells' sheet of the benchmark workbook.
# - header=[0, 1]: two header rows, so the columns form a two-level index
#   (the (top, sub) tuples used for column selection below rely on this).
# - skiprows=1 / nrows=1102: skip the first sheet row and read at most 1102
#   data rows.
df = pd.read_excel(
    'pack_benchmark_spec_v1.055.xlsm',
    sheet_name='Cells',
    header=[0, 1],
    skiprows=1,
    nrows=1102,
)

## Data analysis

In [9]:
# Columns of interest, as (top-level, sub-level) keys into the two-level
# column index of `df`. They are declared grouped by top-level header and
# flattened into a list of tuples, in the order given here.
feature_set_0 = [
    (group, field)
    for group, fields in (
        ('Capacity', ('Ah (nom)',)),
        ('Voltage', ('Nom',)),
        ('Mass', ('kg',)),
        # The 'Format' sub-header is unnamed in the sheet, hence the
        # auto-generated pandas label.
        ('Format', ('Unnamed: 14_level_1',)),
        ('Chemistry', ('Family', 'Cathode', 'Anode')),
        ('Discharge', ('Ampsmax', 'Ampscont')),
        ('Charge', ('Ampsmax', 'Ampscont')),
        ('Cycle Ageing', ('to 70% SoH', 'to 80% SoH')),
    )
    for field in fields
]

In [10]:
# Summary statistics for the selected features. With default arguments the
# recorded output covers only the nine numeric columns; the `count` row shows
# how many non-missing values each column has (e.g. only 84 for 'to 70% SoH').
df.loc[:,feature_set_0].describe()

Unnamed: 0_level_0,Capacity,Voltage,Mass,Discharge,Discharge,Charge,Charge,Cycle Ageing,Cycle Ageing
Unnamed: 0_level_1,Ah (nom),Nom,kg,Ampsmax,Ampscont,Ampsmax,Ampscont,to 70% SoH,to 80% SoH
count,1099.0,1049.0,986.0,261.0,730.0,414.0,657.0,84.0,183.0
mean,53.910874,3.467774,1.22587,298.912859,123.278508,99.09708,37.085752,3179.166667,3140.73224
std,97.407426,0.509421,2.674496,719.616163,252.415326,264.029472,74.246094,3265.594729,4662.17618
min,0.026,1.2,0.0042,2.5,0.02,0.825,0.008,200.0,250.0
25%,3.4,3.2,0.061475,20.0,10.0875,5.0,2.0,600.0,1000.0
50%,10.0,3.6,0.269,120.0,33.5,20.0,4.4,2000.0,2000.0
75%,62.75,3.7,1.42,380.0,150.0,110.0,30.0,5250.0,3500.0
max,1175.0,12.0,38.0,10000.0,3000.0,3000.0,500.0,13000.0,35000.0


In [11]:
# Summary of the non-numeric selected features (object / category dtypes):
# count of non-missing values, number of distinct labels, and the most
# frequent label with its frequency.
df.loc[:,feature_set_0].describe(include=['object','category'])

Unnamed: 0_level_0,Format,Chemistry,Chemistry,Chemistry
Unnamed: 0_level_1,Unnamed: 14_level_1,Family,Cathode,Anode
count,1100,797,436,118
unique,6,23,17,16
top,Cylindrical,LFP,Lithium Iron Phosphate,Gr
freq,468,368,211,48


In [12]:
# Frequency of each cell format. 'Format' is selected by its top-level key
# only, unlike the (top, sub) tuple keys used for the Chemistry columns.
# NOTE(review): the recorded output lists 'Cylindrical' twice as well as a
# lowercase 'cylindrical', which suggests inconsistent casing and/or stray
# whitespace in the raw labels -- confirm and normalise before modelling.
df.loc[:,'Format'].value_counts()

Unnamed: 14_level_1
Cylindrical            468
Prismatic              335
Pouch                  266
Blade                   21
cylindrical              8
Cylindrical              2
Name: count, dtype: int64

In [13]:
# Frequency of each chemistry family label.
# NOTE(review): the recorded output contains near-duplicate labels (e.g.
# 'LiS' and 'Li-S') and a combined label 'NMC, NCA, NMC/NCA' -- these
# probably need consolidating before use as a categorical feature; confirm.
df.loc[:,('Chemistry','Family')].value_counts()

(Chemistry, Family)
LFP                    368
NMC                    197
NCA                     56
NMC, NCA, NMC/NCA       49
LCO                     40
LTO                     19
NaB                     17
NCM/NCA                 13
LMO                      7
Solid State              5
PbA                      4
NiCd                     4
NMCA                     3
NiMh                     3
LiS                      2
LMFP                     2
NMX                      2
BMLMP                    1
NMC 811                  1
Spinel                   1
Silicon carbon cell      1
LNCAO                    1
Li-S                     1
Name: count, dtype: int64

In [14]:
# Frequency of each cathode label.
# NOTE(review): the recorded output shows both 'NMC712' and '712', which
# look like the same material spelled differently -- confirm and merge.
df.loc[:,('Chemistry','Cathode')].value_counts()

(Chemistry, Cathode)
Lithium Iron Phosphate    211
Nickel rich               151
Lithium Cobalt Oxide       33
NMC811                     12
NMC622                      7
Li metal/Liquid             5
Lithium Titanate            4
Gr+Si                       3
NaMO2                       2
NMC523                      1
NMC                         1
712                         1
NMC712                      1
NMC111+NCA                  1
High Nickel NCM+ Gr         1
High Nickel + SiC/SiOx      1
NMC955                      1
Name: count, dtype: int64

In [15]:
# Frequency of each anode label.
# NOTE(review): the recorded output mixes spellings that presumably denote
# the same material ('Gr' / 'Graphite'; 'Gr-Si' / 'Gr+Si' /
# 'Graphite+Silicon') -- confirm and normalise.
df.loc[:,('Chemistry','Anode')].value_counts()

(Chemistry, Anode)
Gr                  48
Graphite            26
Si                   9
Li                   8
Aluminium            8
Gr/SiO               5
Gr-Si                4
Li metal             2
LTO                  1
Hard carbon          1
SiC                  1
Graphite+Silicon     1
Soft carbon          1
Gr+LTO               1
Gr+SiO               1
Gr+Si                1
Name: count, dtype: int64

## ML model
We will use the variational autoencoder (VAE)-based model from [this paper](https://www.sciencedirect.com/science/article/pii/S0031320320303046#bib0045). The basic idea is to train a neural network to 'reconstruct' its input data. To do so, the data is forced through a bottleneck in the network, the _latent space_.