In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch
import torch.nn as nn
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

In [4]:
# Fix Pyro's random seed so that stochastic steps are repeatable between runs.
# NOTE(review): this cell's execution count (4) is higher than that of the data-loading
# cell below it (3), so it was last run out of order — re-run top to bottom on a fresh kernel.
pyro.set_rng_seed(0)

In [3]:
# Load the 'Cells' sheet of the benchmark workbook. The first spreadsheet row is
# skipped, the next two rows become a two-level column header, and at most 1102
# data rows are read.
# NOTE(review): 1102 is presumably the last populated row of this workbook version — confirm
# whenever the file is updated.
df = pd.read_excel(
    'pack_benchmark_spec_v1.042.xlsm',
    sheet_name='Cells',
    header=[0, 1],
    skiprows=1,
    nrows=1102,
)

## Data analysis

In [15]:
# Two-level (group, field) column keys used for the first analysis pass.
# The list is generated from a mapping of each top-level header to the
# sub-columns selected under it; insertion order fixes the column order.
feature_set_0 = [
    (group, field)
    for group, fields in {
        'Capacity': ('Ah (nom)',),
        'Voltage': ('Nom',),
        'Mass': ('kg',),
        'Format': ('Unnamed: 13_level_1',),
        'Chemistry': ('Family', 'Cathode', 'Anode'),
        'Discharge': ('Ampsmax', 'Ampscont'),
        'Charge': ('Ampsmax', 'Ampscont'),
        'Cycle Ageing': ('to 70% SoH', 'to 80% SoH'),
    }.items()
    for field in fields
]

In [None]:
# Summary statistics for the selected features; with default arguments only the
# numeric columns appear in the result. The 'count' row of the output ranges from
# 83 to 1096, so several features are missing for most cells.
df[feature_set_0].describe()

Unnamed: 0_level_0,Capacity,Voltage,Mass,Discharge,Discharge,Charge,Charge,Cycle Ageing,Cycle Ageing
Unnamed: 0_level_1,Ah (nom),Nom,kg,Ampsmax,Ampscont,Ampsmax,Ampscont,to 70% SoH,to 80% SoH
count,1096.0,1040.0,996.0,263.0,749.0,439.0,674.0,83.0,180.0
mean,49.028663,3.468543,1.139255,306.046601,122.202017,99.171392,36.199128,3084.939759,3306.966667
std,86.74296,0.521512,2.542226,717.768219,250.885797,259.607692,74.335621,3168.477889,5087.746627
min,0.026,1.2,0.0042,2.5,0.02,0.825,0.008,200.0,250.0
25%,3.432645,3.2,0.0645,20.0,10.0,4.8,1.77,600.0,1000.0
50%,8.5,3.6,0.245,128.0,30.0,17.5,4.0,2000.0,2000.0
75%,57.25,3.7,1.245,400.0,150.0,110.0,26.6375,5000.0,3500.0
max,1000.0,12.0,38.0,10000.0,3000.0,3000.0,500.0,13000.0,35000.0


In [21]:
# Summary of the text/categorical columns in the selected features
# (non-null count, number of distinct labels, most frequent label and its frequency).
df[feature_set_0].describe(include=['object', 'category'])

Unnamed: 0_level_0,Format,Chemistry,Chemistry,Chemistry
Unnamed: 0_level_1,Unnamed: 13_level_1,Family,Cathode,Anode
count,1094,783,440,130
unique,7,24,19,17
top,Cylindrical,LFP,Lithium Iron Phosphate,Gr
freq,461,340,211,47


In [24]:
# Frequency of each cell-format label.
# NOTE(review): the output lists 'Cylindrical' more than once, alongside 'Cylinridcal'
# and 'cylindrical'. The repeated entry presumably differs only by whitespace — confirm.
# These labels need normalising before 'Format' is used as a categorical feature.
df.loc[:,'Format'].value_counts()

Unnamed: 13_level_1
Cylindrical            461
Prismatic              316
Pouch                  280
Blade                   19
Cylinridcal              8
cylindrical              8
Cylindrical              2
Name: count, dtype: int64

In [25]:
# Frequency of each chemistry-family label.
# NOTE(review): the output shows overlapping labels ('LiS' vs 'Li-S', 'NMC' vs 'NMC 811',
# combined entries such as 'NMC, NCA, NMC/NCA') and a literal 'Chemistry' value that
# looks like a stray header row — confirm and clean before encoding this column.
df.loc[:,('Chemistry','Family')].value_counts()

(Chemistry, Family)
LFP                    340
NMC                    200
NCA                     56
NMC, NCA, NMC/NCA       49
LCO                     41
LTO                     28
NaB                     17
NCM/NCA                 13
LMO                      7
Solid State              5
NiCd                     4
PbA                      4
NMCA                     3
NiMh                     3
LiS                      2
LMFP                     2
NMX                      2
BMLMP                    1
NMC 811                  1
Spinel                   1
Silicon carbon cell      1
LNCAO                    1
Li-S                     1
Chemistry                1
Name: count, dtype: int64

In [26]:
# Frequency of each cathode label.
# NOTE(review): the output mixes full names with NMC shorthand and includes values that
# do not look like cathode materials ('#', '40', '712', 'Gr+Si') — presumably data-entry
# noise; confirm against the spreadsheet before using this column.
df.loc[:,('Chemistry','Cathode')].value_counts()

(Chemistry, Cathode)
Lithium Iron Phosphate    211
Nickel rich               154
Lithium Cobalt Oxide       33
NMC811                     11
NMC622                      7
Li metal/Liquid             5
Lithium Titanate            4
Gr+Si                       3
NaMO2                       2
NMC523                      1
NMC                         1
712                         1
NMC712                      1
NMC111+NCA                  1
High Nickel NCM+ Gr         1
High Nickel + SiC/SiOx      1
NMC955                      1
#                           1
40                          1
Name: count, dtype: int64

In [27]:
# Frequency of each anode label.
# NOTE(review): the output shows several spellings of what is presumably the same material
# ('Gr' / 'Graphite', 'Lithium Titanate' / 'LTO', 'Gr-Si' / 'Gr+Si' / 'Graphite+Silicon')
# — confirm and merge them before encoding this column.
df.loc[:,('Chemistry','Anode')].value_counts()

(Chemistry, Anode)
Gr                  47
Graphite            28
Aluminium           10
Lithium Titanate     9
Si                   9
Li                   8
Gr/SiO               5
Gr-Si                4
SiOx                 2
Graphite+Silicon     1
Gr+LTO               1
Gr+SiO               1
Gr+Si                1
SiC                  1
Hard carbon          1
LTO                  1
Soft carbon          1
Name: count, dtype: int64

## ML model
We will use the VAE-based model from [this paper](https://www.sciencedirect.com/science/article/pii/S0031320320303046#bib0045). The basic idea is to train a neural network to 'reconstruct' its input data. The network is forced to pass the data through a narrow bottleneck layer, the _latent space_, so it must learn a compact representation from which the original values can be recovered.

In [None]:
# Code currently borrowed directly from https://pyro.ai/examples/vae.html

class Decoder(nn.Module):
    """Map a latent vector back to data space through one hidden layer.

    Args:
        z_dim: size of the latent vector fed to the network.
        hidden_dim: width of the hidden layer.
        output_dim: number of values produced per latent vector.

    NOTE(review): the final sigmoid squashes every output into the range 0..1.
    Confirm that the features to be reconstructed are scaled accordingly.
    """

    def __init__(self, z_dim, hidden_dim, output_dim):
        super().__init__()
        # Affine maps: latent -> hidden, then hidden -> output.
        self.fc1 = nn.Linear(in_features=z_dim, out_features=hidden_dim)
        self.fc21 = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        # Activations applied after the first and second affine map respectively.
        self.softplus = nn.Softplus()
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        """Return the reconstruction for ``z``; the last dimension has size ``output_dim``."""
        activations = self.fc1(z)
        activations = self.softplus(activations)
        # Sigmoid keeps every reconstructed value between 0 and 1.
        return self.sigmoid(self.fc21(activations))

In [6]:
class Encoder(nn.Module):
    """Map input rows to a location vector and a positive scale vector for the latent z.

    Args:
        z_dim: size of the latent vector.
        hidden_dim: width of the single hidden layer.
        input_dim: number of features per input row.
    """

    def __init__(self, z_dim, hidden_dim, input_dim):
        super().__init__()
        # forward() flattens its input to (-1, input_dim), so the size must be
        # remembered on the instance (it was previously never stored, which made
        # every forward pass fail with AttributeError).
        self.input_dim = input_dim
        # setup the three linear transformations used
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, z_dim)
        self.fc22 = nn.Linear(hidden_dim, z_dim)
        # setup the non-linearities
        self.softplus = nn.Softplus()

    def forward(self, x):
        """Encode ``x`` and return ``(z_loc, z_scale)``.

        ``x`` is reshaped to ``(-1, input_dim)``, so any leading batch shape is
        accepted as long as the total number of elements is a multiple of
        ``input_dim``. Both outputs have shape ``(rows, z_dim)``.
        """
        # put one sample per row with the features in the rightmost dimension
        x = x.reshape(-1, self.input_dim)
        # then compute the hidden units
        hidden = self.softplus(self.fc1(x))
        # location is unconstrained; exponentiating the second head keeps the
        # scale strictly positive
        z_loc = self.fc21(hidden)
        z_scale = torch.exp(self.fc22(hidden))
        return z_loc, z_scale