### Import libraries and scripts

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import joblib

In [2]:
import sys
import os

sys.path.append(os.path.abspath("../scripts"))

# Import project-specific modules

from load_data import load_data
from evaluate import evaluate_model
from preprocess import preprocess_data

## Load Data


In [3]:
# Read the raw measurement table through the project loader.
# NOTE(review): the two arguments look like a dated folder name and a CSV file
# name — confirm against scripts/load_data.py.
df = load_data("04-03-24", "A3.csv")
# Bare last expression so the notebook renders the frame as a rich table.
df

Unnamed: 0,mode,ox/red,error,control changes,Ns changes,counter inc.,Ns,I Range,time/s,control/V/mA,...,x,Q discharge/mA.h,Q charge/mA.h,Capacity/mA.h,Efficiency/%,control/V,control/mA,cycle number,P/W,R/Ohm
0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,115.0,0.0,0.00,...,1.000,0.0,0.00,0.00,0.0,0.00,0.0,0.0,0.000,0.00
1,2.0,1.0,0.0,1.0,1.0,0.0,1.0,112.0,1840.0,4.08,...,0.999,0.0,5.05,5.05,0.0,4.08,0.0,0.0,1.010,16.50
2,2.0,0.0,0.0,1.0,0.0,0.0,1.0,112.0,1840.0,4.08,...,0.999,0.0,5.07,5.07,0.0,4.08,0.0,0.0,-2.070,8.06
3,2.0,1.0,0.0,1.0,0.0,0.0,1.0,112.0,1850.0,4.08,...,0.999,0.0,5.10,5.10,0.0,4.08,0.0,0.0,2.300,7.26
4,2.0,0.0,0.0,1.0,0.0,0.0,1.0,112.0,1850.0,4.08,...,0.999,0.0,5.12,5.12,0.0,4.08,0.0,0.0,-1.930,8.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79059,2.0,0.0,0.0,1.0,0.0,1.0,6.0,112.0,2420000.0,4.13,...,0.573,0.0,2500.00,2500.00,0.0,4.13,0.0,267.0,-0.248,68.70
79060,2.0,0.0,0.0,1.0,0.0,1.0,6.0,112.0,2420000.0,4.13,...,0.573,0.0,2500.00,2500.00,0.0,4.13,0.0,267.0,-0.358,47.60
79061,2.0,1.0,0.0,1.0,0.0,1.0,6.0,112.0,2420000.0,4.13,...,0.573,0.0,2500.00,2500.00,0.0,4.13,0.0,267.0,1.420,12.00
79062,3.0,1.0,0.0,0.0,1.0,1.0,7.0,115.0,2420000.0,0.00,...,0.573,0.0,2500.00,2500.00,0.0,0.00,0.0,267.0,0.000,0.00


In [4]:
# List every column name as loaded, before any renaming.
print(df.columns.values)

['mode' 'ox/red' 'error' 'control changes' 'Ns changes' 'counter inc.'
 'Ns' 'I Range' 'time/s' 'control/V/mA' 'Ecell/V' 'I/mA' 'dq/mA.h'
 '(Q-Qo)/mA.h' '|Energy|/W.h' 'freq/Hz' '|Z|/Ohm' 'Phase(Z)/deg'
 'Q charge/discharge/mA.h' 'half cycle' 'Energy charge/W.h'
 'Energy discharge/W.h' 'Capacitance charge/µF' 'Capacitance discharge/µF'
 'step time/s' 'z cycle' 'Re(Z)/Ohm' '#NAME?' 'Re(Y)/Ohm-1' 'Im(Y)/Ohm-1'
 '|Y|/Ohm-1' 'Phase(Y)/deg' 'x' 'Q discharge/mA.h' 'Q charge/mA.h'
 'Capacity/mA.h' 'Efficiency/%' 'control/V' 'control/mA' 'cycle number'
 'P/W' 'R/Ohm']


In [5]:
# One column header arrived as the spreadsheet error token "#NAME?". It sits
# between 'Re(Z)/Ohm' and 'Re(Y)/Ohm-1' in the column listing, so it is
# relabelled here as the imaginary part of the impedance.
# NOTE(review): "#NAME?" typically appears when a spreadsheet parses a header
# beginning with "-" as a formula; the original header may have been
# '-Im(Z)/Ohm', in which case these values carry the opposite sign of Im(Z).
# Confirm against the raw instrument export.
df = df.rename(columns={"#NAME?": "Im(Z)/Ohm"})

In [6]:
# Re-list the column names to confirm that "#NAME?" is now 'Im(Z)/Ohm'.
print(df.columns.values)

['mode' 'ox/red' 'error' 'control changes' 'Ns changes' 'counter inc.'
 'Ns' 'I Range' 'time/s' 'control/V/mA' 'Ecell/V' 'I/mA' 'dq/mA.h'
 '(Q-Qo)/mA.h' '|Energy|/W.h' 'freq/Hz' '|Z|/Ohm' 'Phase(Z)/deg'
 'Q charge/discharge/mA.h' 'half cycle' 'Energy charge/W.h'
 'Energy discharge/W.h' 'Capacitance charge/µF' 'Capacitance discharge/µF'
 'step time/s' 'z cycle' 'Re(Z)/Ohm' 'Im(Z)/Ohm' 'Re(Y)/Ohm-1'
 'Im(Y)/Ohm-1' '|Y|/Ohm-1' 'Phase(Y)/deg' 'x' 'Q discharge/mA.h'
 'Q charge/mA.h' 'Capacity/mA.h' 'Efficiency/%' 'control/V' 'control/mA'
 'cycle number' 'P/W' 'R/Ohm']


## Preprocess Data with script

In [7]:
# Regression target: the capacity column of the loaded table.
TARGET_COL = 'Capacity/mA.h'
# Model inputs: the real and imaginary impedance columns.
FEATURE_COLS = ['Re(Z)/Ohm', 'Im(Z)/Ohm']

# Delegate scaling to the project script; it hands back the transformed
# feature/target arrays together with the two scaler objects.
# NOTE(review): this runs before the train/test split. If preprocess_data fits
# its scalers on every row, test-set statistics leak into training — confirm
# in scripts/preprocess.py.
X_scaled, y_scaled, scaler_X, scaler_y = preprocess_data(
    df,
    TARGET_COL,
    FEATURE_COLS,
)

### Train-test split

In [8]:
# Draw a reproducible 1% random sample of the rows (fixed seed) for a quick look.
# NOTE(review): sample_df is not referenced by any later cell visible here; the
# split that follows operates on X_scaled / y_scaled, not on this sample.
sample_df = df.sample(frac=0.01, random_state=42)
sample_df

Unnamed: 0,mode,ox/red,error,control changes,Ns changes,counter inc.,Ns,I Range,time/s,control/V/mA,...,x,Q discharge/mA.h,Q charge/mA.h,Capacity/mA.h,Efficiency/%,control/V,control/mA,cycle number,P/W,R/Ohm
75628,1.0,1.0,0.0,1.0,0.0,0.0,3.0,112.0,2310000.0,8000.00,...,1.080,0.0,934.0,934.0,0.0,0.00,8000.0,256.0,32.40,0.507
7730,3.0,1.0,0.0,1.0,0.0,0.0,5.0,115.0,234000.0,0.00,...,0.849,0.0,3700.0,3700.0,0.0,0.00,0.0,26.0,0.00,0.000
50902,2.0,1.0,0.0,1.0,0.0,1.0,4.0,112.0,1520000.0,4.20,...,0.849,0.0,2840.0,2840.0,0.0,4.20,0.0,173.0,11.50,1.540
16408,1.0,1.0,0.0,1.0,0.0,0.0,3.0,112.0,493000.0,8000.00,...,1.380,0.0,1400.0,1400.0,0.0,0.00,8000.0,56.0,31.00,0.484
52688,2.0,1.0,0.0,1.0,0.0,1.0,4.0,112.0,1570000.0,4.20,...,0.816,0.0,2940.0,2940.0,0.0,4.20,0.0,179.0,8.86,1.990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56500,3.0,0.0,0.0,0.0,1.0,0.0,2.0,115.0,1680000.0,0.00,...,1.530,3180.0,0.0,3180.0,99.8,0.00,0.0,191.0,0.00,0.000
63949,2.0,1.0,0.0,0.0,0.0,1.0,1.0,112.0,1920000.0,3.05,...,1.480,3030.0,0.0,3030.0,0.0,3.05,0.0,216.0,1.82,5.130
74742,1.0,1.0,0.0,0.0,0.0,1.0,3.0,112.0,2280000.0,8000.00,...,0.945,0.0,1560.0,1560.0,0.0,0.00,8000.0,253.0,33.60,0.525
16981,1.0,1.0,0.0,1.0,0.0,0.0,3.0,112.0,510000.0,8000.00,...,1.620,0.0,400.0,400.0,0.0,0.00,8000.0,58.0,29.20,0.457


In [9]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the scaled rows for testing; the fixed seed keeps the
# partition identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_scaled,
    test_size=0.33,
    random_state=42,
)

## Train Gaussian Process Regressor

In [10]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# RBF models the smooth signal; WhiteKernel adds a learnable noise term.
# The numeric values are only starting points for the hyperparameter optimizer.
kernel = RBF(length_scale=1.0) + WhiteKernel(noise_level=0.1)

# random_state pins the random initial hyperparameters drawn for the 10
# optimizer restarts, so Restart & Run All reproduces the same fitted kernel.
# NOTE(review): exact GP fitting scales cubically in time and quadratically in
# memory with the number of training rows, and X_train holds roughly two thirds
# of the loaded table. If this cell is too slow or runs out of memory, fit on a
# subsample instead.
gp = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=10,
    random_state=42,
)
gp.fit(X_train, y_train)

## Save Model

## Generate Test Data for Predictions

## Predictions


## Plot results


## Evaluate

In [None]:
# Score the fitted GP on the held-out split. `y_true` and `y_pred` were not
# defined by any earlier cell, so this cell raised NameError on a fresh kernel;
# derive them here from the test partition.
# Both are flattened to 1-D so an (n, 1) target and an (n,) prediction cannot
# broadcast against each other inside the metric computation.
y_pred = np.ravel(gp.predict(X_test))
y_true = np.ravel(y_test)

# Both arrays are the scaled targets returned by preprocess_data, so the RMSE
# is in scaled units.
# NOTE(review): to report the error in mA.h, map both arrays back through
# scaler_y first (assuming it offers an inverse transform — confirm in
# scripts/preprocess.py).
metrics = evaluate_model(y_true, y_pred)
print(metrics["RMSE"])