In [None]:
%load_ext autoreload
%autoreload 1

import sys
from pathlib import Path

# Index into Path.cwd().parents that selects the project root folder:
# 0 is the directory directly above the working directory, 1 the one above
# that, and so on.
dist_to_root = 0
_project_root = Path.cwd().parents[dist_to_root]

# Prepend the project root to the import search path only when it is absent,
# so re-running this cell never adds a duplicate entry.
if sys.path.count(str(_project_root)) == 0:
    sys.path.insert(0, str(_project_root))

from utils import split_helper, restrict_group_samples, split_cycle_data_no_missing, split_without_overlap

In [1]:
from functools import partial
from pathlib import Path
from sklearn.metrics import r2_score

import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_absolute_error

In [3]:
# Target column(s): selected from the split test data to form test_y.
labels = ["capacity"]
# Grouping column(s) handed to split_helper by the generate_test_* functions.
data_groupby = ["cycle"]

In [4]:
def generate_test_firstN(first_N, features, data_path='../data/data_nasa/test.csv'):
    """Build test inputs and targets using the `restrict_group_samples` splitter.

    The CSV at `data_path` is read and passed to `split_helper` together with
    the module-level `data_groupby` and `labels` lists and a
    `restrict_group_samples` partial bound to `num_restricted=first_N`.

    Parameters
    ----------
    first_N
        Forwarded to `restrict_group_samples` as `num_restricted`.
        NOTE(review): presumably keeps only the first N samples of every
        `data_groupby` group -- confirm against `utils`.
    features : list of str
        Columns selected from the split result to form the model inputs.
    data_path : str or path-like, optional
        Location of the test CSV. The default is the path this function
        previously hard-coded, so existing two-argument calls are unaffected.

    Returns
    -------
    tuple
        `(test_x, test_y)`: the `features` selection and the `labels`
        selection of the object returned by `split_helper`.
    """
    data = pd.read_csv(data_path)
    test_func = partial(restrict_group_samples, num_restricted=first_N)
    test = split_helper(data, data_groupby, features, labels, test_func)
    test_x = test[features]
    test_y = test[labels]
    return test_x, test_y

In [None]:
def generate_test_eachN(each_N, features, data_path='../data/data_nasa/test.csv'):
    """Build test inputs and targets using the `split_without_overlap` splitter.

    The CSV at `data_path` is read and passed to `split_helper` together with
    the module-level `data_groupby` and `labels` lists and a
    `split_without_overlap` partial bound to `split_size=each_N`.

    Parameters
    ----------
    each_N
        Forwarded to `split_without_overlap` as `split_size`.
        NOTE(review): presumably cuts every `data_groupby` group into
        non-overlapping chunks of N samples -- confirm against `utils`.
    features : list of str
        Columns selected from the split result to form the model inputs.
    data_path : str or path-like, optional
        Location of the test CSV. The default is the path this function
        previously hard-coded, so existing two-argument calls are unaffected.

    Returns
    -------
    tuple
        `(test_x, test_y)`: the `features` selection and the `labels`
        selection of the object returned by `split_helper`.
    """
    data = pd.read_csv(data_path)
    # Alternative splitter tried earlier: partial(split_cycle_data_no_missing, threshold=each_N)
    test_func = partial(split_without_overlap, split_size=each_N)
    test = split_helper(data, data_groupby, features, labels, test_func)
    test_x = test[features]
    test_y = test[labels]
    return test_x, test_y

In [6]:
# Registry of pickled models to evaluate: name -> pickle path + input columns.
# The `features` order is kept exactly as listed because it sets the column
# order of the frame passed to predict(); model4 lists the same three columns
# in a different order (presumably the order it was trained with -- confirm).
# NOTE(review): model4 is read from `model_nasa/` relative to the working
# directory, while the others come from `../download/`.
model_dict = dict(
    model1=dict(
        path='../download/nasa_3features_first20_01380851.pkl',
        features=['voltage', 'temperature', 'discharge_voltage_rate'],
    ),
    model2=dict(
        path='../download/nasa_3features_first20_01575541.pkl',
        features=['voltage', 'temperature', 'discharge_voltage_rate'],
    ),
    model3=dict(
        path='../download/nasa_3features_first20_01776440.pkl',
        features=['voltage', 'temperature', 'discharge_voltage_rate'],
    ),
    model4=dict(
        path='model_nasa/nasa_3features_first20_0_028749.pkl',
        features=['discharge_voltage_rate', 'voltage', 'temperature'],
    ),
)

In [7]:
# Evaluate every registered model on test sets built with several values of N
# and print MAE / R^2 for each (model, N) pair.
for key, value in model_dict.items():
    # Double-quoted f-string: reusing the outer quote character inside the
    # replacement field is a SyntaxError on Python versions before 3.12.
    # Direct indexing makes a missing 'path' fail with a clear KeyError here
    # instead of printing None and failing later inside open().
    print(f"{key}: {value['path']}")

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load model files that come from a trusted source.
    with open(value['path'], 'rb') as file:
        best_model = pickle.load(file)

    for N in [5, 10, 20, 30, 50]:

        # generate test data
        # test_x,test_y = generate_test_firstN(N,value.get('features'))
        test_x, test_y = generate_test_eachN(N, value['features'])

        # prediction on model
        pred = best_model.predict(test_x)

        # calculate mae and r2 score
        mae = mean_absolute_error(test_y, pred)
        r2 = r2_score(test_y, pred)

        print(f"N = {N}, mae = {mae:.4f}, r2 = {r2:.4f}")

model1: ../download/nasa_3features_first20_01380851.pkl
N = 5, mae = 0.0239, r2 = 0.9574
N = 10, mae = 0.0220, r2 = 0.9516
N = 20, mae = 0.0207, r2 = 0.9535
N = 30, mae = 0.0503, r2 = 0.7211
N = 50, mae = 0.0464, r2 = 0.8506
model2: ../download/nasa_3features_first20_01575541.pkl
N = 5, mae = 0.0264, r2 = 0.9431
N = 10, mae = 0.0234, r2 = 0.9491
N = 20, mae = 0.0212, r2 = 0.9535
N = 30, mae = 0.0469, r2 = 0.7694
N = 50, mae = 0.0288, r2 = 0.9443
model3: ../download/nasa_3features_first20_01776440.pkl
N = 5, mae = 0.0242, r2 = 0.9578
N = 10, mae = 0.0214, r2 = 0.9575
N = 20, mae = 0.0215, r2 = 0.9573
N = 30, mae = 0.0462, r2 = 0.7435
N = 50, mae = 0.0441, r2 = 0.8535
model4: model_nasa/nasa_3features_first20_0_028749.pkl
N = 5, mae = 0.0246, r2 = 0.9556
N = 10, mae = 0.0220, r2 = 0.9631
N = 20, mae = 0.0195, r2 = 0.9661
N = 30, mae = 0.0224, r2 = 0.9575
N = 50, mae = 0.0287, r2 = 0.9385
