In [None]:
%pip install numpy pandas matplotlib scikit-learn h5py

In [None]:
# import h5py
import scipy.io
import scipy.stats
import sklearn.metrics
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load data

In [None]:
# Read the combined feature table. index_col=False tells pandas not to use
# the first CSV column as the row index.
featFileName = "../data/features_combined.csv"
batch_pd = pd.read_csv(filepath_or_buffer=featFileName, index_col=False)
batch_pd

## Features

In [None]:
# The single predictor used by the model: the 'QDiffLinVar' column as a NumPy array.
feat = batch_pd.loc[:, 'QDiffLinVar'].to_numpy()
# Label-based column slice from 'policy' through 'cycle_life' (both ends
# inclusive); the regression target 'cycle_life' is read from this frame later.
bat_info = batch_pd.loc[:, slice('policy', 'cycle_life')]
feat

In [None]:
# Scatter of the selected feature against cycle life, to eyeball the relationship
# before fitting a model.
fig, ax = plt.subplots(figsize=(8, 8), dpi=80, facecolor='w', edgecolor='k')
ax.scatter(feat, bat_info["cycle_life"])
ax.set_xlabel("QDiffLinVar")
ax.set_ylabel("cycle_life")
ax.grid(True)
plt.show()

## Model Building

In [None]:
# Deterministic alternating split: even positions go to the training set,
# odd positions to the test set.
even_rows, odd_rows = slice(0, None, 2), slice(1, None, 2)

# Features are reshaped into (n, 1) column arrays (one sample per row).
feat_train = feat[even_rows].reshape(-1, 1)
feat_test = feat[odd_rows].reshape(-1, 1)

# Targets stay as pandas Series taken from the 'cycle_life' column.
label_train = bat_info["cycle_life"][even_rows]
label_test = bat_info["cycle_life"][odd_rows]



In [None]:
# Standardise both splits with the mean and standard deviation of the
# training features only, so the test split is scaled by the same statistics.
feat_train_scaled = scipy.stats.zscore(a=feat_train, axis=0)
feat_test_scaled = scipy.stats.zmap(scores=feat_test, compare=feat_train, axis=0)

## Training Model

In [None]:
# Linear regression of cycle life on the single standardised feature.
B = linear_model.LinearRegression()
B.fit(X=feat_train_scaled, y=label_train)

## Predicting

In [None]:
# Predict cycle life for both splits with the fitted model, then display the
# training predictions.
ypred_train, ypred_test = (
    B.predict(split) for split in (feat_train_scaled, feat_test_scaled)
)
ypred_train

In [None]:
# Display the training targets as a plain NumPy array, for comparison with the
# ypred_train values shown by the previous cell.
label_train.to_numpy()

In [None]:
# Parity plot: true cycle life on the x-axis against the model's prediction on
# the y-axis. Points on the black diagonal are perfect predictions.
plt.figure("Sk-learn basemode prediction", figsize=(8,8), dpi=100, facecolor='w', edgecolor='k')
plt.axes(aspect='equal')
plt.scatter(label_train, ypred_train, label="train", marker="o")
plt.scatter(label_test, ypred_test, label="test", marker="^")
# scatter(x, y) above puts the true labels on x and the predictions on y,
# so the axis captions must follow that order.
plt.xlabel('Actual Cycle life')
plt.ylabel('Predicted Cycle life')
# Identical fixed limits on both axes keep the reference diagonal at 45 degrees.
# NOTE(review): points outside 0-2000 fall outside the visible area; confirm
# the labels and predictions fit this range.
lims = [0, 2000]
plt.xlim(lims)
plt.ylim(lims)
plt.plot(lims, lims, 'k')
plt.legend()
# NOTE(review): savefig does not create directories; the target folder must
# already exist.
plt.savefig("../variance_model/figures/sk-learn-basemode-prediction.png")
plt.show()

## Evaluation

In [None]:
# Print the root-mean-squared error of the model on the training split, then
# on the test split (same units as cycle_life).
# The `squared=False` shortcut of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, so take the square root explicitly;
# this gives the same value on every scikit-learn version.
print(np.sqrt(sklearn.metrics.mean_squared_error(label_train, ypred_train)))
print(np.sqrt(sklearn.metrics.mean_squared_error(label_test, ypred_test)))

### Coefficient of Determination (R-Squared)

In [None]:
# R^2 of the fitted model on the training split, then on the test split.
# The scaled features are already (n, 1) column arrays, so they are passed as-is.
print(B.score(feat_train_scaled, label_train))
print(B.score(feat_test_scaled, label_test))

In [None]:
# Disabled exploratory plot, kept for reference: scaled feature against
# log10(cycle life) for both splits, with the training predictions drawn as a
# line ordered by the scaled feature value. Uncomment every line below to run it.
# plt.scatter(feat_train_scaled[:,0], np.log10(label_train), label="train", marker="o")
# plt.scatter(feat_test_scaled[:,0], np.log10(label_test), label="test", marker="^")
# sorting_order = np.argsort(feat_train_scaled[:,0])
# plt.plot(feat_train_scaled[:,0][sorting_order], np.log10(ypred_train[sorting_order]))
# plt.legend()
# plt.show()