In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
from pykalman import KalmanFilter
from sklearn.linear_model import LinearRegression

In [53]:
# Feature columns used as regression inputs, in the order the coefficients are reported.
X_columns = [
    'temperature',
    'cpu_percent',
    'fan_rpm',
    'sys_load_1',
]
# Name of the target column (the temperature at the following time step).
y_column = 'next_temp'

In [54]:
# Load the logged system metrics; 'timestamp' is parsed to datetimes for plotting later.
sysinfo = pd.read_csv('sysinfo.csv', parse_dates=['timestamp'])

# Target column: the temperature recorded on the *next* row.
sysinfo[y_column] = sysinfo['temperature'].shift(-1)
# Rows without a known next temperature (the final row, after the shift) are dropped.
sysinfo = sysinfo.dropna(subset=[y_column])

# First 80% of the rows train the model; the following 4% form the test segment.
split1, split2 = (int(len(sysinfo) * fraction) for fraction in (0.8, 0.84))
train = sysinfo.iloc[:split1]
test = sysinfo.iloc[split1:split2]

In [55]:
# Split each partition into its feature matrix (X_columns) and target vector (y_column).
X_train = train[X_columns]
y_train = train[y_column]
X_test = test[X_columns]
y_test = test[y_column]

In [56]:
# Fit a linear regression of next_temp on the X_columns features.
# The intercept is disabled, so the prediction is purely a weighted sum of the
# features; `coefficients` holds one weight per entry of X_columns, in order.
model = LinearRegression(fit_intercept=False).fit(X_train, y_train)
coefficients = model.coef_

In [57]:
# Render the fitted equation as "next_temp = c1*col1 + c2*col2 + ...",
# with each coefficient shown to three significant digits.
terms = ['%.3g*%s' % (coef, col) for col, coef in zip(X_columns, coefficients)]
regress = ' + '.join(terms)
print('next_temp = ' + regress)
#print("Training score: %g\nTesting score: %g" % (model.score(X_train, y_train), model.score(X_test, y_test)))

next_temp = 0.637*temperature + 0.051*cpu_percent + 0.0188*fan_rpm + 1.25*sys_load_1


In [58]:
# Histogram of the regression's errors on the test segment (actual minus predicted).
plt.figure(0)
test_errors = y_test - model.predict(X_test)
plt.hist(test_errors, bins=100)
plt.savefig('test_errors.png')

In [61]:
# Kalman-smooth the test segment, using the fitted regression as the
# temperature transition model, and plot the result against the raw readings.
X_test, y_test = test[X_columns], test[y_column] #for kalman filter only

# feel free to tweak these if you think it helps.
transition_stddev = 2.0
observation_stddev = 2.0

dims = X_test.shape[-1]
# Start the filter at the first observed row of the test segment.
initial = X_test.iloc[0]
# Per-variable standard deviations (in X_columns order), squared into variances.
observation_covariance = np.diag([observation_stddev, 2, 10, 1]) ** 2
transition_covariance = np.diag([transition_stddev, 80, 100, 10]) ** 2

# Transition = identity for all variables, except the top row (temperature),
# which is replaced by the regression coefficients so that the predicted next
# temperature is the fitted linear combination of the current state.
transition = np.eye(dims)
transition[0] = coefficients

kf = KalmanFilter(
    initial_state_mean=initial,
    initial_state_covariance=observation_covariance,
    observation_covariance=observation_covariance,
    transition_covariance=transition_covariance,
    # The transition matrix must be passed explicitly; otherwise the matrix
    # built above is never used and the regression has no effect on smoothing.
    transition_matrices=transition,
)

# smooth() returns (state means, state covariances); only the means are plotted.
kalman_smoothed, _ = kf.smooth(X_test)

# Raw temperature readings (blue dots) vs. smoothed temperature state (green line).
plt.figure(figsize=(15, 6))
plt.plot(test['timestamp'], test['temperature'], 'b.', alpha=0.5)
plt.plot(test['timestamp'], kalman_smoothed[:, 0], 'g-')
plt.savefig('smoothed.png')