In [197]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pykalman import KalmanFilter
from sklearn.linear_model import LinearRegression

In [198]:
X_columns = ['temperature', 'cpu_percent', 'fan_rpm', 'sys_load_1', 'cpu_freq']
y_column = 'next_temp'


In [199]:
def get_data(filename):
    """
    Read the given CSV file. Returns sysinfo DataFrame with target (next temperature) column created.
    """
    sysinfo = pd.read_csv(filename, parse_dates=['timestamp'])
    
    # TODO: add the column that we want to predict: the temperatures from the *next* time step.
    sysinfo[y_column] = sysinfo['temperature'].shift(periods = -1) # should be the temperature value from the next row
    sysinfo = sysinfo.dropna()
    #sysinfo[y_column] = sysinfo[y_column].fillna('')
    #sysinfo = sysinfo[sysinfo[y_column].notnull()] # the last row should have y_column null: no next temp known
    #sysinfo.next_temp[21695] = "null"

    return sysinfo


In [200]:
train_data = get_data("sysinfo-train.csv")
print(train_data)

                       timestamp  temperature  sys_load_1  cpu_percent  \
0     2020-05-20 12:02:49.850707    32.000000        0.58         2.86   
1     2020-05-20 12:02:59.855392    31.750000        0.57         2.68   
2     2020-05-20 12:03:09.859261    32.166667        0.48         3.24   
3     2020-05-20 12:03:19.863313    31.833333        0.49         3.52   
4     2020-05-20 12:03:29.868141    32.000000        0.64         2.43   
...                          ...          ...         ...          ...   
21691 2020-05-23 00:19:44.901521    30.333333        1.11         4.73   
21692 2020-05-23 00:19:54.906179    30.500000        1.02         3.43   
21693 2020-05-23 00:20:04.910416    30.333333        0.86         5.48   
21694 2020-05-23 00:20:14.914973    30.166667        0.88         3.68   
21695 2020-05-23 00:20:24.919573    30.500000        1.06         3.96   

          cpu_freq  fan_rpm  next_temp  
0      1474.153167      796  31.750000  
1      1579.055250      805  

In [217]:
def get_trained_coefficients(X_train, y_train):
    """
    Create and train a model based on the training_data_file data.

    Return the model, and the list of coefficients for the 'X_columns' variables in the regression.
    """

    temp = X_train['temperature'].tolist()
    sys_load_1 = X_train['sys_load_1'].tolist()
    cpu_percent = X_train['cpu_percent'].tolist()
    cpu_fre = X_train['cpu_freq'].tolist()
    fan_rpm = X_train['fan_rpm'].tolist()
    y = y_train.tolist()

    X = np.stack([temp,sys_load_1,cpu_percent,cpu_fre,fan_rpm],axis = 1)
    print(X)
    model = LinearRegression(fit_intercept = False)
    model.fit(X,y_train)

    coefficients = model.coef_
    #print(y)
    # TODO: create regression model and train.

    return model, coefficients



In [218]:
X_train, y_train = train_data[X_columns], train_data[y_column]
m,c = get_trained_coefficients(X_train, y_train)  
print(m,c)


[[3.20000000e+01 5.80000000e-01 2.86000000e+00 1.47415317e+03
  7.96000000e+02]
 [3.17500000e+01 5.70000000e-01 2.68000000e+00 1.57905525e+03
  8.05000000e+02]
 [3.21666667e+01 4.80000000e-01 3.24000000e+00 1.50034858e+03
  8.10000000e+02]
 ...
 [3.03333333e+01 8.60000000e-01 5.48000000e+00 1.10859542e+03
  8.46000000e+02]
 [3.01666667e+01 8.80000000e-01 3.68000000e+00 1.18273408e+03
  7.45000000e+02]
 [3.05000000e+01 1.06000000e+00 3.96000000e+00 1.38694733e+03
  7.70000000e+02]]
LinearRegression(fit_intercept=False) [ 5.96299639e-01  7.26935176e-01 -1.26922795e-01  2.74112833e-04
  1.62989305e-02]
