In [3]:
import sys
import numpy as np
# import scikitlearn
import matplotlib
import pandas as pd
import glob
from sklearn import svm
from sklearn.model_selection import train_test_split

# Local includes:
sys.path.append("../../src/")
import therm_params as tp
from power_model import (
    leakagePower,
    peripheral_power,
    board_power
)


In [4]:
# Candidate input sets. Each glob collects every CSV log found in the
# corresponding benchmark directory (an empty list if none exist).
blackscholes_files = glob.glob("../blackscholes_benchmarking/*.csv")
bodytrack_files = glob.glob("../bodytrack_benchmarking/*.csv")

# Single trace recorded under randomised loads.
# (Earlier alternative: "../random_test.csv")
random_data = [
    "../bodytrack_benchmarking/xu4_random_loads_4_1_2018.csv",
]

# The data set actually used by the rest of the notebook; point this at
# `blackscholes_files` or `bodytrack_files` to switch inputs.
files = random_data

In [5]:
# Columns to load from each tab-separated log.
# NOTE: 'freq_mem,' intentionally keeps its trailing comma: the name has to
# match the column label in the CSV header exactly for `usecols` to accept it.
header = [
    'time', 'watts', 'w_leak', 'w_dyn', 'w_periph',
    'usage_c0', 'usage_c1', 'usage_c2', 'usage_c3',
    'usage_c4', 'usage_c5', 'usage_c6', 'usage_c7',
    'temp4', 'temp5', 'temp6', 'temp7', 'temp_gpu',
    'freq_little_cluster', 'freq_big_cluster', 'freq_gpu', 'freq_mem,',
    'volts_little_cluster', 'volts_big_cluster', 'volts_gpu', 'volts_mem',
]

# (target column, source column): each target holds the value the source
# column takes in the sample that immediately follows the current one.
next_step_sources = [
    ('next_t4', 'temp4'),
    ('next_t5', 'temp5'),
    ('next_t6', 'temp6'),
    ('next_t7', 'temp7'),
    ('next_watts', 'watts'),
]

df_list = []
for file in files:
    df = pd.read_csv(file, index_col=False, header=0, sep='\t', usecols=header)
    # Drop where watts <= 0. The original row labels are kept on purpose so
    # that "label + 1" still identifies the sample that directly followed
    # this one in the file; .copy() lets us add columns without touching the
    # frame returned by read_csv.
    df = df.loc[df.watts > 0].copy()

    # Label of each row's immediate successor in the file.
    successor_labels = df.index + 1
    for target, source in next_step_sources:
        # reindex() looks the successor up by label and yields NaN when it
        # does not exist, instead of raising KeyError like df.loc[label + 1].
        df[target] = df[source].reindex(successor_labels).values

    # A row is only usable if its successor survived the filter above. This
    # discards the last row of the file as well as any row whose following
    # sample was dropped for having watts <= 0.
    df = df.loc[successor_labels.isin(df.index)].reset_index(drop=True)
    df_list.append(df)

In [6]:
# Stack the per-file frames into a single table with a fresh 0..n-1 index.
data = pd.concat(df_list, ignore_index=True)

# Keep only the measurements whose power reading is strictly positive.
positive_power = data.watts > 0
data = data.loc[positive_power]

# Overwrite the (possibly missing) cluster voltages with the value looked up
# from the frequency -> voltage tables in therm_params. Frequencies are
# divided by 1e9 before being used as the lookup key.
data['volts_big_cluster'] = data['freq_big_cluster'].map(
    lambda freq: tp.big_f_to_v[float(freq)/1000000000]
)
data['volts_little_cluster'] = data['freq_little_cluster'].map(
    lambda freq: tp.little_f_to_v[float(freq)/1000000000]
)

# Renumber the rows after filtering.
data.reset_index(drop=True, inplace=True)

In [7]:
# Display the column labels of the combined frame as a sanity check.
data.columns

Index(['time', 'watts', 'w_leak', 'w_dyn', 'w_periph', 'usage_c0', 'usage_c1',
       'usage_c2', 'usage_c3', 'usage_c4', 'usage_c5', 'usage_c6', 'usage_c7',
       'temp4', 'temp5', 'temp6', 'temp7', 'temp_gpu', 'freq_little_cluster',
       'freq_big_cluster', 'freq_gpu', 'freq_mem,', 'volts_little_cluster',
       'volts_big_cluster', 'volts_gpu', 'volts_mem', 'next_t4', 'next_t5',
       'next_t6', 'next_t7', 'next_watts'],
      dtype='object')

In [11]:
# Restrict the combined frame to the columns needed for training/testing.
# Model inputs:
xy_inputs = [
    'watts',
    'usage_c0', 'usage_c1', 'usage_c2', 'usage_c3',
    'usage_c4', 'usage_c5', 'usage_c6', 'usage_c7',
    'temp4', 'temp5', 'temp6', 'temp7', 'temp_gpu',
    'freq_big_cluster', 'freq_little_cluster',
    'volts_big_cluster', 'volts_little_cluster',
]
# Expected outputs (values at the following sample):
xy_outputs = ['next_t4', 'next_t5', 'next_t6', 'next_t7', 'next_watts']
# The timestamp is carried along for later plotting:
xy_extra = ['time']

XY = data.loc[:, xy_inputs + xy_outputs + xy_extra]
# Discard every row that has a missing value in any selected column.
XY.dropna(axis=0, inplace=True)

In [12]:
# Build one SVR regression model per predicted quantity: the next-sample
# temperature of each big core (temp4..temp7) and the next-sample board power.
feature_columns = [
    'watts',
    'usage_c0', 'usage_c1', 'usage_c2', 'usage_c3',
    'usage_c4', 'usage_c5', 'usage_c6', 'usage_c7',
    'temp4', 'temp5', 'temp6', 'temp7', 'temp_gpu',
    'freq_big_cluster', 'freq_little_cluster',
    'volts_big_cluster', 'volts_little_cluster',
]
target_columns = ['next_t4', 'next_t5', 'next_t6', 'next_t7', 'next_watts']

# The models: TPMs[k] predicts target_columns[k].
TPMs = [svm.SVR() for _ in target_columns]

# Plain NumPy matrices for scikit-learn. `.values` is used instead of
# DataFrame.as_matrix(), which was deprecated in pandas 0.23 and removed in 1.0.
# NOTE(review): the features are passed to SVR unscaled -- confirm whether
# standardising them first is wanted.
X = XY[feature_columns].values
Y = XY[target_columns].values

# Create training and test sets (15% held out; fixed seed for repeatability):
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)

In [13]:
# Fit every model on the shared training inputs; model i learns column i of
# the training targets.
for i in range(5):
    regressor = TPMs[i]
    target_i = Y_train[:, i]
    TPMs[i] = regressor.fit(X_train, target_i)
    print("Finished training model {}".format(i))

Finished training model 0
Finished training model 1
Finished training model 2
Finished training model 3
Finished training model 4


In [None]:
# Run each of the models on the *training* data.
# Pt[k] is the prediction vector of TPMs[k] for X_train. Iterating over TPMs
# directly avoids depending on a loop index left over from an earlier cell,
# which would evaluate a single model only.
Pt = [model.predict(X_train) for model in TPMs]
Pt

In [None]:
# Run each of the models on the held-out test data.
# P[k] is the prediction vector of TPMs[k] for X_test.
P = [model.predict(X_test) for model in TPMs]
P

In [None]:
# Score:
# R[k] is the coefficient of determination (R^2) of TPMs[k] on the test set.
# Each model is scored against the true values of its own target column;
# scoring a model against its own predictions would trivially give 1.0.
R = [model.score(X_test, Y_test[:, k]) for k, model in enumerate(TPMs)]
R

In [None]:
# Training residuals (actual - predicted), one column per model.
# The per-model prediction vectors are stacked column-wise so the result has
# the same (n_samples, n_models) layout as Y_train.
train_residuals = Y_train - np.column_stack([model.predict(X_train) for model in TPMs])
train_residuals