In [9]:
import pandas as pd

from data import Dataset, get_transfer_dataset
from learner import PredictorLearner, TransferLearner
import analysis as a

# define training set sizes to be used in experiments
TRAINING_SET_SIZES = [0.2, 0.4, 0.6, 0.8]

# initialise results columns
rq1_results_fields = ['mse_accuracy_tgt_no_cv', 'mape_accuracy_tgt_no_cv', 'mse_accuracy_tgt_cv', 'mape_accuracy_tgt_cv',
                     'mse_accuracy_trans_no_cv', 'mape_accuracy_trans_no_cv', 'mse_accuracy_trans_cv', 'mape_accuracy_trans_cv']
rq1_results = pd.DataFrame(columns=rq1_results_fields) # done
rq2_results_fields = ['mape_accuracy_pred_20pct', 'mape_accuracy_pred_40pct', 'mape_accuracy_pred_60pct', 
                     'mape_accuracy_pred_80pct', 'mape_accuracy_trans_20pct', 'mape_accuracy_trans_40pct', 
                     'mape_accuracy_trans_60pct',  'mape_accuracy_trans_80pct', 'mse_accuracy_pred_20pct', 
                     'mse_accuracy_pred_40pct', 'mse_accuracy_pred_60pct', 'mse_accuracy_pred_80pct',  
                     'mse_accuracy_trans_20pct', 'mse_accuracy_trans_40pct', 'mse_accuracy_trans_60pct', 
                     'mse_accuracy_trans_80pct']
rq2_results = pd.DataFrame(columns=rq2_results_fields)
rq3_results_fields = ['training_time_pred_20pct_no_cv', 'training_time_pred_40pct_no_cv', 'training_time_pred_60pct_no_cv', 
                     'training_time_pred_80pct_no_cv', 'training_time_pred_20pct_cv', 'training_time_pred_40pct_cv', 
                     'training_time_pred_60pct_cv', 'training_time_pred_80pct_cv', 'training_time_trans_20pct_no_cv', 
                     'training_time_trans_40pct_no_cv', 'training_time_trans_60pct_no_cv', 'training_time_trans_80pct_no_cv', 
                     'training_time_trans_20pct_cv', 'training_time_trans_40pct_cv', 'training_time_trans_60pct_cv', 
                     'training_time_trans_80pct_cv']
rq3_results = pd.DataFrame(columns=rq3_results_fields)

dataset_nodejs = Dataset('../dataset/nodejs/43/buffer1.csv', 'nodejs')
dataset_poppler = Dataset('../dataset/poppler/12/cuda.csv', 'poppler')
dataset_x264_src = Dataset('../dataset/x264/43/original_videos_Animation_480P_Animation_480P-087e.csv', 'x264')
dataset_x264_target = Dataset('../dataset/x264/44/original_videos_Animation_480P_Animation_480P-087e.csv', 'x264')
dataset_xz = Dataset('../dataset/xz/23/dickens.csv', 'xz')

In [11]:
### RQ1

mape_accuracy_pred_no_cv = []
mape_accuracy_trans_no_cv = []
mape_accuracy_pred_cv = []
mape_accuracy_trans_cv = []
mse_accuracy_pred_no_cv = []
mse_accuracy_trans_no_cv = []
mse_accuracy_pred_cv = []
mse_accuracy_trans_cv = []

predictor = PredictorLearner()
transferrer = TransferLearner()

# get optimised predictor model using hyperparameter optimisation
X_train, X_validate, y_train, y_validate = dataset_x264_target.get_split_dataset()
optimised_model, _ = predictor.get_optimal_params(X_validate, y_validate)

# get accuracy of optimised model for predictor learner
predictor.fit(X_train, y_train, premade_model=optimised_model)
mape_accuracy_pred_cv.append(predictor.get_error(X_train, y_train, measure='mape'))
mse_accuracy_pred_cv.append(predictor.get_error(X_train, y_train, measure='mse'))

# get accuracy of non-optimised model for predictor learner
predictor.fit(X_train, y_train)
mape_accuracy_pred_no_cv.append(predictor.get_error(X_train, y_train, measure='mape'))
mse_accuracy_pred_no_cv.append(predictor.get_error(X_train, y_train, measure='mse'))


# get optimised transfer model using hyperparameter optimisation
X_train, X_validate, y_train, y_validate = get_transfer_dataset(dataset_x264_src, dataset_x264_target)
optimised_model, _ = transferrer.get_optimal_params(X_validate, y_validate)

# get accuracy of optimised model for transfer learner
transferrer.fit(X_train, y_train, premade_model=optimised_model)
mape_accuracy_trans_cv.append(transferrer.get_error(X_train, y_train, measure='mape'))
mse_accuracy_trans_cv.append(predictor.get_error(X_train, y_train, measure='mse'))

# get accuracy of non-optimised model for transfer learner
transferrer.fit(X_train, y_train)
mape_accuracy_trans_no_cv.append(transferrer.get_error(X_train, y_train, measure='mape'))
mse_accuracy_trans_no_cv.append(predictor.get_error(X_train, y_train, measure='mse'))


rq1_results['mse_accuracy_tgt_no_cv'] = mse_accuracy_pred_no_cv
rq1_results['mape_accuracy_tgt_no_cv'] = mape_accuracy_pred_no_cv
rq1_results['mse_accuracy_tgt_cv'] = mse_accuracy_pred_cv
rq1_results['mape_accuracy_tgt_cv'] = mape_accuracy_pred_cv
rq1_results['mse_accuracy_trans_no_cv'] = mse_accuracy_trans_no_cv
rq1_results['mape_accuracy_trans_no_cv'] = mape_accuracy_trans_no_cv
rq1_results['mse_accuracy_trans_cv'] = mse_accuracy_trans_cv
rq1_results['mape_accuracy_trans_cv'] = mape_accuracy_trans_cv

rq1_results

Unnamed: 0,mse_accuracy_tgt_no_cv,mape_accuracy_tgt_no_cv,mse_accuracy_tgt_cv,mape_accuracy_tgt_cv,mse_accuracy_trans_no_cv,mape_accuracy_trans_no_cv,mse_accuracy_trans_cv,mape_accuracy_trans_cv
0,"[-8471.888413687499, -39136.194582625, -18490....","[-0.1134862609145678, -0.28557446175939194, -0...","[-8483.774160390003, -30793.92556560358, -1847...","[-0.1449802914931689, -0.3033148820351462, -0....","[-991.762172625, -1080.8257089687502, -1777.55...","[-0.8917782831424731, -0.7355122995286951, -0....","[-991.762172625, -1080.8257089687502, -1777.55...","[-0.4432734512627955, -0.5046296042361565, -0...."


In [12]:
### RQ2
mape_accuracy_pred_20pct = []
mape_accuracy_pred_40pct = []
mape_accuracy_pred_60pct = []
mape_accuracy_pred_80pct = []
mape_accuracy_trans_20pct = []
mape_accuracy_trans_40pct = []
mape_accuracy_trans_60pct = []
mape_accuracy_trans_80pct = []
mse_accuracy_pred_20pct = []
mse_accuracy_pred_40pct = []
mse_accuracy_pred_60pct = []
mse_accuracy_pred_80pct = []
mse_accuracy_trans_20pct = []
mse_accuracy_trans_40pct = []
mse_accuracy_trans_60pct = []
mse_accuracy_trans_80pct = []

transferrer = TransferLearner()
predictor = PredictorLearner()

for train_size in TRAINING_SET_SIZES: 
    
    X_train, X_validate, y_train, y_validate = dataset_x264_target.get_split_dataset()
    optimised_model, _ = predictor.get_optimal_params(X_validate, y_validate)
    
    # get accuracy of predictor model for current training set size
    predictor.fit(X_train, y_train, premade_model=optimised_model)
    mape_accuracy_pred = predictor.get_error(X_train, y_train, measure='mape')
    mse_accuracy_pred = predictor.get_error(X_train, y_train, measure='mse')
    
    
    X_train, X_validate, y_train, y_validate = get_transfer_dataset(dataset_x264_src, 
                                                                    dataset_x264_target, 
                                                                    train_size=train_size)
    optimised_model, _ = transferrer.get_optimal_params(X_validate, y_validate)
    
    # get accuracy of transfer model for each training set size
    transferrer.fit(X_train, y_train, premade_model=optimised_model)
    mape_accuracy_trans = transferrer.get_error(X_train, y_train, measure='mape')
    mse_accuracy_trans = transferrer.get_error(X_train, y_train, measure='mse')
    

    # record accuracy in appropriate results column
    if train_size == 0.2:
        mape_accuracy_pred_20pct.append(mape_accuracy_pred)
        mape_accuracy_trans_20pct.append(mape_accuracy_trans)
        mse_accuracy_pred_20pct.append(mse_accuracy_pred)
        mse_accuracy_trans_20pct.append(mse_accuracy_trans)
    elif train_size == 0.4:
        mape_accuracy_pred_40pct.append(mape_accuracy_pred)
        mape_accuracy_trans_40pct.append(mape_accuracy_trans)
        mse_accuracy_pred_40pct.append(mse_accuracy_pred)
        mse_accuracy_trans_40pct.append(mse_accuracy_trans)
    elif train_size == 0.6:
        mape_accuracy_pred_60pct.append(mape_accuracy_pred)
        mape_accuracy_trans_60pct.append(mape_accuracy_trans)
        mse_accuracy_pred_60pct.append(mse_accuracy_pred)
        mse_accuracy_trans_60pct.append(mse_accuracy_trans)
    elif train_size == 0.8:
        mape_accuracy_pred_80pct.append(mape_accuracy_pred)
        mape_accuracy_trans_80pct.append(mape_accuracy_trans)
        mse_accuracy_pred_80pct.append(mse_accuracy_pred)
        mse_accuracy_trans_80pct.append(mse_accuracy_trans)
        
        
rq2_results['mape_accuracy_pred_20pct'] = mape_accuracy_pred_20pct
rq2_results['mape_accuracy_pred_40pct'] = mape_accuracy_pred_40pct
rq2_results['mape_accuracy_pred_60pct'] = mape_accuracy_pred_60pct
rq2_results['mape_accuracy_pred_80pct'] = mape_accuracy_pred_80pct
rq2_results['mse_accuracy_pred_20pct'] = mse_accuracy_pred_20pct
rq2_results['mse_accuracy_pred_40pct'] = mse_accuracy_pred_40pct
rq2_results['mse_accuracy_pred_60pct'] = mse_accuracy_pred_60pct
rq2_results['mse_accuracy_pred_80pct'] = mse_accuracy_pred_80pct
rq2_results['mape_accuracy_trans_20pct'] = mape_accuracy_trans_20pct
rq2_results['mape_accuracy_trans_40pct'] = mape_accuracy_trans_40pct
rq2_results['mape_accuracy_trans_60pct'] = mape_accuracy_trans_60pct
rq2_results['mape_accuracy_trans_80pct'] = mape_accuracy_trans_80pct
rq2_results['mse_accuracy_trans_20pct'] = mse_accuracy_trans_20pct
rq2_results['mse_accuracy_trans_40pct'] = mse_accuracy_trans_40pct
rq2_results['mse_accuracy_trans_60pct'] = mse_accuracy_trans_40pct
rq2_results['mse_accuracy_trans_80pct'] = mse_accuracy_trans_80pct

rq2_results

Unnamed: 0,mape_accuracy_pred_20pct,mape_accuracy_pred_40pct,mape_accuracy_pred_60pct,mape_accuracy_pred_80pct,mape_accuracy_trans_20pct,mape_accuracy_trans_40pct,mape_accuracy_trans_60pct,mape_accuracy_trans_80pct,mse_accuracy_pred_20pct,mse_accuracy_pred_40pct,mse_accuracy_pred_60pct,mse_accuracy_pred_80pct,mse_accuracy_trans_20pct,mse_accuracy_trans_40pct,mse_accuracy_trans_60pct,mse_accuracy_trans_80pct
0,"[-0.1255026519673369, -0.2775767648322348, -0....","[-0.12523356399540295, -2.111733187805072, -0....","[-0.1328684217075297, -0.1302644847064683, -0....","[-0.12440875044490296, -0.1388280539122848, -0...","[-0.43706998693535914, -0.3611685713730872, -0...","[-0.3755720482248503, -0.4224961844939908, -0....","[-0.38302431248210506, -0.4721943802585633, -0...","[-0.4432734512627955, -0.5046296042361565, -0....","[-8483.052148750001, -30862.42313675, -20442.0...","[-8584.82353169971, -88339.85459534623, -19803...","[-8478.680864187501, -8602.295517281249, -1806...","[-8404.76188628125, -30815.603184249998, -1832...","[-205.19596761919917, -347.14672241535806, -11...","[-195.51619277786514, -1656.0681436299392, -24...","[-195.51619277786514, -1656.0681436299392, -24...","[-873.997777015775, -2135.646858602326, -1003...."


In [14]:
### RQ3

training_time_pred_20pct_no_cv = []
training_time_pred_40pct_no_cv = []
training_time_pred_60pct_no_cv = []
training_time_pred_80pct_no_cv = []
training_time_pred_20pct_cv = []
training_time_pred_40pct_cv = []
training_time_pred_60pct_cv = []
training_time_pred_80pct_cv = []
training_time_trans_20pct_no_cv = []
training_time_trans_40pct_no_cv = []
training_time_trans_60pct_no_cv = []
training_time_trans_80pct_no_cv = []
training_time_trans_20pct_cv = []
training_time_trans_40pct_cv = []
training_time_trans_60pct_cv = []
training_time_trans_80pct_cv = []

transferrer = TransferLearner()
predictor = PredictorLearner()

for train_size in TRAINING_SET_SIZES:
    
    X_train, X_validate, y_train, y_validate = dataset_x264_target.get_split_dataset()
    # get optimised predictor model using hyperparameter optimisation
    optimised_model, _ = predictor.get_optimal_params(X_validate, y_validate)
    predictor.fit(X_train, y_train, premade_model=optimised_model)
    
    # gather results
    training_time_pred_no_cv = predictor.get_training_time()
    training_time_pred_cv = predictor.get_training_time(include_optimisation_time=True)
    

    X_train, X_validate, y_train, y_validate = get_transfer_dataset(dataset_x264_src, dataset_x264_target)
    # get optimised transfer model using hyperparameter optimisation
    optimised_model, _ = transferrer.get_optimal_params(X_validate, y_validate)
    transferrer.fit(X_train, y_train, premade_model=optimised_model)
    
    # gather results
    training_time_trans_no_cv = transferrer.get_training_time()
    training_time_trans_cv = transferrer.get_training_time(include_optimisation_time=True)


    # record accuracy in appropriate results column
    if train_size == 0.2:
        training_time_pred_20pct_no_cv.append(training_time_pred_no_cv)
        training_time_pred_20pct_cv.append(training_time_pred_cv)
        training_time_trans_20pct_no_cv.append(training_time_trans_no_cv)
        training_time_trans_20pct_cv.append(training_time_trans_cv)
    elif train_size == 0.4:
        training_time_pred_40pct_no_cv.append(training_time_pred_no_cv)
        training_time_pred_40pct_cv.append(training_time_pred_cv)
        training_time_trans_40pct_no_cv.append(training_time_trans_no_cv)
        training_time_trans_40pct_cv.append(training_time_trans_cv)
    elif train_size == 0.6:
        training_time_pred_60pct_no_cv.append(training_time_pred_no_cv)
        training_time_pred_60pct_cv.append(training_time_pred_cv)
        training_time_trans_60pct_no_cv.append(training_time_trans_no_cv)
        training_time_trans_60pct_cv.append(training_time_trans_cv)
    elif train_size == 0.8:
        training_time_pred_80pct_no_cv.append(training_time_pred_no_cv)
        training_time_pred_80pct_cv.append(training_time_pred_cv)
        training_time_trans_80pct_no_cv.append(training_time_trans_no_cv)
        training_time_trans_80pct_cv.append(training_time_trans_cv)
        
        
rq3_results['training_time_pred_20pct_no_cv'] = training_time_pred_20pct_no_cv
rq3_results['training_time_pred_40pct_no_cv'] = training_time_pred_40pct_no_cv
rq3_results['training_time_pred_60pct_no_cv'] = training_time_pred_60pct_no_cv
rq3_results['training_time_pred_80pct_no_cv'] = training_time_pred_80pct_no_cv
rq3_results['training_time_pred_20pct_cv'] = training_time_pred_20pct_cv
rq3_results['training_time_pred_40pct_cv'] = training_time_pred_40pct_cv
rq3_results['training_time_pred_60pct_cv'] = training_time_pred_60pct_cv
rq3_results['training_time_pred_80pct_cv'] = training_time_pred_80pct_cv
rq3_results['training_time_trans_20pct_no_cv'] = training_time_trans_20pct_no_cv
rq3_results['training_time_trans_40pct_no_cv'] = training_time_trans_40pct_no_cv
rq3_results['training_time_trans_60pct_no_cv'] = training_time_trans_60pct_no_cv
rq3_results['training_time_trans_80pct_no_cv'] = training_time_trans_80pct_no_cv
rq3_results['training_time_trans_20pct_cv'] = training_time_trans_20pct_cv
rq3_results['training_time_trans_40pct_cv'] = training_time_trans_40pct_cv
rq3_results['training_time_trans_60pct_cv'] = training_time_trans_60pct_cv
rq3_results['training_time_trans_80pct_cv'] = training_time_trans_80pct_cv

rq3_results

Unnamed: 0,training_time_pred_20pct_no_cv,training_time_pred_40pct_no_cv,training_time_pred_60pct_no_cv,training_time_pred_80pct_no_cv,training_time_pred_20pct_cv,training_time_pred_40pct_cv,training_time_pred_60pct_cv,training_time_pred_80pct_cv,training_time_trans_20pct_no_cv,training_time_trans_40pct_no_cv,training_time_trans_60pct_no_cv,training_time_trans_80pct_no_cv,training_time_trans_20pct_cv,training_time_trans_40pct_cv,training_time_trans_60pct_cv,training_time_trans_80pct_cv
0,0.001002,0.0,0.0,0.001,0.208562,0.20572,0.205879,0.205931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
