In [1]:
import os 
import csv
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.metrics import MeanSquaredError
from keras.losses import MeanSquaredError

In [2]:
# Seed the random number generators so that both the file-level train/test
# split (Python's `random`) and the Keras weight initialisation / batch
# shuffling are repeatable. `set_random_seed` is imported here because the
# notebook's import cell does not provide it.
from keras.utils import set_random_seed

SEED = 42
set_random_seed(SEED)  # per the Keras docs: seeds Python `random`, NumPy and the backend
random.seed(SEED)      # kept explicit: the split below draws from `random`

# Default dataset location (SHiP policy).
path = '../ce_data/output_512/dataset_ship.csv'

In [3]:
def read_df(path, cache_sizes=("1024", "2048", "4096", "8192")):
    """Load a dataset CSV and split it into features and miss ratios.

    Parameters
    ----------
    path : str
        Location of the CSV file. It must contain an ``id`` column plus a
        ``misses_<size>`` and ``accesses_<size>`` column for every entry of
        ``cache_sizes``.
    cache_sizes : sequence of str, optional
        Column suffixes for which a miss ratio is computed. Defaults to the
        four sizes used throughout this notebook.

    Returns
    -------
    df : pandas.DataFrame
        The CSV content without the ``misses_*`` / ``accesses_*`` /
        ``l1d_accesses_*`` counter columns, plus a ``file_name`` column
        holding ``id`` with its last ``_<suffix>`` part removed.
    miss_ratios : pandas.DataFrame
        One column per cache size (``misses / accesses``), sharing the row
        index of ``df``.
    """
    df = pd.read_csv(path)

    # Miss ratios live in a separate dataframe that shares df's row index.
    miss_ratios = pd.DataFrame(
        {size: df[f"misses_{size}"] / df[f"accesses_{size}"] for size in cache_sizes},
        index=df.index,
    )

    # Remove the raw counters from the feature frame; the l1d column is
    # optional, so only drop it when the file actually has it.
    counter_columns = []
    for size in cache_sizes:
        counter_columns += [f"misses_{size}", f"accesses_{size}"]
        if f"l1d_accesses_{size}" in df.columns:
            counter_columns.append(f"l1d_accesses_{size}")
    df = df.drop(columns=counter_columns)

    # The file name is the id up to (not including) the last underscore.
    # rsplit leaves ids without an underscore intact instead of collapsing
    # them to an empty string.
    df = df.assign(file_name=df["id"].str.rsplit("_", n=1).str[0])
    return df, miss_ratios

Reuse distance histograms sampled from the same file are likely to resemble each other, because they are generated by the same code. If rows from one file ended up in both the training and the test set, the test error would look better than it really is. To avoid this, we split by file: randomly picked files are moved to the test set, with all of their rows, until the test set reaches the requested fraction of the dataset (25% in the runs below).

In [4]:
def train_test_split(df, miss_ratios, split=0.3):
    """Split features and miss ratios into train/test sets by source file.

    Whole files are moved to the test set, in random order, until the test
    set holds at least ``split * len(df)`` rows. All rows of one file
    therefore end up on the same side of the split.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame with ``id`` and ``file_name`` columns.
    miss_ratios : pandas.DataFrame
        Targets, indexed like ``df``.
    split : float, optional
        Minimum fraction of rows to place in the test set.

    Returns
    -------
    train_rds, train_mrs, test_rds, test_mrs : pandas.DataFrame
        Feature frames (without ``id`` / ``file_name``) and the matching
        miss-ratio frames.
    """
    # Sort the unique names: set iteration order for strings is not stable
    # between interpreter runs, which would defeat random.seed().
    filenames = sorted(set(df["file_name"]))
    rows_per_file = df["file_name"].value_counts()
    target_rows = len(df) * split

    # Visiting a random permutation is the same as drawing files without
    # replacement, works for any number of files, and always terminates.
    files = []
    collected_rows = 0
    for name in random.sample(filenames, len(filenames)):
        if collected_rows >= target_rows:
            break
        files.append(name)
        collected_rows += int(rows_per_file[name])

    in_test = df["file_name"].isin(files)
    train_rds = df[~in_test]
    train_mrs = miss_ratios[~in_test]
    test_rds = df[in_test]
    test_mrs = miss_ratios[in_test]
    print(f"The test set consists of {len(files)} files (out of {len(filenames)}) with a total of {collected_rows} rows (out of {len(df)})")

    # The identifier columns are bookkeeping only, not model inputs.
    train_rds = train_rds.drop(columns=["id", "file_name"])
    test_rds = test_rds.drop(columns=["id", "file_name"])
    return train_rds, train_mrs, test_rds, test_mrs
#train_rds, train_mrs, test_rds, test_mrs = train_test_split(rds, mrs)

In [5]:
def create_model(layers=(896, 896, 1), input_dim=896):
    """Build and compile a fully connected sigmoid network.

    Parameters
    ----------
    layers : sequence of int, optional
        Number of units of each Dense layer, in order; the last entry is the
        output width. The first entry is now honoured (it used to be ignored
        in favour of a fixed 896 units).
    input_dim : int, optional
        Number of input features. Defaults to 896, the value that was
        previously hard-coded.

    Returns
    -------
    keras.models.Sequential
        Model compiled with a mean-squared-error loss and the Adam optimizer.

    Raises
    ------
    ValueError
        If ``layers`` is empty.
    """
    layers = list(layers)
    if not layers:
        raise ValueError("layers must contain at least one layer size")

    model = Sequential()
    # Only the first layer needs to be told the input width.
    model.add(Dense(layers[0], input_dim=input_dim, activation='sigmoid'))
    for units in layers[1:]:
        model.add(Dense(units, activation='sigmoid'))

    # NOTE: the import cell imports MeanSquaredError from keras.metrics and
    # then from keras.losses; the later import wins, so this is the loss class.
    model.compile(loss=MeanSquaredError(), optimizer='adam')
    return model

# Ship replacement Policy

In [6]:
# Load the SHiP dataset, then hold out whole files until the test set
# reaches a quarter of the rows.
rds, mrs = read_df(path='../ce_data/output_512/dataset_ship.csv')
train_rds, train_mrs, test_rds, test_mrs = train_test_split(
    df=rds,
    miss_ratios=mrs,
    split=0.25,
)

The test set consists of 54 files (out of 189) with a total of 1547 rows (out of 6182)


In [7]:
# Train one network that predicts the miss ratio of every cache size at once
# and report the per-size test error after each training round.
j = ['1024', '2048', '4096', '8192']
m = create_model([896,896,len(j)])
m.summary()

# Convert the frames once instead of on every iteration.
train_x, train_y = train_rds.to_numpy(), train_mrs[j].to_numpy()
test_x = test_rds.to_numpy()

for training_round in range(10):
    m.fit(train_x, train_y, epochs=2)
    # One forward pass over the test set serves all cache sizes.
    test_pred = np.asarray(m(test_x))
    for col, k in enumerate(j):
        print(f'Mean squared error for cache size {k}\t: {mean_squared_error(test_mrs[k].to_numpy(), test_pred[:, col])}')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 896)               803712    
                                                                 
 dense_1 (Dense)             (None, 896)               803712    
                                                                 
 dense_2 (Dense)             (None, 4)                 3588      
                                                                 
Total params: 1,611,012
Trainable params: 1,611,012
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Mean squared error for cache size 1024	: 0.08750855828280242
Mean squared error for cache size 2048	: 0.07424994561614187
Mean squared error for cache size 4096	: 0.07043049673216144
Mean squared error for cache size 8192	: 0.08765517250861413
Epoch 1/2
Epoch 2/2
Mean squared error for cac

# LRU replacement Policy

In [8]:
# Load the LRU dataset, then hold out whole files until the test set
# reaches a quarter of the rows.
rds, mrs = read_df(path='../ce_data/output_512/dataset_lru.csv')
train_rds, train_mrs, test_rds, test_mrs = train_test_split(
    df=rds,
    miss_ratios=mrs,
    split=0.25,
)

The test set consists of 36 files (out of 189) with a total of 1708 rows (out of 6183)


In [9]:
# Train one network that predicts the miss ratio of every cache size at once
# and report the per-size test error after each training round.
j = ['1024', '2048', '4096', '8192']
m = create_model([896,896,len(j)])
m.summary()

# Convert the frames once instead of on every iteration.
train_x, train_y = train_rds.to_numpy(), train_mrs[j].to_numpy()
test_x = test_rds.to_numpy()

for training_round in range(10):
    m.fit(train_x, train_y, epochs=2)
    # One forward pass over the test set serves all cache sizes.
    test_pred = np.asarray(m(test_x))
    for col, k in enumerate(j):
        print(f'Mean squared error for cache size {k}\t: {mean_squared_error(test_mrs[k].to_numpy(), test_pred[:, col])}')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 896)               803712    
                                                                 
 dense_4 (Dense)             (None, 896)               803712    
                                                                 
 dense_5 (Dense)             (None, 4)                 3588      
                                                                 
Total params: 1,611,012
Trainable params: 1,611,012
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Mean squared error for cache size 1024	: 0.012875371123205047
Mean squared error for cache size 2048	: 0.03507795809186156
Mean squared error for cache size 4096	: 0.07685864715052503
Mean squared error for cache size 8192	: 0.10061846873134875
Epoch 1/2
Epoch 2/2
Mean squared error for 

# Mockingjay replacement Policy

In [10]:
# Load the dataset_mj.csv dataset, then hold out whole files until the test
# set reaches a quarter of the rows.
rds, mrs = read_df(path='../ce_data/output_512/dataset_mj.csv')
train_rds, train_mrs, test_rds, test_mrs = train_test_split(
    df=rds,
    miss_ratios=mrs,
    split=0.25,
)

The test set consists of 56 files (out of 189) with a total of 1696 rows (out of 6182)


In [11]:
# Train one network that predicts the miss ratio of every cache size at once
# and report the per-size test error after each training round.
j = ['1024', '2048', '4096', '8192']
m = create_model([896,896,len(j)])
m.summary()

# Convert the frames once instead of on every iteration.
train_x, train_y = train_rds.to_numpy(), train_mrs[j].to_numpy()
test_x = test_rds.to_numpy()

for training_round in range(10):
    m.fit(train_x, train_y, epochs=2)
    # One forward pass over the test set serves all cache sizes.
    test_pred = np.asarray(m(test_x))
    for col, k in enumerate(j):
        print(f'Mean squared error for cache size {k}\t: {mean_squared_error(test_mrs[k].to_numpy(), test_pred[:, col])}')

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 896)               803712    
                                                                 
 dense_7 (Dense)             (None, 896)               803712    
                                                                 
 dense_8 (Dense)             (None, 4)                 3588      
                                                                 
Total params: 1,611,012
Trainable params: 1,611,012
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Mean squared error for cache size 1024	: 0.11806458191745896
Mean squared error for cache size 2048	: 0.07942598515408282
Mean squared error for cache size 4096	: 0.12202709388801564
Mean squared error for cache size 8192	: 0.14402506094936754
Epoch 1/2
Epoch 2/2
Mean squared error for c

# Srrip replacement Policy

In [12]:
# Load the SRRIP dataset, then hold out whole files until the test set
# reaches a quarter of the rows.
rds, mrs = read_df(path='../ce_data/output_512/dataset_srrip.csv')
train_rds, train_mrs, test_rds, test_mrs = train_test_split(
    df=rds,
    miss_ratios=mrs,
    split=0.25,
)

The test set consists of 48 files (out of 189) with a total of 1593 rows (out of 6181)


In [13]:
# Train one network that predicts the miss ratio of every cache size at once
# and report the per-size test error after each training round.
j = ['1024', '2048', '4096', '8192']
m = create_model([896,896,len(j)])
m.summary()

# Convert the frames once instead of on every iteration.
train_x, train_y = train_rds.to_numpy(), train_mrs[j].to_numpy()
test_x = test_rds.to_numpy()

for training_round in range(10):
    m.fit(train_x, train_y, epochs=2)
    # One forward pass over the test set serves all cache sizes.
    test_pred = np.asarray(m(test_x))
    for col, k in enumerate(j):
        print(f'Mean squared error for cache size {k}\t: {mean_squared_error(test_mrs[k].to_numpy(), test_pred[:, col])}')

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 896)               803712    
                                                                 
 dense_10 (Dense)            (None, 896)               803712    
                                                                 
 dense_11 (Dense)            (None, 4)                 3588      
                                                                 
Total params: 1,611,012
Trainable params: 1,611,012
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2
Epoch 2/2
Mean squared error for cache size 1024	: 0.015242504870059473
Mean squared error for cache size 2048	: 0.05618234474675598
Mean squared error for cache size 4096	: 0.0998030073370528
Mean squared error for cache size 8192	: 0.1304343437360494
Epoch 1/2
Epoch 2/2
Mean squared error for ca