In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import plotly.express as px
pd.set_option('display.float_format', lambda x: '%.6f' % x)
pd.set_option('display.max_rows', 110)
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt

In [7]:
# The 10 functions with the most data in the test set (last 3 days).
workload_ids = [1, 7, 21, 31, 37, 50, 55, 56, 69, 71]

# Start from one row per second, then append one invocation column per workload.
combined = pd.DataFrame({"second": range(0, 1209601)})
for workload_id in workload_ids:  # named so the built-in `id()` is not shadowed
    data = pd.read_csv(f'../../data/training_data/{workload_id}.txt', delimiter=',')
    data = data[["second", "invocations"]]
    # NOTE(review): the assignment aligns on the default row index, so it
    # assumes row k of every file describes second k -- confirm against the data.
    combined[f"{workload_id}"] = data["invocations"]

combined

Unnamed: 0,second,1,7,21,31,37,50,55,56,69,71
0,0,0,0,1,1,0,1,0,0,0,1
1,1,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1209596,1209596,0,0,0,0,0,0,0,0,0,0
1209597,1209597,0,0,0,0,0,0,0,0,0,0
1209598,1209598,0,0,0,0,0,0,0,0,0,0
1209599,1209599,0,0,0,0,0,0,0,0,0,0


In [32]:
# Persist the per-second invocation table (row index omitted) for later reuse.
# This writes whatever `combined` currently holds, so it must run before
# `combined` is narrowed to the test window if the full trace is wanted.
combined.to_csv("all_data.csv", index=False)

In [8]:
# Narrow the table to the test set: rows whose second is 950400 or later
# (the last 3 days).
in_test_window = combined["second"].ge(950400)
combined = combined.loc[in_test_window]

In [30]:
# Keep only the seconds in which at least one workload column equals 1.
# Workload columns are picked by name (everything except 'second'). The row
# index is not a column, so a positional slice such as `iloc[:, 2:]` would also
# skip the first workload column and drop rows where only that workload fired.
workload_columns = combined.columns.drop("second")
has_invocation = combined[workload_columns].eq(1).any(axis=1)
filtered_df = combined[has_invocation]
filtered_df.to_csv("evaluation_data.csv", index=False)
filtered_df

Unnamed: 0,second,1,7,21,31,37,50,55,56,69,71
950400,950400,1,0,0,0,0,0,0,1,1,1
950401,950401,1,0,0,0,0,1,0,0,0,0
950402,950402,1,0,0,1,0,0,0,0,0,0
950404,950404,0,0,0,0,0,0,1,0,0,0
950408,950408,1,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1209302,1209302,0,0,0,0,0,1,0,0,0,0
1209363,1209363,0,0,0,0,0,0,1,0,0,0
1209424,1209424,0,0,0,1,0,0,0,0,0,0
1209432,1209432,0,0,0,0,1,0,0,0,0,0


In [9]:
# Inspect the first row only: which workload columns hold a 1, and at which second.
for index, row in filtered_df.iterrows():
    # Compare the workload columns only; 'second' is a timestamp and would be
    # reported as a workload whenever its own value happens to be 1.
    workload_values = row.drop("second")
    cols_with_1 = workload_values[workload_values == 1].index.tolist()
    print(cols_with_1)
    second = row['second']
    print(second)
    break  # a single row is enough for this sanity check

['21', '31', '50', '71']
0


In [16]:
import torch
import torchvision.models as models

In [26]:
# Write one copy of the pretrained ResNet-152 weights per workload id.
# The network and its state dict do not depend on the loop variable, so they
# are built once instead of being re-instantiated on every iteration.
# NOTE(review): the loading cell reads 'resnet50_weights_{i}.pth' from
# ../../data/DL_model_files, not the files written here -- confirm which
# architecture and location are intended.
resnet152 = models.resnet152(pretrained=True)
pretrained_state = resnet152.state_dict()
for i in workload_ids:
    torch.save(pretrained_state, f'resnet152_weights_{i}.pth')

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /Users/kamil/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:42<00:00, 5.72MB/s] 


In [18]:
# Map each workload id to its own ResNet-50 instance with weights loaded from disk.
dl_models = {}

for i in workload_ids:
    dl_models[i] = models.resnet50()
    # map_location="cpu" lets checkpoints that were saved from a GPU load on a
    # CPU-only machine; load_state_dict then copies the tensors into the model.
    state_dict = torch.load(
        f'../../data/DL_model_files/resnet50_weights_{i}.pth',
        map_location="cpu",
    )
    dl_models[i].load_state_dict(state_dict)

print(dl_models)

{1: ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=

In [19]:
import urllib.request

# Remote location of the sample picture and the local file it is stored in.
image_url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
image_path = "sample_image.jpg"

# Fetch the picture; the (filename, headers) result is the cell's output.
urllib.request.urlretrieve(url=image_url, filename=image_path)

('sample_image.jpg', <http.client.HTTPMessage at 0x29f933880>)

In [20]:
import torchvision.transforms as transforms
from PIL import Image

In [24]:
# Classify the sample image with the model stored for workload 1.
image_path = "sample_image.jpg"
# Open inside a context manager so the file handle is released, and force
# 3-channel RGB so the 3-value Normalize below also works for grayscale/RGBA files.
with Image.open(image_path) as image_file:
    image = image_file.convert("RGB")

# Resize, crop to 224x224, convert to a tensor, then normalize each channel.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Apply transformations to the image
input_tensor = preprocess(image)
# Add a leading batch dimension
input_batch = input_tensor.unsqueeze(0)

# Set the model to evaluation mode
dl_models[1].eval()

# Perform inference without tracking gradients
with torch.no_grad():
    output = dl_models[1](input_batch)

# Softmax over the scores of the single batch item gives class probabilities
probabilities = torch.nn.functional.softmax(output[0], dim=0)
# Index of the highest-probability class
predicted_class_index = torch.argmax(probabilities).item()
predicted_class_index

258