In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
import math

In [3]:
data = pd.read_csv("metrics.csv")
data.head()

Unnamed: 0,timestamp,cpu_usage,memory_usage,disk_io,network_in,network_out,db_queries,db_latency,app_errors,response_time,active_users
0,2025-01-01 00:00:00,55.052082,35.836442,24.862598,19.796225,22.052899,119.918888,6.94204,2.424845,119.682105,441.122469
1,2025-01-01 00:05:00,42.994891,42.544408,28.80275,28.97653,10.95911,48.764157,4.300685,1.481822,133.844532,385.939515
2,2025-01-01 00:10:00,50.756957,38.272199,22.1144,19.956845,19.768088,140.011782,4.799572,2.407843,265.416551,547.382904
3,2025-01-01 00:15:00,53.165998,39.243855,23.676543,27.45622,16.7974,84.131386,7.76968,1.713054,188.618634,673.131839
4,2025-01-01 00:20:00,48.200113,33.354073,24.526947,22.425039,5.285591,86.012965,6.188546,2.66355,179.486447,384.999948


## Data :

In [8]:
data['timestamp'] = pd.to_datetime(data['timestamp'], format="%Y-%m-%d %H:%M:%S")
data['hour'] = data['timestamp'].dt.hour
data['minute'] = data['timestamp'].dt.minute
data['time_value'] = data['hour'] * 60 + data['minute']
data['time_value_normalized'] = data['time_value'] / (24 * 60) * 2 * np.pi

## Activation classes

In [9]:
def t2v(tau, f, out_features, w, b, w0, b0, arg=None):
    if arg:
        v1 = f(torch.matmul(tau, w) + b, arg)
    else:
        #print(w.shape, t1.shape, b.shape)
        v1 = f(torch.matmul(tau, w) + b)
    v2 = torch.matmul(tau, w0) + b0
    #print(v1.shape)
    return torch.cat([v1, v2], -1)

class SineActivation(nn.Module):
    def __init__(self, in_features, out_features):
        super(SineActivation, self).__init__()
        self.out_features = out_features
        self.w0 = nn.parameter.Parameter(torch.randn(in_features, 1))
        self.b0 = nn.parameter.Parameter(torch.randn(1))
        self.w = nn.parameter.Parameter(torch.randn(in_features, out_features-1))
        self.b = nn.parameter.Parameter(torch.randn(out_features-1))
        self.f = torch.sin

    def forward(self, tau):
        return t2v(tau, self.f, self.out_features, self.w, self.b, self.w0, self.b0)

class CosineActivation(nn.Module):
    def __init__(self, in_features, out_features):
        super(CosineActivation, self).__init__()
        self.out_features = out_features
        self.w0 = nn.parameter.Parameter(torch.randn(in_features, 1))
        self.b0 = nn.parameter.Parameter(torch.randn(1))
        self.w = nn.parameter.Parameter(torch.randn(in_features, out_features-1))
        self.b = nn.parameter.Parameter(torch.randn(out_features-1))
        self.f = torch.cos

    def forward(self, tau):
        return t2v(tau, self.f, self.out_features, self.w, self.b, self.w0, self.b0)

if __name__ == "__main__":
    sineact = SineActivation(1, 64)
    cosact = CosineActivation(1, 64)

    print(sineact(torch.Tensor([[7]])).shape)
    print(cosact(torch.Tensor([[7]])).shape)

torch.Size([1, 64])
torch.Size([1, 64])


## Model class

In [10]:
class Model(nn.Module):
    def __init__(self, activation, hidden_dim):
        super(Model, self).__init__()
        if activation == "sin":
            self.l1 = SineActivation(1, hidden_dim)
        elif activation == "cos":
            self.l1 = CosineActivation(1, hidden_dim)

        self.fc1 = nn.Linear(hidden_dim, 2)

    def forward(self, x):
        #x = x.unsqueeze(1)
        x = self.l1(x)
        x = self.fc1(x)
        return x

Passing data via model

In [22]:
time_values = torch.tensor(data['time_value_normalized'].values, dtype=torch.float32).unsqueeze(1)
model = Model(hidden_dim = 32, activation = 'sin')

with torch.no_grad():
  embeddings = model(time_values)


time_embeddings_np =embeddings.numpy()
len(time_embeddings_np)

1000

## using pearsons corr

In [25]:
import scipy.stats as stats


metric_columns = ['cpu_usage', 'memory_usage', 'disk_io', 'network_in',
                  'network_out', 'db_queries', 'db_latency', 'app_errors',
                  'response_time', 'active_users']


correlations = {}

for i in range(time_embeddings_np.shape[1]):
    dim_correlations = {}
    for col in metric_columns:
        corr, p_value = stats.pearsonr(time_embeddings_np[:, i], data[col])
        dim_correlations[col] = (corr, p_value)
    correlations[f"dim_{i}"] = dim_correlations

top_correlations = {}
for dim, corrs in correlations.items():
    sorted_corrs = sorted(corrs.items(), key=lambda x: abs(x[1][0]), reverse=True)
    top_correlations[dim] = sorted_corrs[:3]  # Top 3 correlations

for dim, top_corrs in top_correlations.items():
    print(f"Time embedding dimension {dim}:")
    for metric, (corr, p_value) in top_corrs:
        print(f"  {metric}: correlation = {corr:.4f}")

Time embedding dimension dim_0:
  memory_usage: correlation = 0.1534
  disk_io: correlation = 0.1511
  cpu_usage: correlation = 0.1131
Time embedding dimension dim_1:
  cpu_usage: correlation = 0.1912
  disk_io: correlation = 0.1870
  memory_usage: correlation = 0.1803
