In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
import glob
import os

In [4]:
csv_folder = "/content/drive/MyDrive/Colab Notebooks/splunk_anom_detector/synthetic_kpi_data/"
all_files = glob.glob(os.path.join(csv_folder, "*.csv"))

df_list = []
for file in all_files:
    temp_df = pd.read_csv(file)
    df_list.append(temp_df)

df = pd.concat(df_list, ignore_index=True)

In [5]:
def map_label(kpi_type):
    if kpi_type == "consistent":
        return "fit"
    elif kpi_type == "erratic":
        return "not_fit"
    else:
        return "maybe_fit"

df["fit_label"] = df["kpi_type"].apply(map_label)

In [6]:
# Define "block_of_day" logic
def block_of_day(hour):
    if 6 <= hour < 11:
        return "morning"
    elif 11 <= hour < 14:
        return "lunch"
    elif 14 <= hour < 18:
        return "afternoon"
    else:
        return "night"

df["block_of_day"] = df["hour_of_day"].apply(block_of_day)

# Group by kpi_name, fit_label to get a single row per KPI
aggregated = df.groupby(["kpi_name","fit_label"]).agg(
    overall_avg=("value","mean"),
    overall_stdev=("value","std"),
    stdev_morning=("value",lambda x: x[df.loc[x.index,"block_of_day"]=="morning"].std()),
    stdev_lunch=("value",lambda x: x[df.loc[x.index,"block_of_day"]=="lunch"].std()),
    stdev_afternoon=("value",lambda x: x[df.loc[x.index,"block_of_day"]=="afternoon"].std()),
    stdev_night=("value",lambda x: x[df.loc[x.index,"block_of_day"]=="night"].std())
).reset_index()

In [7]:
aggregated.fillna(0, inplace=True)  # Example: fill empty stdev with 0


In [8]:
label_map = {"fit":0, "maybe_fit":1, "not_fit":2}
aggregated["label_num"] = aggregated["fit_label"].map(label_map)


In [9]:
from sklearn.model_selection import train_test_split

features = ["overall_avg","overall_stdev",
            "stdev_morning","stdev_lunch","stdev_afternoon","stdev_night"]
X = aggregated[features].values
y = aggregated["label_num"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

num_classes = 3  # fit, maybe_fit, not_fit

model = keras.Sequential([
    layers.InputLayer(input_shape=(len(features),)),
    layers.Dense(16, activation="relu"),
    layers.Dense(8, activation="relu"),
    layers.Dense(num_classes, activation="softmax")
])

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()




In [11]:
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,  # adjust as needed
    batch_size=16
)

Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 86ms/step - accuracy: 0.2869 - loss: 118.6686 - val_accuracy: 0.2857 - val_loss: 81.9865
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.3598 - loss: 80.3068 - val_accuracy: 0.2857 - val_loss: 75.8535
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.3494 - loss: 94.5047 - val_accuracy: 0.2857 - val_loss: 70.1200
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3036 - loss: 102.6979 - val_accuracy: 0.2857 - val_loss: 64.8117
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2786 - loss: 89.7456 - val_accuracy: 0.2857 - val_loss: 59.8574
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3411 - loss: 74.0972 - val_accuracy: 0.2857 - val_loss: 55.2173
Epoch 7/50
[1m4/4[0m [32m━━━━

In [12]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test accuracy:", test_acc)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5333 - loss: 0.8953
Test accuracy: 0.5333333611488342


In [13]:
!pip install tf2onnx



In [15]:
import tf2onnx
import tensorflow as tf

# Create an inference function for the model
@tf.function
def inference_func(x):
    return model(x)

spec = (tf.TensorSpec((None, len(features)), tf.float32, name="input"),)
model_proto, _ = tf2onnx.convert.from_function(
    inference_func,
    input_signature=[spec],
    opset=13
)

with open("kpi_fitness_model.onnx", "wb") as f:
    f.write(model_proto.SerializeToString())