In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [16]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
import time
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed()
print("Device:", DEVICE)

Device: cuda


In [17]:
def generate_dataset(time_steps=1500):

    processes = [f"P{i}" for i in range(10)]
    data = []

    for process in processes:

        prev_memory = np.random.uniform(50,150)
        regime = 0

        for t in range(time_steps):

            if t in [500,1000]:
                regime = 1 - regime

            noise = np.random.normal(0,3)

            if process=="P0":
                memory = prev_memory + 0.6 + noise*0.3

            elif process=="P1":
                spike = np.random.pareto(2)*30 if np.random.rand()<0.15 else 0
                memory = prev_memory + spike + noise

            elif process=="P2":
                memory = prev_memory + np.random.normal(0,8)

            elif process=="P3":
                memory = 120 + 45*np.sin(t/10) + noise

            elif process=="P4":
                memory = prev_memory * (1.004 if regime==0 else 0.996)

            elif process=="P5":
                memory = 80 + 40*np.log(t+1)

            elif process=="P6":
                memory = prev_memory - 0.7 + noise

            elif process=="P7":
                memory = prev_memory + (25 if regime else -20) + noise

            elif process=="P8":
                memory = 60 + (t%200)*1.2

            elif process=="P9":
                r=3.9
                x=np.random.rand()
                chaotic=r*x*(1-x)
                memory = prev_memory + chaotic*15

            cpu = np.random.uniform(5,95)
            data.append([process,t,prev_memory,cpu,memory])
            prev_memory = memory

    return pd.DataFrame(data,
        columns=["process_id","time_step","previous_memory","cpu_usage","memory_usage_next"])

In [18]:
class MemoryDataset(Dataset):

    def __init__(self, df, seq_len=15):
        self.samples=[]
        self.seq_len=seq_len

        for pid in df["process_id"].unique():
            proc_df=df[df["process_id"]==pid].sort_values("time_step")
            for i in range(len(proc_df)-seq_len):
                window=proc_df.iloc[i:i+seq_len]
                target=proc_df.iloc[i+seq_len]["memory_usage_next"]
                self.samples.append((window,target))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self,idx):
        window,target=self.samples[idx]
        proc=torch.LongTensor(window["process_encoded"].values)
        feat=torch.FloatTensor(window[["previous_memory","cpu_usage"]].values)
        y=torch.FloatTensor([target])
        return proc,feat,y

In [19]:
class DeepLSTM(nn.Module):

    def __init__(self,num_processes):
        super().__init__()

        self.embedding=nn.Embedding(num_processes,64)

        self.lstm=nn.LSTM(
            input_size=64+2,
            hidden_size=128,
            num_layers=3,
            dropout=0.3,
            batch_first=True
        )

        self.fc=nn.Sequential(
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Linear(128,1)
        )

    def forward(self,proc,feat):
        emb=self.embedding(proc)
        x=torch.cat([emb,feat],dim=2)
        out,_=self.lstm(x)
        return self.fc(out[:,-1,:])

In [20]:
def compute_metrics(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true,y_pred))
    mae  = mean_absolute_error(y_true,y_pred)
    r2   = r2_score(y_true,y_pred)
    return rmse, mae, r2

In [21]:
def evaluate_model(model, df):

    model.eval()
    results=[]

    for pid in df["process_id"].unique():

        subset=df[df["process_id"]==pid]
        ds=MemoryDataset(subset)
        loader=DataLoader(ds,batch_size=64)

        preds,ys=[],[]

        with torch.no_grad():
            for p,f,y in loader:
                p,f=p.to(DEVICE),f.to(DEVICE)
                out=model(p,f)
                preds.extend(out.cpu().numpy())
                ys.extend(y.numpy())

        rmse,mae,r2=compute_metrics(ys,preds)

        results.append({
            "Process":pid,
            "RMSE":rmse,
            "MAE":mae,
            "R2":r2
        })

    return pd.DataFrame(results)

In [22]:
def train_model(model,loader,epochs=15):

    optimizer=optim.Adam(model.parameters(),lr=0.001)
    criterion=nn.MSELoss()

    for epoch in range(1,epochs+1):

        model.train()
        total_loss=0
        grad_norm_total=0

        for p,f,y in loader:
            p,f,y=p.to(DEVICE),f.to(DEVICE),y.to(DEVICE)

            optimizer.zero_grad()
            pred=model(p,f)
            loss=criterion(pred,y)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            total_norm=0
            for param in model.parameters():
                if param.grad is not None:
                    total_norm += param.grad.data.norm(2).item()

            grad_norm_total+=total_norm

            optimizer.step()
            total_loss+=loss.item()

        print(f"Epoch {epoch:02d} | Loss {total_loss/len(loader):.4f} | GradNorm {grad_norm_total/len(loader):.4f}")

    return model

In [23]:
def unlearn(model,target_loader,remain_loader):

    criterion=nn.MSELoss()

    # ---- Phase 1: Erase target ----
    optimizer=optim.Adam(model.parameters(),lr=0.0005)

    print("\n--- Unlearning Phase 1 (Gradient Ascent on Target) ---")
    for epoch in range(1,3):

        total_loss=0

        for p,f,y in target_loader:
            p,f,y=p.to(DEVICE),f.to(DEVICE),y.to(DEVICE)
            optimizer.zero_grad()
            pred=model(p,f)
            loss=criterion(pred,y)
            ascent_loss = torch.clamp(loss, max=5.0)
            (-ascent_loss).backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            total_loss+=loss.item()

        print(f"Unlearn Epoch {epoch} | Target Loss {total_loss/len(target_loader):.4f}")

    # ---- Phase 2: Restore remaining ----
    optimizer=optim.Adam(model.parameters(),lr=0.001)

    print("\n--- Unlearning Phase 2 (Recovery on Remaining) ---")
    for epoch in range(1,6):

        total_loss=0

        for p,f,y in remain_loader:
            p,f,y=p.to(DEVICE),f.to(DEVICE),y.to(DEVICE)
            optimizer.zero_grad()
            pred=model(p,f)
            loss=criterion(pred,y)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            total_loss+=loss.item()

        print(f"Recovery Epoch {epoch} | Remaining Loss {total_loss/len(remain_loader):.4f}")

    return model

In [24]:
df=generate_dataset()

le=LabelEncoder()
df["process_encoded"]=le.fit_transform(df["process_id"])

feature_scaler = StandardScaler()
target_scaler  = StandardScaler()

df[["previous_memory","cpu_usage"]] = feature_scaler.fit_transform(
    df[["previous_memory","cpu_usage"]]
)

df["memory_usage_next"] = target_scaler.fit_transform(
    df[["memory_usage_next"]]
)

train_df,test_df=train_test_split(df,test_size=0.2,random_state=42)

train_loader=DataLoader(MemoryDataset(train_df),batch_size=64,shuffle=True)

model=DeepLSTM(len(le.classes_)).to(DEVICE)

print("\n===== TRAINING START =====")
model=train_model(model,train_loader)

print("\n===== BASELINE METRICS =====")
baseline_metrics=evaluate_model(model,test_df)
print(baseline_metrics)

target_process="P3"

target_loader=DataLoader(
    MemoryDataset(train_df[train_df["process_id"]==target_process]),
    batch_size=64,shuffle=True)

remain_loader=DataLoader(
    MemoryDataset(train_df[train_df["process_id"]!=target_process]),
    batch_size=64,shuffle=True)

print("\n===== UNLEARNING START =====")
model_un=DeepLSTM(len(le.classes_)).to(DEVICE)
model_un.load_state_dict(model.state_dict())

model_un=unlearn(model_un,target_loader,remain_loader)

print("\n===== AFTER UNLEARNING METRICS =====")
after_metrics=evaluate_model(model_un,test_df)
print(after_metrics)

# Certified forgetting score
before_target = baseline_metrics[baseline_metrics["Process"]==target_process]["RMSE"].values[0]
after_target  = after_metrics[after_metrics["Process"]==target_process]["RMSE"].values[0]

print("\nCertified Forgetting Score (RMSE increase on target):",
      after_target - before_target)


===== TRAINING START =====
Epoch 01 | Loss 0.1496 | GradNorm 1.7784
Epoch 02 | Loss 0.0072 | GradNorm 0.9861
Epoch 03 | Loss 0.0040 | GradNorm 0.6358
Epoch 04 | Loss 0.0041 | GradNorm 0.7191
Epoch 05 | Loss 0.0046 | GradNorm 0.7194
Epoch 06 | Loss 0.0037 | GradNorm 0.6841
Epoch 07 | Loss 0.0028 | GradNorm 0.5789
Epoch 08 | Loss 0.0025 | GradNorm 0.5332
Epoch 09 | Loss 0.0028 | GradNorm 0.5603
Epoch 10 | Loss 0.0027 | GradNorm 0.5124
Epoch 11 | Loss 0.0022 | GradNorm 0.4636
Epoch 12 | Loss 0.0022 | GradNorm 0.4513
Epoch 13 | Loss 0.0020 | GradNorm 0.4372
Epoch 14 | Loss 0.0022 | GradNorm 0.4566
Epoch 15 | Loss 0.0021 | GradNorm 0.4401

===== BASELINE METRICS =====
  Process      RMSE       MAE        R2
0      P7  0.131094  0.103638  0.983245
1      P4  0.016220  0.012752  0.903964
2      P8  0.021846  0.018410 -0.073696
3      P0  0.005152  0.004285  0.995568
4      P3  0.012078  0.010293 -0.395236
5      P2  0.012953  0.010199  0.885363
6      P1  0.056434  0.051549  0.985968
7      

In [27]:
# =====================================================
# PROCESS-WISE RMSE + R2 COMPARISON TABLE
# =====================================================

comparison_rows = []

for pid in baseline_metrics["Process"]:

    before_rmse = baseline_metrics[
        baseline_metrics["Process"] == pid]["RMSE"].values[0]

    after_rmse = after_metrics[
        after_metrics["Process"] == pid]["RMSE"].values[0]

    before_r2 = baseline_metrics[
        baseline_metrics["Process"] == pid]["R2"].values[0]

    after_r2 = after_metrics[
        after_metrics["Process"] == pid]["R2"].values[0]

    comparison_rows.append({
        "Process": pid,
        "Before RMSE": round(before_rmse, 6),
        "After RMSE": round(after_rmse, 6),
        "Before R2": round(before_r2, 6),
        "After R2": round(after_r2, 6)
    })

comparison_df = pd.DataFrame(comparison_rows)

print("\n===== PROCESS-WISE RMSE + R2 COMPARISON =====\n")
print(comparison_df.sort_values("Process").reset_index(drop=True))


===== PROCESS-WISE RMSE + R2 COMPARISON =====

  Process  Before RMSE  After RMSE  Before R2      After R2
0      P0     0.005152    0.012950   0.995568      0.971999
1      P1     0.056434    0.034787   0.985968      0.994668
2      P2     0.012953    0.026242   0.885363      0.529527
3      P3     0.012078    2.443270  -0.395236 -57092.709268
4      P4     0.016220    0.023869   0.903964      0.792017
5      P5     0.008050    0.009197   0.198743     -0.045971
6      P6     0.007699    0.023578   0.993549      0.939491
7      P7     0.131094    0.126311   0.983245      0.984446
8      P8     0.021846    0.021633  -0.073696     -0.052850
9      P9     0.093034    0.073009   0.994502      0.996614
