# Understanding Catastrophic Forgetting

### Please refer to ReadMe for more details about the below Neural ODE Simulation

In [2]:
!pip install torch-optimizer

Collecting torch-optimizer
  Downloading torch_optimizer-0.3.0-py3-none-any.whl.metadata (55 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.9/55.9 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-ranger>=0.1.1 (from torch-optimizer)
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl.metadata (509 bytes)
Downloading torch_optimizer-0.3.0-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Installing collected packages: pytorch-ranger, torch-optimizer
Successfully installed pytorch-ranger-0.1.1 torch-optimizer-0.3.0


In [3]:
!pip install torchdiffeq

Collecting torchdiffeq
  Downloading torchdiffeq-0.2.5-py3-none-any.whl.metadata (440 bytes)
Downloading torchdiffeq-0.2.5-py3-none-any.whl (32 kB)
Installing collected packages: torchdiffeq
Successfully installed torchdiffeq-0.2.5


In [7]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from torchdiffeq import odeint

# function to create a synthetic dataset for classification task that is linearly seperable and easy (TASK 1)
def generate_task1_dataset(n_samples=1000, n_features=2, n_classes=2):
    X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=2, n_redundant=0, n_repeated=0, random_state=42)
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# function to create a new dataset which is nonlinearly seperable and noisy (TASK 2)
def generate_task2_dataset(n_samples=1000, n_features=2, n_classes=2):
    X, y = make_classification(n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=2, n_redundant=0, n_repeated=0, n_clusters_per_class=2, class_sep=0.5, random_state=42)
    # Adding noise for non-linearity
    noise = np.random.randn(n_samples, n_features) * 2
    X = X + noise
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# simple feed forward neural network
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(2, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# neural ODE
class NeuralODE(nn.Module):
    def __init__(self):
        super(NeuralODE, self).__init__()
        self.fc1 = nn.Linear(2, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, t, x):
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# training function for both - neural ODE and basic feed Fwd Neural Net
def train_model(model, data, labels, epochs=100, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# evaluating model accuracy
def evaluate_model(model, X, y):
    with torch.no_grad():
        outputs = model(X)
        _, predicted = torch.max(outputs, 1)
        accuracy = (predicted == y).float().mean().item()
    return accuracy

# accuracy evaluation for neural ODE
def evaluate_ode_model(model, X, y):
    with torch.no_grad():
        # assuming that time steps are 0 and 1 - just an example
        t = torch.tensor([0., 1.])  # Example time steps
        outputs = odeint(model, X, t)
        outputs = outputs[-1]  #  final output after integration
        _, predicted = torch.max(outputs, 1)
        accuracy = (predicted == y).float().mean().item()
    return accuracy

# X and y corresponding to task 1
X1, y1 = generate_task1_dataset(n_samples=1000)

# non-linear and noisy dataset for task 2
X2, y2 = generate_task2_dataset(n_samples=1000)

# simple NN was trained on 200 epochs
simple_nn = SimpleNN()
train_model(simple_nn, X1, y1, epochs=200)

# evaluation on TASK 1 before TASK 2
accuracy_nn_task1_before = evaluate_model(simple_nn, X1, y1)
print(f"Accuracy on Task 1 (before Task 2) with NN: {accuracy_nn_task1_before:.4f}")

# training on TASK 2
train_model(simple_nn, X2, y2, epochs=200)

# evaluate both the tasks
accuracy_nn_task1_after = evaluate_model(simple_nn, X1, y1)
accuracy_nn_task2 = evaluate_model(simple_nn, X2, y2)

print(f"Accuracy on Task 1 (after Task 2) with NN: {accuracy_nn_task1_after:.4f}")
print(f"Accuracy on Task 2 with NN: {accuracy_nn_task2:.4f}")

# repeatition of the process for neural ODE
def train_ode_model(model, X, y, epochs=100, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        optimizer.zero_grad()
        t = torch.tensor([0., 1.])  #  dummy time steps for the ODE solver
        outputs = odeint(model, X, t)
        outputs = outputs[-1]   # final output after integration
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

#  neural ODE training on TASK 1
neural_ode = NeuralODE()
train_ode_model(neural_ode, X1, y1, epochs=200)

#   evaluate on TASK 1 before TASK 2 for neural ODE
accuracy_ode_task1_before = evaluate_ode_model(neural_ode, X1, y1)
print(f"Accuracy on Task 1 (before Task 2) with ODE: {accuracy_ode_task1_before:.4f}")

#  train Neural ODE on TASK 2
train_ode_model(neural_ode, X2, y2, epochs=200)

#    evaluate Neural ODE on TASK 1 and TASK 2 after training on TASK 2
accuracy_ode_task1_after = evaluate_ode_model(neural_ode, X1, y1)
accuracy_ode_task2 = evaluate_ode_model(neural_ode, X2, y2)

print(f"Accuracy on Task 1 (after Task 2) with ODE: {accuracy_ode_task1_after:.4f}")
print(f"Accuracy on Task 2 with ODE: {accuracy_ode_task2:.4f}")


Accuracy on Task 1 (before Task 2) with NN: 0.8820
Accuracy on Task 1 (after Task 2) with NN: 0.8080
Accuracy on Task 2 with NN: 0.6090
Accuracy on Task 1 (before Task 2) with ODE: 0.8740
Accuracy on Task 1 (after Task 2) with ODE: 0.8640
Accuracy on Task 2 with ODE: 0.6100


### End