In [13]:
import helper
from torch.utils.data import DataLoader, random_split
import TextDataset
import importlib
importlib.reload(TextDataset)
importlib.reload(helper)

<module 'helper' from '/Volumes/Dwika/ISE/ISE-solution/lab1/helper.py'>

In [14]:
DATASETS = ['caffe', 'incubator-mxnet', 'keras', 'pytorch', 'tensorflow']
RESULTS = []
EPOCHS = 50
BATCH_SIZE = 32
VAL_RATIO = 0.2
INPUT_DIM = 1000

# Train all models

In [15]:
models=[]
val_loaders=[]
train_loaders=[]

# Load the train and validation differently

In [16]:
for names in DATASETS:
    dataset = TextDataset.TextDatasetTFIDF(f'datasets/{names}.csv')
    val_size = int(len(dataset) * VAL_RATIO)
    train_size = len(dataset) - val_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    val_loaders.append({
         f'base-dataset-{names}':val_loader
    })
    train_loaders.append({
        f'base-dataset-{names}':train_loader
    })

Preprocessed DataFrame:
                                                   text  sentiment
9    unable to reproduce accuracy of bvlc-alexnet. ...          1
267  osx: abs not defined absval_layer. When compil...          0
143  cafe_intsall.caffe 36 error. I am trying caffe...          0
212   undefined reference to `lzma_index_end@XZ_5.0...          0
227  Dimension mismatch training with my own model ...          0
Preprocessed DataFrame:
                                                   text  sentiment
304  Do I need to change grad_req when sharing weig...          0
500  docs for gluon.data.* are missing. Probably re...          0
441  Does gluon's dnn support data format of libsvm...          0
153  Building with OpenCV causes link errors. ## De...          0
502  #12285 Breaks NDArrayIter For 3D Arrays. ## De...          0
Preprocessed DataFrame:
                                                   text  sentiment
361  How to represent 28x1x1 output dense layer?. !...          0
1

In [17]:
def find_loader_by_key(loader_list, key):
    for loader_dict in loader_list:
        if key in loader_dict:
            return loader_dict[key]
    return None 

In [18]:
for names in DATASETS:
    train_loader=find_loader_by_key(train_loaders,f'base-dataset-{names}')
    model= helper.train_model(train_loader)
    models.append({
        f'base-dataset-{names}':model
    })
    val_loaders.append({
         f'base-dataset-{names}':val_loader
    })

# Save the models 

In [19]:
import torch
import os

# Ensure the folder exists
os.makedirs("models", exist_ok=True)

In [221]:
for model_dict in models:
    for name, model in model_dict.items():
        path = os.path.join("models", f"{name}.pt")
        torch.save(model.state_dict(), path) # save only the weigths
        print(f"Saved model to {path}")

Saved model to models/base-dataset-caffe.pt
Saved model to models/base-dataset-incubator-mxnet.pt
Saved model to models/base-dataset-keras.pt
Saved model to models/base-dataset-pytorch.pt
Saved model to models/base-dataset-tensorflow.pt


#### You can skip the above if all the models has been saved

# Load all the models back

In [20]:
from simple_mlp import MLPWithLayerNorm

In [218]:
# if models is None:
#     models=[]
# if len(models)==0:
#     model_folder = "models"
#     # Loop through each .pt file in the folder
#     for filename in sorted(os.listdir(model_folder)):
#         if filename.endswith(".pt"):
#             model_name = filename.replace(".pt", "")  # e.g., 'base-dataset-caffe'
#             model_path = os.path.join(model_folder, filename)
#             # Re-initialize a fresh model instance
#             model = MLPWithLayerNorm(input_dim=1000)
#             model.load_state_dict(torch.load(model_path))
#             models.append({
#                 model_name:model
#             })
#             print(f"Loaded: {model_name}")


# Evaluate the models on inference

In [21]:
results_evaluation=[]

In [22]:
for i in range(50):
    for model_dict in models:
        for name, model in model_dict.items():
            # No need to reset the weights just yet cause the model is not learning with Tent
            val_loader= find_loader_by_key(val_loaders,name)  
            result=helper.evaluate_model(model,val_loader,name,name,i)
            results_evaluation.append(result)
            

In [23]:
import os 
import pandas as pd
import csv

In [24]:
# Flatten the nested results into a list of dicts
os.makedirs("result", exist_ok=True)
columns = ["iteration", "name", "accuracy", "precision", "recall", "f1", "auc"]
with open("result/base_models.csv", mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=columns)
    writer.writeheader()

    # Loop through all results and write each row
    for results in results_evaluation:
        row = {col: results.get(col, None) for col in columns}
        writer.writerow(row)


print("Saved all base model results to result/base_models.csv")

Saved all base model results to result/base_models.csv


# Load the data to dataframe to process

In [25]:
df_base_models = pd.read_csv("result/base_models.csv")

In [26]:
model_names = df_base_models["name"].unique().tolist()
model_names

['base-dataset-caffe->base-dataset-caffe',
 'base-dataset-incubator-mxnet->base-dataset-incubator-mxnet',
 'base-dataset-keras->base-dataset-keras',
 'base-dataset-pytorch->base-dataset-pytorch',
 'base-dataset-tensorflow->base-dataset-tensorflow']

In [27]:
summary_stats = []

In [28]:
for name in model_names:
    subset = df_base_models[df_base_models["name"] == name]

    # Calculate mean and std for each metric
    stats = {
        "name": name,
        "mean_accuracy": subset["accuracy"].mean(),
        "std_accuracy": subset["accuracy"].std(),
        "mean_precision": subset["precision"].mean(),
        "std_precision": subset["precision"].std(),
        "mean_recall": subset["recall"].mean(),
        "std_recall": subset["recall"].std(),
        "mean_f1": subset["f1"].mean(),
        "std_f1": subset["f1"].std()
    }

    summary_stats.append(stats)
    

In [29]:

summary_df = pd.DataFrame(summary_stats)
summary_df

Unnamed: 0,name,mean_accuracy,std_accuracy,mean_precision,std_precision,mean_recall,std_recall,mean_f1,std_f1
0,base-dataset-caffe->base-dataset-caffe,0.859649,5.607473e-16,0.333333,1.682242e-16,0.142857,1.962616e-16,0.2,8.41121e-17
1,base-dataset-incubator-mxnet->base-dataset-inc...,0.902913,5.607473e-16,0.666667,3.364484e-16,0.461538,3.925231e-16,0.545455,3.364484e-16
2,base-dataset-keras->base-dataset-keras,0.842105,1.121495e-16,0.681818,6.728968e-16,0.517241,2.242989e-16,0.588235,5.607473e-16
3,base-dataset-pytorch->base-dataset-pytorch,0.88,4.485978e-16,0.615385,4.485978e-16,0.380952,5.607473e-17,0.470588,5.046726e-16
4,base-dataset-tensorflow->base-dataset-tensorflow,0.889262,5.607473e-16,0.68,2.242989e-16,0.666667,3.364484e-16,0.673267,1.121495e-16


# Try Tent to improve upon the model

### Load models

In [166]:
DATASETS = ['caffe', 'incubator-mxnet', 'keras', 'pytorch', 'tensorflow']


In [198]:
if models is None:
    models=[]
if len(models)==0:
    model_folder = "models"
    # Loop through each .pt file in the folder
    for filename in sorted(os.listdir(model_folder)):
        if filename.endswith(".pt"):
            model_name = filename.replace(".pt", "")  # e.g., 'base-dataset-caffe'
            model_path = os.path.join(model_folder, filename)
            # Re-initialize a fresh model instance
            model = MLPWithLayerNorm(input_dim=1000)
            model.load_state_dict(torch.load(model_path))
            models.append({
                model_name:model
            })
            print(f"Loaded: {model_name}")


# Tent Each model and save it 

In [168]:
import tent
import importlib
importlib.reload(tent)

<module 'tent' from '/Volumes/Dwika/ISE/ISE-solution/lab1/tent.py'>

# Evaluate on different dataset for 50 times on each dataset 

In [169]:
models_tented=[]

In [177]:
if models_tented is None:
    models_tented=[]
if len(models_tented)==0:
    model_folder = "models"
    # Loop through each .pt file in the folder
    for filename in sorted(os.listdir(model_folder)):
        if filename.endswith(".pt"):
            model_name = filename.replace(".pt", "")  # e.g., 'base-dataset-caffe'
            model_path = os.path.join(model_folder, filename)
            # Re-initialize a fresh model instance
            model = MLPWithLayerNorm(input_dim=1000)
            model.load_state_dict(torch.load(model_path))
            model= tent.configureTent(model)
            models_tented.append({
                model_name:model
            })
            print(f"Loaded: {model_name}")

Loaded: base-dataset-caffe
Loaded: base-dataset-incubator-mxnet
Loaded: base-dataset-keras
Loaded: base-dataset-pytorch
Loaded: base-dataset-tensorflow


  model.load_state_dict(torch.load(model_path))


# Infer 50 times on one model to target

In [178]:
data_loaders=[]

In [179]:
for names in DATASETS:
    dataset = TextDataset.TextDatasetTFIDF(f'datasets/{names}.csv')
    data_loader=DataLoader(dataset, batch_size=64, shuffle=True)
    data_loaders.append({
        f'base-dataset-{names}':data_loader
    })

Preprocessed DataFrame:
                                                   text  sentiment
9    unable to reproduce accuracy of bvlc-alexnet. ...          1
267  osx: abs not defined absval_layer. When compil...          0
143  cafe_intsall.caffe 36 error. I am trying caffe...          0
212   undefined reference to `lzma_index_end@XZ_5.0...          0
227  Dimension mismatch training with my own model ...          0
Preprocessed DataFrame:
                                                   text  sentiment
304  Do I need to change grad_req when sharing weig...          0
500  docs for gluon.data.* are missing. Probably re...          0
441  Does gluon's dnn support data format of libsvm...          0
153  Building with OpenCV causes link errors. ## De...          0
502  #12285 Breaks NDArrayIter For 3D Arrays. ## De...          0
Preprocessed DataFrame:
                                                   text  sentiment
361  How to represent 28x1x1 output dense layer?. !...          0
1

In [180]:
data_loaders

[{'base-dataset-caffe': <torch.utils.data.dataloader.DataLoader at 0x30e50e4d0>},
 {'base-dataset-incubator-mxnet': <torch.utils.data.dataloader.DataLoader at 0x310840750>},
 {'base-dataset-keras': <torch.utils.data.dataloader.DataLoader at 0x3279dbc50>},
 {'base-dataset-pytorch': <torch.utils.data.dataloader.DataLoader at 0x32c9f0790>},
 {'base-dataset-tensorflow': <torch.utils.data.dataloader.DataLoader at 0x30e281050>}]

In [181]:
tent_all_results=[]

In [193]:
dataset_tent=TextDataset.TextDatasetTFIDF("datasets/tensorflow.csv")
dataloader_tent= DataLoader(dataset_tent, batch_size=512, shuffle=True)


Preprocessed DataFrame:
                                                   text  sentiment
941  Graph_def is invalid at node u'ExpandDims': In...          0
297  stream_executor/platform/mutex.h doesn't compi...          0
271  [Perfomance]Dilated/Atrous Conv implementation...          1
774  Tensorflow 2.0 keras load_model does not resto...          0
420  Multiprocessing for input pipeline . I have as...          0


In [194]:
models

[{'base-dataset-caffe': MLPWithLayerNorm(
    (fc1): Linear(in_features=1000, out_features=64, bias=True)
    (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (relu): ReLU()
    (fc2): Linear(in_features=64, out_features=64, bias=True)
    (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (fc3): Linear(in_features=64, out_features=2, bias=True)
  )},
 {'base-dataset-incubator-mxnet': MLPWithLayerNorm(
    (fc1): Linear(in_features=1000, out_features=64, bias=True)
    (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (relu): ReLU()
    (fc2): Linear(in_features=64, out_features=64, bias=True)
    (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (fc3): Linear(in_features=64, out_features=2, bias=True)
  )},
 {'base-dataset-keras': MLPWithLayerNorm(
    (fc1): Linear(in_features=1000, out_features=64, bias=True)
    (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (relu): ReLU()
    (fc2): Linear(in_featur

In [195]:
tent_model=tent.configureTent(models[0]["base-dataset-caffe"])

In [196]:
result=tent.Tent(tent_model,dataloader_tent,"caffe","tensorflow",1)
print(result)

Adapting with Tent:   0%|          | 0/3 [00:00<?, ?it/s]

Adapting with Tent: 100%|██████████| 3/3 [00:01<00:00,  2.86it/s]


📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 81.28%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5000
{'iteration': 1, 'name': 'caffe->tensorflow', 'accuracy': 0.812751677852349, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'auc': 0.5}



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [183]:
for model_dict in models_tented:
    for model_name, model in model_dict.items():
        # Extract the source dataset from the model name
        for dataset_name in DATASETS:
            if dataset_name in model_name:
                model_dataset = dataset_name
                break  # We found the matching dataset used for training

        for target_dataset in DATASETS:
            if target_dataset == model_dataset:
                continue  # Skip if same dataset as model's training data

            for i in range(50):
                for loader_dict in data_loaders:
                    for loader_name, data_loader in loader_dict.items():
                        if target_dataset not in loader_name:
                            continue  # Not the right dataset to test on
                        
                        result = tent.Tent(model, data_loader, model_name, loader_name, i)
                        tent_all_results.append(result)
                        


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 20.14it/s]



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.21%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.4941


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 122.63it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5294


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 81.22it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5596


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 50.99it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5492


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 61.58it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5582


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 50.42it/s]



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5539


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 28.35it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5518


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 69.61it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5552


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 63.15it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5532


Adapting with Tent: 100%|██████████| 9/9 [00:00<00:00, 44.34it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 Test-Time Adaptation (Tent) Metrics:
  Accuracy : 87.40%
  F1 Score : 0.0000
  Precision: 0.0000
  Recall   : 0.0000
  ROC AUC  : 0.5548


Adapting with Tent:  11%|█         | 1/9 [00:00<00:02,  2.72it/s]


KeyboardInterrupt: 