In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/5way-5shot/train_LSA64_5way_5shot_suppot_query.csv
/kaggle/input/5way-5shot/test_LSA64_5way_5shot_suppot_query.csv
/kaggle/input/te-v3-model/model_epoch_70.pt


In [2]:
from sklearn.metrics import accuracy_score,silhouette_score 
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import csv
import ast
import torch.nn.functional as F
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import LabelEncoder
import random
from torch.optim import Adam
from torch.nn.functional import cross_entropy
import pandas as pd
from sklearn.model_selection import train_test_split



In [3]:
# Your code for loading the data (unchanged)
input_file = "/kaggle/input/5way-5shot/train_LSA64_5way_5shot_suppot_query.csv"

values = []
matrix_labels = []
num_rows = 0

with open(input_file, "r") as f_input:
    reader = csv.reader(f_input)
    for row in reader:
        row_values = []
        for i in range(len(row) - 1):
            column_value = ast.literal_eval(row[i])
            row_values.append(column_value)
        values.append(torch.tensor(row_values))
        matrix_labels.append(ast.literal_eval(row[-1]))
        num_rows += 1
        
matrix_labels = np.array(matrix_labels)

In [4]:
class TransformerEncoder(nn.Module):
    def __init__(self, n_features, d_model=32, nhead=32, num_layers=1):
        super(TransformerEncoder, self).__init__()
        self.embedding = nn.Linear(n_features, d_model)
        self.positional_encoding = self.generate_positional_encoding(d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead), num_layers
        )

    def generate_positional_encoding(self, d_model, max_len=243):
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe.unsqueeze(0)

    def forward(self, x):
        x = self.embedding(x)
        x = x + self.positional_encoding[:, : x.size(1)]
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        return x

In [5]:
class PrototypicalNetworks(nn.Module):
    def __init__(self, backbone: nn.Module):
        super(PrototypicalNetworks, self).__init__()
        self.backbone = backbone

    def forward(
        self,
        support_images: torch.Tensor,
        support_labels: torch.Tensor,
        query_images: torch.Tensor,
    ) -> torch.Tensor:
        """
        Predict query labels using labeled support images.
        """
        # Extract the features of support and query images
        z_support = self.backbone.forward(support_images)
        z_query = self.backbone.forward(query_images)

        # Infer the number of different classes from the labels of the support set
        n_way = len(torch.unique(support_labels))
        # Prototype i is the mean of all instances of features corresponding to labels == i
        z_proto = torch.cat(
            [
                z_support[torch.nonzero(support_labels == label)].mean(0)
                for label in range(n_way)
            ]
        )
#         print(z_proto.shape)
#         print(z_query.shape, z_proto.shape)
        # Compute the euclidean distance from queries to prototypes
        dists = torch.cdist(z_query, z_proto)
#         print(dists.shape)
        # And here is the super complicated operation to transform those distances into classification scores!
        scores = -dists
        return scores

In [6]:
# Load the data and split it into training and testing sets
# train_values, test_values, train_matrix_labels, test_matrix_labels = train_test_split(values, matrix_labels, test_size=0.2, random_state=42, stratify=matrix_labels)

train_values = torch.stack(values)
train_matrix_labels = matrix_labels

In [7]:
# Load the saved TransformerEncoder model
saved_model_path = "/kaggle/input/te-v3-model/model_epoch_70.pt"

# Instantiate the TransformerEncoder as the backbone
n_features = 114
encoder = TransformerEncoder(n_features=n_features)
encoder.load_state_dict(torch.load(saved_model_path))

<All keys matched successfully>

In [8]:
# embeddings = encoder(torch.stack(values))

In [9]:
model = PrototypicalNetworks(encoder)

In [10]:
def create_few_shot_task(embeddings, labels, n_way, k_shot, n_query):
    unique_labels = np.unique(labels)
    selected_labels = np.random.choice(unique_labels, n_way, replace=False)
    
    # Create a dictionary for mapping the original labels to the new labels
    label_map = {label: idx for idx, label in enumerate(selected_labels)}
    
    support_set = []
    query_set = []
    support_labels = []
    query_labels = []
    
    for label in selected_labels:
        class_embeddings = embeddings[labels == label]
        if len(class_embeddings) < k_shot + n_query:
            continue
        support_indices = np.random.choice(range(len(class_embeddings)), k_shot, replace=False)
        query_indices = np.random.choice(np.delete(range(len(class_embeddings)), support_indices), n_query, replace=False)
        
        support_set.append(class_embeddings[support_indices].detach().numpy())
        query_set.append(class_embeddings[query_indices].detach().numpy())
        support_labels.extend([label_map[label]] * k_shot)
        query_labels.extend([label_map[label]] * n_query)
    
    support_set = np.concatenate(support_set, axis=0)
    query_set = np.concatenate(query_set, axis=0)
    support_labels = np.array(support_labels)
    query_labels = np.array(query_labels)
    
    return support_set, query_set, support_labels, query_labels


In [11]:
# Training parameters
n_epochs = 100
n_way = 5
k_shot = 5
n_query = 5
lr = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(n_epochs):
    print(f"Epoch: {epoch + 1}/{n_epochs}")

    support_set, query_set, support_labels, query_labels = create_few_shot_task(train_values, np.array(train_matrix_labels), n_way, k_shot, n_query)
    support_set = torch.tensor(support_set, dtype=torch.float32)
    query_set = torch.tensor(query_set, dtype=torch.float32)
    support_labels = torch.tensor(support_labels, dtype=torch.long)
#     print(support_labels)
    model.train()
    optimizer.zero_grad()
    scores = model(support_set, support_labels, query_set)
    loss = criterion(scores, torch.tensor(query_labels, dtype=torch.long))

    loss.backward()
    optimizer.step()
#     print(scores)
    # Compute accuracy
    _, predictions = torch.max(scores, 1)
#     print(predictions)
#     print(query_labels)
    accuracy = accuracy_score(query_labels, predictions.detach().numpy())
    
    # Calculate Silhouette Coefficient
    with torch.no_grad():
        query_embeddings = model.backbone.forward(query_set)
    silhouette_coefficient = silhouette_score(query_embeddings.detach().numpy(), query_labels)

#     print(f"Loss: {loss.item()}, Accuracy: {accuracy * 100}%")
    print(f"Loss: {loss.item()}, Accuracy: {accuracy * 100}%, Silhouette Coefficient: {silhouette_coefficient}")


Epoch: 1/100
Loss: 0.9209780097007751, Accuracy: 76.0%
Loss: 0.9209780097007751, Accuracy: 76.0%, Silhouette Coefficient: 0.28050750494003296
Epoch: 2/100
Loss: 0.8113956451416016, Accuracy: 68.0%
Loss: 0.8113956451416016, Accuracy: 68.0%, Silhouette Coefficient: 0.25324082374572754
Epoch: 3/100
Loss: 0.7578932046890259, Accuracy: 80.0%
Loss: 0.7578932046890259, Accuracy: 80.0%, Silhouette Coefficient: 0.2785293161869049
Epoch: 4/100
Loss: 0.652604341506958, Accuracy: 72.0%
Loss: 0.652604341506958, Accuracy: 72.0%, Silhouette Coefficient: 0.29935646057128906
Epoch: 5/100
Loss: 0.6010200381278992, Accuracy: 76.0%
Loss: 0.6010200381278992, Accuracy: 76.0%, Silhouette Coefficient: 0.2490777224302292
Epoch: 6/100
Loss: 0.5389792323112488, Accuracy: 84.0%
Loss: 0.5389792323112488, Accuracy: 84.0%, Silhouette Coefficient: 0.2982012927532196
Epoch: 7/100
Loss: 0.43274182081222534, Accuracy: 96.0%
Loss: 0.43274182081222534, Accuracy: 96.0%, Silhouette Coefficient: 0.3874549865722656
Epoch: 8/1

In [12]:
# Your code for loading the data (unchanged)
input_file = "/kaggle/input/5way-5shot/test_LSA64_5way_5shot_suppot_query.csv"

values = []
matrix_labels = []
num_rows = 0

with open(input_file, "r") as f_input:
    reader = csv.reader(f_input)
    for row in reader:
        row_values = []
        for i in range(len(row) - 1):
            column_value = ast.literal_eval(row[i])
            row_values.append(column_value)
        values.append(torch.tensor(row_values))
        matrix_labels.append(ast.literal_eval(row[-1]))
        num_rows += 1
        
matrix_labels = np.array(matrix_labels)

In [13]:
test_values = torch.stack(values)
test_matrix_labels = matrix_labels

In [16]:
# Set the number of evaluation episodes
n_evaluation_episodes = 100

# Initialize the accuracy accumulator
total_accuracy = 0

# Set the model to evaluation mode
model.eval()

# Evaluation loop
for episode in range(n_evaluation_episodes):
    support_set, query_set, support_labels, query_labels = create_few_shot_task(test_values, np.array(test_matrix_labels), n_way, k_shot, n_query)
    support_set = torch.tensor(support_set, dtype=torch.float32)
    query_set = torch.tensor(query_set, dtype=torch.float32)
    support_labels = torch.tensor(support_labels, dtype=torch.long) 
#     print(support_labels)
    with torch.no_grad():
        scores = model(support_set, support_labels, query_set)
#         print(scores.shape)
#         print(scores)
        # Compute accuracy
        _, predictions = torch.max(scores, 1)
#         print(predictions.detach().numpy())
#         print(query_labels)
        accuracy = accuracy_score(query_labels, predictions.detach().numpy())
        print(accuracy)
        # Calculate Silhouette Coefficient
        with torch.no_grad():
            query_embeddings = model.backbone.forward(query_set)
            silhouette_coefficient = silhouette_score(query_embeddings.detach().numpy(), query_labels)
        total_accuracy += accuracy
        print("Accuracy: {:.2%}, Silhouette Coefficient: {:.2f}".format(accuracy, silhouette_coefficient))

# Compute the average accuracy over all evaluation episodes
average_accuracy = total_accuracy / n_evaluation_episodes
print(f"Average accuracy: {average_accuracy * 100}%")

0.96
Accuracy: 96.00%, Silhouette Coefficient: 0.48
0.84
Accuracy: 84.00%, Silhouette Coefficient: 0.33
0.92
Accuracy: 92.00%, Silhouette Coefficient: 0.46
0.96
Accuracy: 96.00%, Silhouette Coefficient: 0.54
0.84
Accuracy: 84.00%, Silhouette Coefficient: 0.38
0.88
Accuracy: 88.00%, Silhouette Coefficient: 0.40
0.92
Accuracy: 92.00%, Silhouette Coefficient: 0.41
0.88
Accuracy: 88.00%, Silhouette Coefficient: 0.39
0.88
Accuracy: 88.00%, Silhouette Coefficient: 0.43
0.96
Accuracy: 96.00%, Silhouette Coefficient: 0.51
0.88
Accuracy: 88.00%, Silhouette Coefficient: 0.37
0.92
Accuracy: 92.00%, Silhouette Coefficient: 0.37
0.96
Accuracy: 96.00%, Silhouette Coefficient: 0.48
0.88
Accuracy: 88.00%, Silhouette Coefficient: 0.47
0.96
Accuracy: 96.00%, Silhouette Coefficient: 0.49
0.84
Accuracy: 84.00%, Silhouette Coefficient: 0.36
0.84
Accuracy: 84.00%, Silhouette Coefficient: 0.36
0.76
Accuracy: 76.00%, Silhouette Coefficient: 0.25
0.92
Accuracy: 92.00%, Silhouette Coefficient: 0.50
0.84
Accurac