In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc
from tensorflow.keras.utils import to_categorical

In [9]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [3]:
# Initialize the ResNet50 base model
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)

In [4]:
# Define the number of classes dynamically based on training data or set statically
num_classes = 6 
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [5]:
# Data preparation
train_dir = '/content/drive/MyDrive/datasetip06/test' # please define your own path here
val_dir = '/content/drive/MyDrive/datasetip06/test' # please define your own path here

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 5400 images belonging to 6 classes.
Found 600 images belonging to 6 classes.


In [6]:
# Model training
history = model.fit(
    train_generator,
    steps_per_epoch=5399 // 32,  # Adjust based on your dataset
    epochs= 100,  # Updated as suggested
    validation_data=validation_generator,
    validation_steps=600 // 32)  # Adjust based on your dataset

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
embedding_model = Model(inputs=base_model.input, outputs=x)  # x is the output of the last Dense layer before predictions

# Function to calculate prototypes
def calculate_prototypes(embedding_model, data_generator, num_classes):
    embeddings = []
    labels = []
    for images, label in data_generator:
        # Predict to get embeddings
        emb = embedding_model.predict(images)
        embeddings.append(emb)
        labels.append(label)

    embeddings = np.vstack(embeddings)
    labels = np.vstack(labels)

    prototypes = []
    for i in range(num_classes):  # Assuming num_classes is defined
        class_embeddings = embeddings[np.argmax(labels, axis=1) == i]
        prototype = np.mean(class_embeddings, axis=0)
        prototypes.append(prototype)

    return np.array(prototypes)

# Function for distance entropy calculation
def distance_entropy(prototypes, embeddings):
    distances = np.sqrt(((embeddings[:, np.newaxis, :] - prototypes[np.newaxis, :, :]) ** 2).sum(axis=2))
    softmax_distances = np.exp(-distances) / np.exp(-distances).sum(axis=1, keepdims=True)
    entropy = -np.sum(softmax_distances * np.log(softmax_distances + 1e-5), axis=1)
    return entropy

In [None]:
def proto_de(features, labels):
    # Calculate the prototype (mean feature vector) for each class
    class_prototypes = {}
    for label in np.unique(labels):
        class_indices = np.where(labels == label)[0]
        class_features = features[class_indices]
        class_prototypes[label] = np.mean(class_features, axis=0)
    
    # Calculate the distance from each feature vector to its class prototype
    distances = []
    for feature, label in zip(features, labels):
        prototype = class_prototypes[label]
        distance = np.linalg.norm(feature - prototype)
        distances.append(distance)
    
    return distances

In [None]:
def bound_de(model, features, labels):
    decision_scores = model.predict(features)  # Assuming this returns a score reflecting distance from decision boundary
    distances = np.abs(0.5 - decision_scores)  # Assuming scores are probabilities in [0, 1] for binary classification
    
    return distances


In [None]:
def multi_branch_selection(proto_scores, bound_scores, top_k):
    # Combine Proto-DE and Bound-DE scores
    # A simple approach could be averaging the scores
    combined_scores = (np.array(proto_scores) + np.array(bound_scores)) / 2
    
    selected_indices = np.argsort(combined_scores)[-top_k:]
    
    return selected_indices

In [None]:
def train_with_selection(model, train_data, train_labels, epochs=10, batch_size=32, top_k=100):
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        
        # Step 1: Feature Extraction
        features = extract_features(model, train_data)
        
        # Step 2: Calculate Proto-DE and Bound-DE scores
        proto_scores = proto_de(features, train_labels)
        bound_scores = bound_de(model, features, train_labels)
        
        # Step 3: Select samples using Multi-Branch selection
        selected_indices = multi_branch_selection(proto_scores, bound_scores, top_k=top_k)
        selected_data = train_data[selected_indices]
        selected_labels = train_labels[selected_indices]
        
        # Step 4: Train on selected samples
        model.fit(selected_data, selected_labels, batch_size=batch_size)

In [None]:
# Plotting training results with specified colors
plt.plot(history.history['accuracy'], color='darkblue', label='Proto-DE-Good')
plt.plot(history.history['val_accuracy'], color='orange', label='Proto-DE-Bad')
plt.plot(history.history['val_loss'], color='gray', label='Bound-DE-Good')
plt.plot(history.history['loss'], color='yellow', label='Bound-DE-Bad')
plt.plot(history.history['val_loss'], color='lightblue', label='Multi-Branch-Good')
plt.plot(history.history['loss'], color='green', label='Multi-Branch-Bad')

plt.title('Test accuracy when selecting different samples using different methods')
plt.xlabel('Test Accuracy')
plt.ylabel('Select Percentage')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=3)

# Adjusting legends alignment
plt.subplots_adjust(bottom=0.15)

plt.show()

In [None]:
!pip freeze > requirements.txt
