In [None]:
# imports
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import InceptionResNetV2, VGG19, ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.semi_supervised import LabelPropagation
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Load the labeled and unlabeled image manifests.
# NOTE(review): both CSVs are assumed to have an 'image_path' column, and the labeled
# one a 'label' column (these are the x_col / y_col names used below) - confirm.
# NOTE(review): class_mode='binary' requires the label column to hold strings - confirm.
labeled_data = pd.read_csv('path/to/labeled_data.csv')
unlabeled_data = pd.read_csv('path/to/unlabeled_data.csv')

# Hold out 20% of the labeled rows for validation (fixed seed for reproducibility).
train_data, val_data = train_test_split(labeled_data, test_size=0.2, random_state=42)

# Augmenting generator: used for TRAINING data only.
# The train/validation split is already done by train_test_split above, so no
# validation_split is configured here (it only takes effect together with `subset=`).
datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

# Deterministic generator for validation / inference: rescaling only, so that
# reported metrics are not perturbed by random rotations, shifts or flips.
eval_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = datagen.flow_from_dataframe(
    train_data,
    directory='path/to/images',
    x_col='image_path',
    y_col='label',
    target_size=(299, 299),  # Change to (224, 224) for VGG19 or ResNet50
    batch_size=32,
    class_mode='binary'
)

val_generator = eval_datagen.flow_from_dataframe(
    val_data,
    directory='path/to/images',
    x_col='image_path',
    y_col='label',
    target_size=(299, 299),  # Change to (224, 224) for VGG19 or ResNet50
    batch_size=32,
    class_mode='binary',
    shuffle=False  # keep batches in dataframe order for evaluation
)

unlabeled_generator = eval_datagen.flow_from_dataframe(
    unlabeled_data,
    directory='path/to/images',
    x_col='image_path',
    y_col=None,
    target_size=(299, 299),  # Change to (224, 224) for VGG19 or ResNet50
    batch_size=32,
    class_mode=None,
    shuffle=False  # keep predictions aligned with the rows of unlabeled_data
)

In [None]:
# Build CNN model
def build_model(architecture='inception_resnet'):
    """Build and compile a binary image classifier on a frozen ImageNet backbone.

    Args:
        architecture: Backbone to use. One of 'inception_resnet' (299x299x3 input),
            'vgg19' (224x224x3 input) or 'resnet50' (224x224x3 input).

    Returns:
        A compiled Keras ``Model`` consisting of the chosen backbone (all layers
        frozen), global average pooling, a 1024-unit ReLU layer, dropout (0.5)
        and a single sigmoid output unit. Compiled with Adam and binary
        cross-entropy, tracking accuracy.

    Raises:
        ValueError: If ``architecture`` is not one of the supported names.
    """
    # Validate first so an unsupported name fails with a clear message instead of
    # an UnboundLocalError on `base_model` further down.
    supported = ('inception_resnet', 'vgg19', 'resnet50')
    if architecture not in supported:
        raise ValueError(
            f"Unsupported architecture {architecture!r}; expected one of {supported}"
        )

    if architecture == 'inception_resnet':
        base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
    elif architecture == 'vgg19':
        base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    else:  # 'resnet50'
        base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Classification head stacked on the backbone's convolutional output.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the base model layers so that only the new head is trained.
    # This must happen before compile() for the setting to take effect.
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the classifier; supported backbones are 'inception_resnet', 'vgg19' and 'resnet50'
model = build_model('inception_resnet')


In [None]:
# Initial training run: fit on the labeled training generator for 30 epochs,
# using the held-out labeled generator as validation data
history = model.fit(x=train_generator, validation_data=val_generator, epochs=30)

In [None]:
# Semi-supervised Approach
# Label propagation is run on CNN embeddings rather than on raw flattened pixels:
#   * images are resized to the model's input size, so differently sized files no
#     longer produce ragged arrays, and PNG/JPEG value ranges are normalised alike;
#   * distances between pooled CNN features are meaningful, whereas the default RBF
#     kernel (gamma=20) on hundreds of thousands of pixel dimensions is degenerate;
#   * images are loaded one batch at a time instead of all at once.


def _embed_images(image_paths, extractor, image_dir='path/to/images',
                  target_size=(299, 299), batch_size=32):
    """Return one embedding row per image path, in the order given.

    Args:
        image_paths: Iterable of file names relative to ``image_dir``.
        extractor: Keras model mapping a batch of images to feature vectors.
        image_dir: Directory containing the image files.
        target_size: (height, width) every image is resized to on load.
        batch_size: Number of images loaded and embedded per step.

    Returns:
        2-D numpy array with one row per path.
    """
    paths = list(image_paths)
    if not paths:
        # Nothing to embed: return an empty matrix with the right feature width.
        return np.empty((0, extractor.output_shape[-1]), dtype=np.float32)

    chunks = []
    for start in range(0, len(paths), batch_size):
        batch = np.stack([
            tf.keras.preprocessing.image.img_to_array(
                tf.keras.preprocessing.image.load_img(f'{image_dir}/{path}', target_size=target_size)
            )
            for path in paths[start:start + batch_size]
        ]) / 255.0  # same rescaling as the generators
        chunks.append(extractor.predict(batch, verbose=0))
    return np.concatenate(chunks, axis=0)


# Feature extractor: the trained model truncated at its global-average-pooling layer.
pooling_layer = next(layer for layer in model.layers if isinstance(layer, GlobalAveragePooling2D))
feature_extractor = Model(inputs=model.input, outputs=pooling_layer.output)
input_size = tuple(model.input_shape[1:3])  # (height, width) expected by the model

# Prepare data for label propagation
labeled_features = _embed_images(train_data['image_path'], feature_extractor, target_size=input_size)
unlabeled_features = _embed_images(unlabeled_data['image_path'], feature_extractor, target_size=input_size)
all_features = np.concatenate((labeled_features, unlabeled_features), axis=0)

# Encode labels
le = LabelEncoder()
labeled_labels = le.fit_transform(train_data['label'])

# Combine labels with -1 for unlabeled data (scikit-learn's marker for "unknown")
labels = np.concatenate((labeled_labels, np.full(len(unlabeled_features), -1)))

# Apply Label Propagation over a k-nearest-neighbour graph of the embeddings
label_prop_model = LabelPropagation(kernel='knn', n_neighbors=7)
label_prop_model.fit(all_features, labels)

# Extract pseudo-labels: everything after the labeled rows.
# Slicing from the front is safe when there are no unlabeled rows
# (a `[-0:]` slice would return the whole array instead of nothing).
pseudo_labels = label_prop_model.transduction_[len(labeled_labels):]
pseudo_labels = le.inverse_transform(pseudo_labels)

# Create a new DataFrame with pseudo-labeled data
pseudo_labeled_data = pd.DataFrame({
    'image_path': unlabeled_data['image_path'].to_numpy(),
    'label': pseudo_labels
})

# Combine labeled data with pseudo-labeled data (fresh index, no duplicate row labels)
combined_data = pd.concat([train_data, pseudo_labeled_data], ignore_index=True)

# Create a new training generator with the combined data
combined_generator = datagen.flow_from_dataframe(
    combined_data,
    directory='path/to/images',
    x_col='image_path',
    y_col='label',
    target_size=input_size,
    batch_size=32,
    class_mode='binary'
)

In [None]:
# Unfreeze the model for fine-tuning, but keep BatchNormalization layers frozen.
# A frozen BatchNormalization layer runs in inference mode, so its pretrained moving
# mean/variance are not overwritten by statistics from small fine-tuning batches,
# which would otherwise undo what the backbone has learned.
for layer in model.layers:
    layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

# Recompile the model with a lower learning rate
# (recompiling is required for the trainable changes above to take effect)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Retrain the model with the combined (labeled + pseudo-labeled) data
history_combined = model.fit(
    combined_generator,
    epochs=30,
    validation_data=val_generator
)

In [None]:
# Final evaluation on the held-out labeled split; evaluate() yields the loss
# followed by the accuracy metric configured at compile time
val_metrics = model.evaluate(val_generator)
val_loss, val_accuracy = val_metrics
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')