In [None]:
import os

import tensorflow as tf
from keras.layers import Dense, Flatten, Input, Rescaling
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.cluster import KMeans

In [None]:
# Random augmentation applied to each image the generator produces.
# The settings are collected in one mapping so they can be inspected or
# tweaked in a single place before the generator is built.
augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
image_data_generator = ImageDataGenerator(**augmentation_settings)

In [None]:
# Set the path to your dataset directory.
# The location can be overridden without editing the notebook by setting the
# DATA_DIRECTORY environment variable; the original machine-specific path is
# kept as the fallback so existing setups keep working.
data_directory = os.environ.get(
    "DATA_DIRECTORY",
    "C:/Users/Jakub/PycharmProjects/BigDataProjekt/data_classes",
)

# Set the batch size for training
batch_size = 32

# Set the image size
image_size = (224, 224)

# Create the generator for reading and augmenting the images.
# Note: even with class_mode=None, flow_from_directory only discovers images
# that live inside subdirectories of data_directory.
data_generator = image_data_generator.flow_from_directory(
    data_directory,
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,  # Important: Set class_mode to None for unlabeled data
    shuffle=True
)

# Fail fast: an empty scan is otherwise only reported as a log line and
# surfaces later as an obscure training error.
if data_generator.samples == 0:
    raise ValueError(
        f"No images found under {data_directory!r}. "
        "Images must be placed in at least one subdirectory of that folder."
    )

In [None]:
# Define the self-supervised learning model: a small dense autoencoder that
# learns to reconstruct each image through a 128-dimensional bottleneck.
# The bottleneck is deliberately the second-to-last layer, so that
# `model.layers[-2].output` is the learned feature vector.
input_shape = (image_size[0], image_size[1], 3)  # Assumes 3-channel RGB images
flat_image_size = input_shape[0] * input_shape[1] * input_shape[2]

inputs = Input(shape=input_shape)

# The image generator is created without `rescale`, so batches carry raw
# pixel values. Scaling inside the model keeps training and later inference
# consistent regardless of which generator feeds it.
scaled_inputs = Rescaling(1.0 / 255)(inputs)

# Add your model architecture layers here
# (Flatten is the minimum needed so the Dense layers see one vector per image
# instead of being applied independently to every pixel.)
flat_inputs = Flatten()(scaled_inputs)

# Bottleneck embedding used later as the image representation.
embedding = Dense(128, activation='relu')(flat_inputs)

# Add a final dense layer for the self-supervised task: reconstruct the
# flattened, [0, 1]-scaled image from the embedding.
self_supervised_output = Dense(flat_image_size, activation='sigmoid')(embedding)

# Create the self-supervised learning model
model = Model(inputs=inputs, outputs=self_supervised_output)


def reconstruction_batches(image_batches):
    """Turn an inputs-only image iterator into (inputs, targets) pairs.

    The directory iterator is created with ``class_mode=None`` and therefore
    yields only image batches, but training with an MSE loss needs targets.
    Each batch is paired with its own flattened, [0, 1]-scaled copy, matching
    the shape and range of the model output.

    Args:
        image_batches: An endless iterator yielding image batches shaped
            (batch, height, width, channels).

    Yields:
        Tuples ``(batch, targets)`` where ``targets`` has shape
        (batch, height * width * channels).
    """
    while True:
        batch = next(image_batches)
        yield batch, (batch / 255.0).reshape(len(batch), -1)


# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(
    reconstruction_batches(data_generator),
    # The wrapper never terminates, so tell Keras how many batches make up
    # one pass over the dataset.
    steps_per_epoch=len(data_generator),
    epochs=10,
)

In [None]:
# Remove the final dense layer from the trained model: the extractor shares
# the trained model's input and exposes the output of its second-to-last layer.
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)

# Extract features from your unlabeled dataset.
# A plain ImageDataGenerator (no augmentation) is used on purpose: random
# rotations, shifts and flips are useful for training, but at extraction time
# they would make the features non-deterministic and unrepresentative of the
# actual images.
# shuffle=False keeps the rows of `features` in the same order as
# `unlabeled_data_generator.filenames`.
unlabeled_data_generator = ImageDataGenerator().flow_from_directory(
    data_directory,
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False
)

features = feature_extractor.predict(unlabeled_data_generator)

In [None]:
# Set the number of clusters
num_clusters = 3

# KMeans expects a 2-D (n_samples, n_features) array. Collapse any additional
# feature axes into one vector per sample; this is a no-op when the features
# are already 2-D. (Assumes `features` is the NumPy array returned by predict.)
feature_matrix = features.reshape(len(features), -1)

# Perform K-means clustering.
# random_state makes the cluster assignment reproducible across runs;
# n_init is set explicitly because its default differs between scikit-learn
# versions.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(feature_matrix)

In [None]:
# Inspect the clustering results.
# Report the cluster assigned to every sample, numbering samples from 1.
for position, label in enumerate(cluster_labels, start=1):
    print(f"Data point {position}: Cluster {label}")