<a href="https://colab.research.google.com/github/JTalentTwoPointO/Automation/blob/master/dl_comp2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset and label paths used by
# the later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from tensorflow import keras
import os
import glob
from PIL import Image

# Dataset folders on the mounted Drive, one per split.
train_dir, val_dir, test_dir = (
    "/content/drive/MyDrive/comp2_open/train",
    "/content/drive/MyDrive/comp2_open/val",
    "/content/drive/MyDrive/comp2_open/test",
)

# Glob-style pattern for image files.
# NOTE(review): not referenced by any cell visible in this notebook.
file_pattern = "*.jpg"

# Target size handed to PIL's Image.resize for every loaded image.
image_size = 128, 128

# Load data
def load_data(data_dir, size=None):
    """Load every entry of ``data_dir`` as a resized image array.

    Args:
        data_dir: Directory to read. Every entry returned by ``os.listdir``
            is opened with PIL, so the folder must contain only images.
        size: Optional size tuple passed to ``Image.resize``. Defaults to
            the module-level ``image_size``.

    Returns:
        ``np.ndarray`` holding one resized image array per file, stacked in
        ``os.listdir`` order.
    """
    if size is None:
        size = image_size

    images = []
    # NOTE(review): os.listdir order is arbitrary. Labels read from CSV and the
    # ids written at export time are paired with these rows purely by position,
    # so the listing order is intentionally left untouched here — confirm that
    # it matches the label files.
    for file_name in os.listdir(data_dir):
        # The context manager releases the file handle as soon as the resized
        # pixels have been copied into a NumPy array.
        with Image.open(os.path.join(data_dir, file_name)) as img:
            images.append(np.array(img.resize(size)))
    return np.array(images)

# Read the three image splits into memory.
train_data = load_data(train_dir)
val_data = load_data(val_dir)
test_data = load_data(test_dir)

# Labels come from separate CSV files; only the "label" column is used, so
# rows are matched to images purely by position.
# NOTE(review): the folder name "Comp2 " ends with a space — confirm this is
# the real Drive folder name and not a typo.
train_labels = pd.read_csv("/content/drive/MyDrive/Comp2 /train_labels_ex2.csv")["label"].values
val_labels = pd.read_csv("/content/drive/MyDrive/Comp2 /val_labels_ex2.csv")["label"].values

# Fail fast here rather than deep inside model.fit if a split and its labels
# disagree on the number of samples.
assert len(train_labels) == len(train_data), (
    f"train: {len(train_data)} images but {len(train_labels)} labels"
)
assert len(val_labels) == len(val_data), (
    f"val: {len(val_data)} images but {len(val_labels)} labels"
)

# Print shape of data
print("Train data shape:", train_data.shape)
print("Validation data shape:", val_data.shape)
print("Test data shape:", test_data.shape)







Train data shape: (614, 128, 128)
Validation data shape: (133, 128, 128)
Test data shape: (133, 128, 128)


In [None]:
# Divide by 255.0 (maps 8-bit pixel values into [0, 1] — assumes the loaded
# images are uint8) and flatten each sample into a single feature row.
# NOTE: this rebinds the *_data names, so running the cell twice rescales again.
train_data = (train_data / 255.0).reshape(len(train_data), -1)
val_data = (val_data / 255.0).reshape(len(val_data), -1)
test_data = (test_data / 255.0).reshape(len(test_data), -1)

In [None]:
# Fit K-Means (10 clusters, fixed seed) on the training rows only, then assign
# every split to its nearest learned centroid.
kmeans = KMeans(n_clusters=10, random_state=42)
kmeans.fit(train_data)
train_clusters = kmeans.predict(train_data)[:, np.newaxis]  # column vector (num_samples, 1)
val_clusters = kmeans.predict(val_data)
test_clusters = kmeans.predict(test_data)

# Append the cluster id as one extra trailing feature column per sample.
# NOTE: re-running this cell appends another column, because the *_data names
# are rebound in place.
train_data = np.hstack([train_data, train_clusters])
val_data = np.hstack([val_data, val_clusters[:, np.newaxis]])
test_data = np.hstack([test_data, test_clusters[:, np.newaxis]])




In [None]:
# Fully connected autoencoder: the feature rows are squeezed down to a 16-unit
# layer and expanded back to the input width, trained to reproduce its input.
input_shape = train_data.shape[1:]

# Widths of the ReLU layers, in order; the 16 in the middle is the bottleneck.
#ToDo - consider smaller bottle neck
hidden_units = [256, 128, 64, 32, 16, 64, 128, 256]

autoencoder_layers = [
    keras.layers.Dense(hidden_units[0], activation="relu", input_shape=input_shape)
]
autoencoder_layers += [
    keras.layers.Dense(units, activation="relu") for units in hidden_units[1:]
]
# Linear output layer sized to the input, followed by a reshape back to
# input_shape (a no-op when train_data is 2-D, as it is after flattening).
autoencoder_layers += [
    keras.layers.Dense(np.prod(input_shape)),
    keras.layers.Reshape(input_shape),
]

model = keras.models.Sequential(autoencoder_layers)
model.compile(loss="mse", optimizer="adam")
# Input and target are the same array: the network learns to reconstruct it.
model.fit(
    train_data,
    train_data,
    epochs=50,
    batch_size=32,
    validation_data=(val_data, val_data),
)







Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f098e5bbaf0>

In [None]:
# Sub-model that maps the model input to the output of its second-to-last layer.
# NOTE(review): for the autoencoder defined earlier in this notebook,
# model.layers[-2] is the final Dense(np.prod(input_shape)) reconstruction
# layer, not the 16-unit bottleneck, so these "encoded" features keep the full
# input width. Confirm this is intended before switching to the bottleneck —
# the noise cell that follows clips features to [0, 1].
# NOTE: a later cell rebinds `model` to the classifier, so this cell must run
# before that one.
encoder = keras.models.Model(inputs=model.input, outputs=model.layers[-2].output)

# Replace each split with the sub-model's output for it (train, val, test order).
train_data, val_data, test_data = (
    encoder.predict(split) for split in (train_data, val_data, test_data)
)



In [None]:
# Add Gaussian noise
noise_factor = 0.1
# Dedicated seeded generator so a Restart & Run All draws the same noise every
# time (42 matches the random_state used for KMeans).
noise_rng = np.random.default_rng(42)

# NOTE(review): noise is added to the validation and test features as well as
# the training features; noise augmentation is usually train-only — confirm
# this is intended.
train_data_noisy = train_data + noise_factor * noise_rng.normal(loc=0.0, scale=1.0, size=train_data.shape)
val_data_noisy = val_data + noise_factor * noise_rng.normal(loc=0.0, scale=1.0, size=val_data.shape)
test_data_noisy = test_data + noise_factor * noise_rng.normal(loc=0.0, scale=1.0, size=test_data.shape)

# Clip noisy data to [0, 1]
# (any feature value outside that range is saturated at the nearest bound)
train_data_noisy = np.clip(train_data_noisy, 0., 1.)
val_data_noisy = np.clip(val_data_noisy, 0., 1.)
test_data_noisy = np.clip(test_data_noisy, 0., 1.)

In [None]:
# Define the neural network model
# Binary classifier on the noisy feature rows: a stack of ReLU layers narrowing
# to a single sigmoid unit.
# NOTE: this rebinds `model`, replacing the autoencoder bound to that name.
model = keras.models.Sequential([
    keras.layers.Dense(256, activation="relu", input_shape=train_data_noisy.shape[1:]),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

# Compile and train exactly once. The cell previously repeated this
# compile + fit pair verbatim, which re-created the optimizer and trained the
# same model for a second 100 epochs.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(train_data_noisy, train_labels, epochs=100, batch_size=32, validation_data=(val_data_noisy, val_labels))



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f0988e1f7c0>

In [None]:
# Predict on the (noisy) test features and threshold the model outputs at 0.5
# to obtain integer 0/1 labels.
test_labels = (model.predict(test_data_noisy) > 0.5).astype(int)

# Submission ids: test file names with the extension stripped.
# NOTE(review): assumes os.listdir(test_dir) returns the files in the same
# order as when the test images were loaded — confirm, since predictions are
# paired with names purely by position.
test_files = [os.path.splitext(file)[0] for file in os.listdir(test_dir)]

# One row per test file: id + predicted label.
test_labels_df = pd.DataFrame({"id": test_files, "label": test_labels.squeeze()})

# Write the predictions to the Colab working directory.
test_labels_df.to_csv("/content/test_labels_ex2.csv", index=False)



