In [52]:
import pandas as pd
import tensorflow as tf
import os
import shutil
from sklearn.model_selection import train_test_split
import random


In [None]:
labels_file='/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/labels.csv'

source_img_dir = '/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/Yoio_Park_Proof'
dest_img_dir ='/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/first_tests_arthur/dataset'

label_csv= pd.read_csv(labels_file,sep=';')

img_names = label_csv['Image_name'].to_list()

# Iterate through the list and copy images
for image_name in img_names:
    source_path = os.path.join(source_img_dir, image_name)
    destination_path = os.path.join(dest_img_dir, image_name)
    
    # Check if the image exists in the source directory
    if os.path.exists(source_path):
        shutil.copy(source_path, destination_path)
    else:
        print(f"Image not found: {image_name}")



In [47]:
###Prepare the Dataset
# Function to load images
def load_image(image_name, label):
    image_name = image_name.numpy().decode('utf-8')
    image_path = os.path.join(dest_img_dir, image_name)
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resize to match model input
    return image, label

# Create lists of image paths and labels
image_paths = label_csv['Image_name'].tolist()
labels = label_csv['Rule1'].tolist()

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Convert lists to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

print(f'Number of training images: {len(train_dataset)}')

# Map function with debugging
def map_function(image_name, label):
    image, label = tf.py_function(func=load_image, inp=[image_name, label], Tout=[tf.float32, tf.int32])
    image.set_shape((224, 224, 3))
    label.set_shape([])
    return image, label

train_dataset = train_dataset.map(map_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.map(map_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Verify the dataset sizes after mapping
print(f'Train dataset size after mapping: {len(list(train_dataset))}')
print(f'Test dataset size after mapping: {len(list(test_dataset))}')

# Batch and prefetch the datasets -> 
train_dataset = train_dataset.batch(32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)




Number of training images: 242
Train dataset size after mapping: 242
Test dataset size after mapping: 61


In [48]:
### Data Argumentation
# Data augmentation (optional but recommended)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
])

# Apply data augmentation to the training dataset
def augment(image, label):
    return data_augmentation(image, training=True), label

train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)




In [49]:
### Create the Model

from tensorflow.keras import layers, models

# Create a simple CNN model
model = models.Sequential([
    layers.InputLayer(input_shape=(224, 224, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(2, activation='softmax')  # Adjust the number of classes as per your labels
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [50]:
### Train the Model

history = model.fit(train_dataset, epochs=5, validation_data=test_dataset)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [51]:
### Evaluate the model
test_loss, test_acc = model.evaluate(test_dataset)
print(f'Test Accuracy: {test_acc}')


Test Accuracy: 0.9344262480735779


In [35]:
print(len(image_paths))

303


In [55]:
### Precict Images
def predict_new_images(model, image_folder, num_images=10):
    random_images = random.sample(os.listdir(image_folder), num_images)
    results = []
    
    for image_name in random_images:
        image_path = os.path.join(image_folder, image_name)
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [224, 224])  # Resize to match model input
        image = tf.expand_dims(image, 0)  # Add a batch dimension
        predictions = model.predict(image)
        predicted_class = tf.argmax(predictions, axis=-1).numpy()[0]
        results.append((image_name, predicted_class))
    
    return results

# Specify the folder containing new images
predictions_folder = 'predictions'

# Create the predictions folder if it doesn't exist
os.makedirs(predictions_folder, exist_ok=True)

# Predict the class for 10 random images from the new image folder
predicted_results = predict_new_images(model, source_img_dir)
predicted_df = pd.DataFrame(predicted_results, columns=['image_name', 'predicted_class'])

print(predicted_df)

# Save the results to a CSV file
predicted_df.to_csv('predicted_results.csv', index=False)

# Copy the predicted images to the predictions folder
for image_name in predicted_df['image_name']:
    src_path = os.path.join(source_img_dir, image_name)
    dst_path = os.path.join(predictions_folder, image_name)
    shutil.copy(src_path, dst_path)
    print(f'Copied {image_name} to {predictions_folder}')

# Verify the copied images
print(f'Copied images: {os.listdir(predictions_folder)}')

                                          image_name  predicted_class
0  16027721154e3af916dd80cf0c1e97b99fd6bccd18cdd6...                0
1  15923921190e3831164a41b9b000fd292e57c4963ce4ec...                0
2  1606045813b8bc56aac258aa5d6e3707e43c4b1fdcf5e1...                0
3  16006837606e58d905454e06634091413492c8a083faa0...                0
4  160490413917d3baf7e71803700a59a224fb4347ac1bdd...                0
5  1584903656b9a7ba8d3806d3291754118df8c6244a05b7...                0
6  1614764706febb85d4cc3ef66250f074fb0ee32bd443ae...                0
7  160024695927f3c11021b8ffc4e459c4148f9dad95812b...                0
8  1607705585105409e2577130214e8e8bbb11f5e21cfa27...                0
9  16083773499548fce7d48c172d5fe79ae3e763d3864bba...                0
Copied 16027721154e3af916dd80cf0c1e97b99fd6bccd18cdd6f7.jpg to predictions
Copied 15923921190e3831164a41b9b000fd292e57c4963ce4ec6a.jpg to predictions
Copied 1606045813b8bc56aac258aa5d6e3707e43c4b1fdcf5e153.jpg to predictions
Copie