In [2]:
import pandas as pd
import tensorflow as tf
import os
import shutil
from sklearn.model_selection import train_test_split
import random


2024-05-31 08:53:57.069728: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
labels_file='/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/labels.csv'

source_img_dir = '/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/Yoio_Park_Proof'
dest_img_dir ='/Users/axxtur/Documents/Uni/Data-Science-Case-Studies-SoSe24/first_tests_arthur/dataset'

label_csv= pd.read_csv(labels_file,sep=';')

img_names = label_csv['Image_name'].to_list()

# Iterate through the list and copy images
for image_name in img_names:
    source_path = os.path.join(source_img_dir, image_name)
    destination_path = os.path.join(dest_img_dir, image_name)
    
    # Check if the image exists in the source directory
    if os.path.exists(source_path):
        shutil.copy(source_path, destination_path)
    else:
        print(f"Image not found: {image_name}")



In [4]:
###Prepare the Dataset
# Function to load images
def load_image(image_name, label):
    image_name = image_name.numpy().decode('utf-8')
    image_path = os.path.join(dest_img_dir, image_name)
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # Resize to match model input
    return image, label

# Create lists of image paths and labels
image_paths = label_csv['Image_name'].tolist()
labels = label_csv['Rule8'].tolist()

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Convert lists to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

print(f'Number of training images: {len(train_dataset)}')

# Map function with debugging
def map_function(image_name, label):
    image, label = tf.py_function(func=load_image, inp=[image_name, label], Tout=[tf.float32, tf.int32])
    image.set_shape((224, 224, 3))
    label.set_shape([])
    return image, label

train_dataset = train_dataset.map(map_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.map(map_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Verify the dataset sizes after mapping
print(f'Train dataset size after mapping: {len(list(train_dataset))}')
print(f'Test dataset size after mapping: {len(list(test_dataset))}')

# Batch and prefetch the datasets -> 
train_dataset = train_dataset.batch(32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)




2024-05-31 08:55:04.501291: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Number of training images: 812
Train dataset size after mapping: 812
Test dataset size after mapping: 204


In [5]:
### Data Argumentation
# Data augmentation (optional but recommended)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
])

# Apply data augmentation to the training dataset
def augment(image, label):
    return data_augmentation(image, training=True), label

train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)




In [6]:
### Create the Model

from tensorflow.keras import layers, models

# Create a simple CNN model
model = models.Sequential([
    layers.InputLayer(input_shape=(224, 224, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(2, activation='softmax')  # Adjust the number of classes as per your labels
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [7]:
### Train the Model

history = model.fit(train_dataset, epochs=5, validation_data=test_dataset)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
### Evaluate the model
test_loss, test_acc = model.evaluate(test_dataset)
print(f'Test Accuracy: {test_acc}')


Test Accuracy: 0.9117646813392639


In [35]:
print(len(image_paths))

303


In [17]:
### Precict Images
def predict_new_images(model, image_folder, num_images=100):
    random_images = random.sample(os.listdir(image_folder), num_images)
    results = []
    
    for image_name in random_images:
        image_path = os.path.join(image_folder, image_name)
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [224, 224])  # Resize to match model input
        image = tf.expand_dims(image, 0)  # Add a batch dimension
        predictions = model.predict(image)
        predicted_class = tf.argmax(predictions, axis=-1).numpy()[0]
        results.append((image_name, predicted_class))
    
    return results

# Specify the folder containing new images
predictions_folder_0 = 'predictions_0'
predictions_folder_1 = 'predictions_1'

# Create the predictions folder if it doesn't exist
os.makedirs(predictions_folder_0, exist_ok=True)
os.makedirs(predictions_folder_1, exist_ok=True)

# Predict the class for 10 random images from the new image folder
predicted_results = predict_new_images(model, source_img_dir)
predicted_df = pd.DataFrame(predicted_results, columns=['image_name', 'predicted_class'])

print(predicted_df)

# Save the results to a CSV file
predicted_df.to_csv('predicted_results.csv', index=False)

# Copy the predicted images to the predictions folder
for index, row in predicted_df.iterrows():
    image_name = row['image_name']
    predicted_class = row['predicted_class']
    src_path = os.path.join(source_img_dir, image_name)
    dst_path_0 = os.path.join(predictions_folder_0, image_name)
    dst_path_1 = os.path.join(predictions_folder_1, image_name)
    if predicted_class == 1:
        shutil.copy(src_path, dst_path_1)
        print(f'Copied {image_name} to {predictions_folder_1}')
    
    if predicted_class == 0:
        shutil.copy(src_path, dst_path_0)
        print(f'Copied {image_name} to {predictions_folder_0}')

# Verify the copied images
#print(f'Copied images: {os.listdir(predictions_folder)}')

                                           image_name  predicted_class
0   1588102754ef37b75534a067b80853684366db49f9cd54...                0
1   16111409184380c4bb8020ad9d6f29dc51ffa9e614baf2...                0
2   16097735485deba827a70b144707dcd5f7fdcd399478cc...                0
3   1605814493efb019e9c5f23b08bd924c0dd4b4fe1682d3...                0
4   15984641483deec9931b094b48c97483dff6a3a301f513...                0
..                                                ...              ...
95  1600095908eca13bfe5f77cc824324f00aee01471e75aa...                0
96  1594271635fb4062a23bfce5c3a5e42f1c9c5994bee628...                0
97  160898569319c6f0eb56511c40b92da66a711f5d0a4525...                0
98  1588353652adbf6b5069e72d45483907e74a16c90b2474...                0
99  1605640153820b2b93a51aa86dc47b373150e92414ce99...                0

[100 rows x 2 columns]
Copied 1588102754ef37b75534a067b80853684366db49f9cd54dd.jpg to predictions_0
Copied 16111409184380c4bb8020ad9d6f29dc51ffa9e6