In [None]:
# Google Colab only: mount Google Drive at /content/drive so that the dataset
# and model paths used later in this notebook (all under /content/drive) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import necessary libraries for the neural network and data processing
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras import regularizers
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
import os
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import csv
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm


In [None]:
# Image dimensions and batch size for training.
# NOTE(review): these two values are later passed as
# target_size=(img_width, img_height) and input_shape=(img_width, img_height, 1).
# Keras documents target_size as (height, width), so images end up as
# 240 rows x 320 columns; the names look swapped relative to that convention,
# although they are used consistently. Confirm before reusing them elsewhere.
img_width, img_height = 240, 320
BATCH = 86

# Root folders of the training and validation datasets; each is expected to
# contain one sub-folder per class ('background' and 'signal').
DATASET_PATH = '/content/drive/MyDrive/DD_124_final_project/Image_Data/Preprocessed_Nov30/'
VALIDATION_PATH = '/content/drive/MyDrive/DD_124_final_project/Image_Data/validation_preprocessed/'

# File-name prefix and full path of the checkpoint written during training.
modelsave_prefix='dec3newarch'
modelsavepath= '/content/drive/MyDrive/DD_124_final_project/models/' + modelsave_prefix + '.hdf5'

# Function to count files in each category (signal/background) for both datasets
def count_files_in_directory(path):
    """Print the number of entries in each class sub-folder of ``path``.

    Args:
        path: Dataset root that contains a ``background`` and a ``signal``
            sub-folder.

    Every directory entry returned by ``os.listdir`` is counted, not only
    image files.
    """
    for class_name in ('background', 'signal'):
        n_entries = len(os.listdir(os.path.join(path, class_name)))
        print(f"Total files in {class_name} folder: {n_entries}")

# Report the per-class file counts for the training and validation datasets
for heading, dataset_root in (
    ("Training Dataset:", DATASET_PATH),
    ("Validation Dataset:", VALIDATION_PATH),
):
    print(heading)
    count_files_in_directory(dataset_root)


Training Dataset:
Total files in background folder: 5490
Total files in signal folder: 4040
Validation Dataset:
Total files in background folder: 282
Total files in signal folder: 375


In [None]:
# Function to create a DataFrame listing file paths and their corresponding labels
def create_dataframe(path, label_names):
    """Build a DataFrame of file paths and class labels from a dataset root.

    Args:
        path: Dataset root containing one sub-folder per label.
        label_names: Iterable of sub-folder names; each name is also used as
            the label of every entry found in that sub-folder.

    Returns:
        pandas.DataFrame with columns ``filepath`` and ``label``, one row per
        directory entry, grouped in the order given by ``label_names``.
    """
    pairs = []
    for class_name in label_names:
        class_dir = os.path.join(path, class_name)
        # tqdm wraps the listing so that each class folder shows its own progress bar.
        pairs.extend(
            (os.path.join(class_dir, entry), class_name)
            for entry in tqdm(os.listdir(class_dir))
        )
    return pd.DataFrame({
        'filepath': [file_path for file_path, _ in pairs],
        'label': [class_name for _, class_name in pairs],
    })

# Create DataFrames for training and validation datasets
train_df = create_dataframe(DATASET_PATH, ['background', 'signal'])
val_df = create_dataframe(VALIDATION_PATH, ['background', 'signal'])

# Training generator: rescale pixel values to [0, 1] and apply random zoom
# augmentation.
datagen = ImageDataGenerator(rescale=1.0/255.0, zoom_range=0.25)

# Validation generator: rescale only. Augmenting validation images would make
# val_loss vary randomly between epochs, and val_loss is the metric that
# EarlyStopping and ModelCheckpoint monitor.
val_datagen = ImageDataGenerator(rescale=1.0/255.0)

# Generate data batches for training and validation.
# Keras interprets target_size as (height, width).
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepath',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=BATCH,
    class_mode='binary',
    color_mode='grayscale'
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='filepath',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=BATCH,
    class_mode='binary',
    color_mode='grayscale',
    # Keep a fixed order so predictions line up with validation_generator.classes.
    shuffle=False
)

# Balanced class weights: total_samples / (number_of_classes * samples_in_class),
# computed for all classes at once and keyed by class index.
class_counts = np.bincount(train_generator.classes)
total_samples = train_generator.samples
balanced_weights = total_samples / (len(class_counts) * class_counts)
class_weights = dict(enumerate(balanced_weights))

print("Class Weights:", class_weights)


  0%|          | 0/5490 [00:00<?, ?it/s]

  0%|          | 0/4040 [00:00<?, ?it/s]

  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/375 [00:00<?, ?it/s]

                                            filepath       label
0  /content/drive/MyDrive/DD_124_final_project/Im...  background
1  /content/drive/MyDrive/DD_124_final_project/Im...  background
2  /content/drive/MyDrive/DD_124_final_project/Im...  background
3  /content/drive/MyDrive/DD_124_final_project/Im...  background
4  /content/drive/MyDrive/DD_124_final_project/Im...  background
                                            filepath       label
0  /content/drive/MyDrive/DD_124_final_project/Im...  background
1  /content/drive/MyDrive/DD_124_final_project/Im...  background
2  /content/drive/MyDrive/DD_124_final_project/Im...  background
3  /content/drive/MyDrive/DD_124_final_project/Im...  background
4  /content/drive/MyDrive/DD_124_final_project/Im...  background
Found 9530 validated image filenames belonging to 2 classes.
lol
Found 657 validated image filenames belonging to 2 classes.
Class Weights: {0: 0.8679417122040073, 1: 1.1794554455445545}


In [None]:
# Learning-rate schedule: exponential decay applied in discrete steps
# (staircase) every `decay_steps` optimizer updates.
initial_learning_rate = 0.001
decay_steps = 10000
decay_rate = 0.9
lr_schedule = ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=decay_steps,
    decay_rate=decay_rate,
    staircase=True,
)

# Convolutional feature extractor: three Conv2D + MaxPooling2D stages with
# 32, 64 and 128 filters, followed by Flatten for the dense layers.
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 1)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
]

# Classifier head: five Dense(128) layers, each followed by 50% dropout for
# regularization, then a single sigmoid unit for the binary output.
classifier_layers = []
for _ in range(5):
    classifier_layers += [Dense(128, activation='relu'), Dropout(0.5)]
classifier_layers.append(Dense(1, activation='sigmoid'))

model = Sequential(feature_layers + classifier_layers)

# Compile with Adam (using the decaying learning rate) and binary crossentropy.
optimizer = Adam(learning_rate=lr_schedule)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print model summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 238, 318, 32)      320       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 119, 159, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 117, 157, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 58, 78, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 56, 76, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 28, 38, 128)      

In [None]:
# Stop training once val_loss has not improved for 15 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=15)

# Write a checkpoint to `modelsavepath` only when val_loss reaches a new minimum.
model_checkpoint = ModelCheckpoint(
    filepath=modelsavepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

training_callbacks = [early_stopping, model_checkpoint]

# Train for up to 150 epochs, weighting the loss by class to offset the imbalance.
model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=150,
    class_weight=class_weights,
    callbacks=training_callbacks,
    verbose=1,
)


Epoch 1/150
 21/111 [====>.........................] - ETA: 10:43 - loss: 3.2571 - accuracy: 0.5227

In [None]:
# Load the best model saved during training (the checkpoint written by
# ModelCheckpoint at `modelsavepath`).
modelsaved=tf.keras.models.load_model(modelsavepath)

# Convert the trained model to TensorFlow Lite format for deployment
converter = tf.lite.TFLiteConverter.from_keras_model(modelsaved)
tflite_model = converter.convert()

# Save the TFLite model next to the Keras checkpoint, using the same file name
# with a .tflite extension. The path is derived from `modelsavepath` rather
# than hard-coded so it always points at the models folder used by the rest
# of this notebook.
tflitesavepath = os.path.splitext(modelsavepath)[0] + '.tflite'
with open(tflitesavepath, 'wb') as f:
    f.write(tflite_model)

print("Model converted to TensorFlow Lite successfully.")


