# Magic Polaroid Admin:
Check for New Data & Initiate Model Retraining

### Discover Target Server via ND Catalog Server

In [None]:
import requests
import sys

# Look up the Magic Polaroid server in the ND catalog and build SERVER_URL
# ("address:port") from the most recently heard-from matching entry.
CATALOG_URL = "http://catalog.cse.nd.edu:9097/query.json"
CATALOG_TIMEOUT = 10  # seconds; without a timeout requests may block forever

try:
    response = requests.get(CATALOG_URL, timeout=CATALOG_TIMEOUT)
except requests.RequestException as exc:
    # DNS failures, refused connections and timeouts raise instead of
    # returning a status code; report them like any other catalog failure.
    print('Could not connect to catalog')
    print(f"Error: {exc}")
    sys.exit()

if response.status_code != 200:
    print('Could not connect to catalog')
    sys.exit()

catalog = response.json()

# Keep only entries of this project's type registered by the expected owner.
listings = [
    entry for entry in catalog
    if entry.get('type') == 'magic-polaroid' and entry.get('owner') == 'mvankir2'
]
if not listings:
    print('Could not locate desired server on catalog')
    sys.exit()

# Pick the entry with the largest 'lastheardfrom'; entries that lack the
# field are treated as oldest rather than raising KeyError.
server_details = max(listings, key=lambda e: e.get('lastheardfrom', 0))
print(server_details)

SERVER_URL = f"{server_details['address']}:{server_details['port']}"

{'name': 'ec2-52-91-173-94.compute-1.amazonaws.com', 'lastheardfrom': 1733954009, 'address': '52.91.173.94', 'type': 'magic-polaroid', 'owner': 'mvankir2', 'port': 8080, 'project': '52.91.173.94'}


### Check for New Data

In [None]:
# Ask the server for its count of new training images; stop if there are none.
check_url = f"http://{SERVER_URL}/training-data-check"

try:
    # Bound the wait (seconds) so an unresponsive server cannot hang the notebook.
    response = requests.get(check_url, timeout=10)
except requests.RequestException as exc:
    # Connection-level failures raise rather than returning a status code.
    print('Could not connect to server')
    print(f"Error: {exc}")
    sys.exit()

if response.status_code != 200:
    print('Could not connect to server')
    sys.exit()

data = response.json()
new_image_count = data.get('new-image-count')
if new_image_count is None:
    # Report a malformed reply explicitly instead of failing with a bare KeyError.
    print("Server response did not include 'new-image-count'")
    sys.exit()

if new_image_count == 0:
    print("No new images found")
    sys.exit()

print(f"New Image Count: {new_image_count}")

New Image Count: 7


### Download New Training Data from Server

In [None]:
download_url = f"http://{SERVER_URL}/training-data"
output_path = "training_images.zip"

!curl -o {output_path} {download_url}
!unzip {output_path} -d "/content/training_images/"
!rm {output_path}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4341k  100 4341k    0     0  2183k      0  0:00:01  0:00:01 --:--:-- 2182k
Archive:  training_images.zip
  inflating: /content/training_images/hes/hes19.jpg  
  inflating: /content/training_images/hes/hes18.jpg  
  inflating: /content/training_images/hes/hes22.jpg  
  inflating: /content/training_images/hes/hes16.jpg  
  inflating: /content/training_images/hes/hes21.jpg  
  inflating: /content/training_images/hes/hes26.jpg  
  inflating: /content/training_images/hes/hes15.jpg  
  inflating: /content/training_images/hes/hes25.jpg  
  inflating: /content/training_images/hes/hes30.jpg  
  inflating: /content/training_images/hes/hes17.jpg  
  inflating: /content/training_images/hes/hes28.jpg  
  inflating: /content/training_images/hes/hes20.jpg  
  inflating: /content/training_images/hes/hes24.jpg  
  inflating: /content/training_

In [None]:
# Scan the extracted images for files named "<digits>.jpg" and keep the
# largest numeric stem, which is treated as the most recent timestamp.
import os
most_recent_timestamp = 0
output_path = "/content/training_images"

for _dirpath, _dirnames, filenames in os.walk(output_path):
    for filename in filenames:
        if not filename.endswith(".jpg"):
            continue
        stem = filename.split('.')[0]
        if not stem.isdigit():
            # Non-numeric names carry no timestamp; skip them.
            continue
        most_recent_timestamp = max(int(stem), most_recent_timestamp)

# Zero means no timestamp-named image was found anywhere under the directory.
if most_recent_timestamp == 0:
    print("Could not obtain timestamp")
    sys.exit()

print(f"Most recent timestamp: {most_recent_timestamp}")

Most recent timestamp: 1733953690254110331


### Train Model

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, applications
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Set directories for your images
data_dir = "/content/training_images"

# Fraction of each class held out for validation. Both generators below must
# use the same value so their training/validation subsets stay disjoint.
VALIDATION_SPLIT = 0.2

# Training images: rescale pixel values by 1/255 and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3],
    validation_split=VALIDATION_SPLIT
)

# Validation images: rescale only. Augmenting them would make val_loss (the
# quantity early stopping monitors) noisy and unrepresentative of real inputs.
validation_datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=VALIDATION_SPLIT
)

# Loading options shared by both subsets.
flow_options = dict(
    target_size=(224, 224),
    batch_size=16,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    subset='training',
    **flow_options
)

validation_generator = validation_datagen.flow_from_directory(
    data_dir,
    subset='validation',
    **flow_options
)

# Sanity check: pull a single training batch and report its types and shapes.
first_batch = next(iter(train_generator), None)
if first_batch is not None:
    data_batch, label_batch = first_batch
    print("Train batch types:", type(data_batch), type(label_batch))
    print("Train batch shapes:", data_batch.shape, label_batch.shape)

# Convert ImageDataGenerator to tf.data.Dataset
def generator_to_tfdata(generator):
    """Yield one pass over ``generator`` as ``(images, labels)`` float32 tensors.

    Keras directory iterators keep producing batches indefinitely when
    iterated directly. Wrapping one unbounded would make the resulting
    dataset infinite, and a ``model.fit`` call without ``steps_per_epoch``
    would never reach the end of its first epoch. When the generator has a
    length, exactly ``len(generator)`` batches are yielded per call; an
    unsized iterable is consumed until it is exhausted.

    Args:
        generator: Iterable of ``(x, y)`` batches.

    Yields:
        Tuples of two ``tf.float32`` tensors built from each batch.
    """
    import itertools

    try:
        batches_per_pass = len(generator)
    except TypeError:
        batches_per_pass = None  # unsized: islice(None) reads to exhaustion

    for x, y in itertools.islice(generator, batches_per_pass):
        yield tf.convert_to_tensor(x, dtype=tf.float32), tf.convert_to_tensor(y, dtype=tf.float32)

# from_generator invokes the lambda again each time the dataset is iterated,
# so every epoch gets a fresh, finite pass over the underlying generator.
train_dataset = tf.data.Dataset.from_generator(
    lambda: generator_to_tfdata(train_generator),
    output_signature=(
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, train_generator.num_classes), dtype=tf.float32),
    )
)

validation_dataset = tf.data.Dataset.from_generator(
    lambda: generator_to_tfdata(validation_generator),
    output_signature=(
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, validation_generator.num_classes), dtype=tf.float32),
    )
)

# Compute class weights to handle imbalance
present_classes = np.unique(train_generator.classes)
balanced_weights = compute_class_weight(
    'balanced',
    classes=present_classes,
    y=train_generator.classes
)
# Key each weight by its actual class index. Enumerating the weights would
# shift them onto the wrong classes whenever a class index has no training
# samples (np.unique only returns the indices that actually occur).
class_weights = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, balanced_weights)
}

# ImageNet-pretrained MobileNetV2 without its classification top, used as a
# frozen feature extractor for the first training stage.
base_model = applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Classification head stacked on the base: pooled features, one regularized
# hidden layer with dropout, then a softmax over the dataset's classes.
classifier_head = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(train_generator.num_classes, activation='softmax'),
]
model = models.Sequential([base_model, *classifier_head])

head_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=head_optimizer,
    metrics=['accuracy'],
)

# Halt training once val_loss has gone 5 epochs without improving, and roll
# the model back to the weights from its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Stage 1: fit for up to 20 epochs with class weighting and early stopping.
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=validation_dataset,
    callbacks=[early_stopping],
    class_weight=class_weights,
)

# Stage 2: mark the base model trainable so fine-tuning updates it as well.
base_model.trainable = True

# Compile again with a 10x smaller learning rate (0.0001 instead of 0.001)
# for the fine-tuning stage.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

# Up to 10 further epochs, reusing the same class weights and early stopping.
history_fine_tune = model.fit(
    train_dataset,
    epochs=10,
    validation_data=validation_dataset,
    callbacks=[early_stopping],
    class_weight=class_weights,
)

# Write the model to an .h5 file named after the most recent image timestamp.
saved_model_name = f"{most_recent_timestamp}.h5"
model.save(saved_model_name)

print(f"Model training completed and saved as '{most_recent_timestamp}.h5'")

### Upload New Model:

In [None]:
import os

# Name (without ".h5") of a previously trained model to upload when this
# session has not produced one of its own.
FALLBACK_MODEL_NAME = 'model_3456'

# Fall back only when no model file exists for the current timestamp.
# Overwriting most_recent_timestamp unconditionally would make the upload
# step look for the fallback file even right after a successful training run.
_session_model_name = globals().get('most_recent_timestamp')
if _session_model_name is None or not os.path.exists(f"/content/{_session_model_name}.h5"):
    print(f"No trained model found for this session; using '{FALLBACK_MODEL_NAME}'")
    most_recent_timestamp = FALLBACK_MODEL_NAME

In [None]:
# Upload the model file to the server's update-model endpoint via POST.
put_url = f"http://{SERVER_URL}/update-model"

model_path = f"/content/{most_recent_timestamp}.h5"

response = None
try:
    with open(model_path, "rb") as model_file:
        files = {"model": model_file}
        # (connect, read) timeouts in seconds: fail fast if the server is
        # unreachable, but allow it time to process the uploaded model.
        response = requests.post(put_url, files=files, timeout=(10, 300))
except FileNotFoundError:
    # Report a missing model file plainly instead of with a raw traceback.
    print("Failed to upload model. File not found:", model_path)
except requests.RequestException as exc:
    # Connection errors and timeouts raise rather than returning a response.
    print("Failed to upload model. Error:", exc)

# check response
if response is not None:
    if response.ok:
        print("Model uploaded successfully:", response.json())
    else:
        print("Failed to upload model. Status code:", response.status_code)
        print("Error:", response.text)

Model uploaded successfully: {'message': 'model updated succesfully'}
