In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_root = '/kaggle/input'
for folder, _, names in os.walk(input_root):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/best-mob/best_mob.keras
/kaggle/input/resnet-model/best_res1.keras
/kaggle/input/machine-learning-in-science-ii-2024/sampleSubmission.csv
/kaggle/input/machine-learning-in-science-ii-2024/training_norm.csv


In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
import multiprocessing

2024-03-27 18:28:52.981479: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-27 18:28:52.981681: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-27 18:28:53.145852: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Image shape used throughout the notebook — presumably (height, width, channels).
# NOTE: the generator defaults and the model input below repeat the literal
# (224, 224, 3) rather than reading this variable, so changing it here alone
# has no effect on them.
image_shape = (224, 224, 3)

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence yielding (images, [angle, speed]) batches from a targets table.

    Args:
        targets: DataFrame with 'image_id', 'angle' and 'speed' columns, one row per image.
        root_dir_original: Directory containing the '<image_id>.png' files.
        batch_size: Maximum number of samples per batch (the last batch may be smaller).
        image_shape: Shape of one image array; its first two entries are the resize target.
        shuffle: If True, reshuffle the sample order at construction and after each epoch.
        **kwargs: Forwarded unchanged to tf.keras.utils.Sequence.
    """

    def __init__(self, targets, root_dir_original, batch_size=32, image_shape=(224, 224, 3), shuffle=True, **kwargs):
        # Let the Keras base class initialise itself (Keras 3 warns when this is skipped).
        super().__init__(**kwargs)
        self.targets = targets
        self.root_dir_original = root_dir_original
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        # Ceil instead of floor so the trailing len(targets) % batch_size samples are
        # not silently dropped (same rule as TestDataGenerator).
        return int(np.ceil(len(self.targets) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as an (X, y) tuple of NumPy arrays."""
        indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]
        X, y = self.__data_generation(indexes)
        return X, y

    def on_epoch_end(self):
        """Rebuild the positional index and reshuffle it when shuffling is enabled."""
        self.indexes = np.arange(len(self.targets))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Load the images and targets for the given row positions of `targets`."""
        X = np.empty((len(indexes), *self.image_shape))
        y = np.empty((len(indexes), 2))

        for i, index in enumerate(indexes):
            # One positional lookup per sample instead of three.
            row = self.targets.iloc[index]

            img_name = os.path.join(self.root_dir_original, str(int(row['image_id'])) + '.png')
            # load_img expects a 2-tuple target size, so drop the channel entry.
            img = tf.keras.preprocessing.image.load_img(img_name, target_size=self.image_shape[:2])
            # Scale pixel values from [0, 255] to [0, 1].
            X[i,] = tf.keras.preprocessing.image.img_to_array(img) / 255.0

            y[i,] = [row['angle'], row['speed']]

        return X, y

In [4]:
# Locations of the target table and the training images in the competition dataset
targets_csv = '/kaggle/input/machine-learning-in-science-ii-2024/training_norm.csv'
root_dir_original = '/kaggle/input/machine-learning-in-science-ii-2024/training_data/training_data'

# Load targets
targets = pd.read_csv(targets_csv)

# Split the dataset 70 / 15 / 15 into train / validation / test (fixed seed for reproducibility)
train_targets, test_val_targets = train_test_split(targets, test_size=0.3, random_state=42)
val_targets, test_targets = train_test_split(test_val_targets, test_size=0.5, random_state=42)

# Create data generators for training, validation, and test sets.
# Only the training generator shuffles: validation and test are evaluation-only, and a
# fixed order keeps val_loss (monitored by the checkpoint callback) and the test score
# computed on the same samples on every pass.
train_dataset = CustomDataGenerator(targets=train_targets, root_dir_original=root_dir_original)
val_dataset = CustomDataGenerator(targets=val_targets, root_dir_original=root_dir_original, shuffle=False)
test_dataset = CustomDataGenerator(targets=test_targets, root_dir_original=root_dir_original, shuffle=False)

In [None]:
from tensorflow.keras.layers import Layer
from tensorflow.keras.applications import ResNet50

# Custom layer to clip output values between 0 and 1
class ClipOutput(Layer):
    """Layer that clamps every element of its input to the closed interval [0, 1]."""

    def __init__(self, **kwargs):
        # No configuration of its own; all keyword arguments go to the base Layer.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return `inputs` with values below 0 raised to 0 and values above 1 lowered to 1."""
        lower, upper = 0.0, 1.0
        return tf.clip_by_value(inputs, clip_value_min=lower, clip_value_max=upper)

# Load pre-trained ResNet50 (ImageNet weights) without its classification head
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Fine-tune the whole backbone: all ResNet50 layers stay trainable (nothing is frozen)
base_model.trainable = True

# Regression head: pooled features -> dropout -> 64-unit ReLU layer -> 2 linear outputs
inputs = tf.keras.Input(shape=(224, 224, 3))
# Do not pass training=True here. A hard-coded flag is baked into the graph, so the
# backbone's BatchNormalization layers would keep using per-batch statistics (and keep
# updating their moving averages) during evaluate()/predict(). Leaving it unset lets
# Keras supply training=True inside fit() and training=False for inference.
x = base_model(inputs)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer='l2')(x)
combined_output = tf.keras.layers.Dense(2, activation='linear', name='combined_output')(x)

# The outputs are left unclipped inside the model; the optional ClipOutput layer is disabled
#clipped_output = ClipOutput()(combined_output)

# Define the model
model = tf.keras.Model(inputs, combined_output)



In [None]:
# Print the layer-by-layer summary of the ResNet50 backbone only
# (this is `base_model`, not the full `model` with the regression head).
base_model.summary()

In [None]:
# Configure training: Adam with a 1e-4 learning rate, minimising mean squared error
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# Print model summary
#model.summary()

In [None]:
# Define ModelCheckpoint callback: keep only the full model with the lowest validation loss
checkpoint_filepath = '/kaggle/working/best_res1.keras'
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    save_weights_only=False,  # Save the entire model
    verbose=1
)

# Training loop
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
    callbacks=[model_checkpoint]  # Include the ModelCheckpoint callback
)

# Load the best model based on validation loss.
# The file was written by the callback above, so safe_mode=False is applied to our own
# checkpoint only; do not reuse it for model files from untrusted sources.
best_model = tf.keras.models.load_model(checkpoint_filepath, safe_mode=False)

# Evaluate on the test set and keep the result: the model is compiled with a loss and
# no extra metrics, so evaluate() returns the mean-squared-error loss.
test_loss = best_model.evaluate(test_dataset)

print('Finished Training')
print('Test loss (MSE) of best checkpoint:', test_loss)

**If you want to load a previously saved model instead of training**

In [None]:
import shutil

# Copy the previously trained model from the attached input dataset into the
# writable working directory, under the new name 'res.keras'.
shutil.copy('/kaggle/input/resnet-model/best_res1.keras', '/kaggle/working/res.keras')

In [None]:
# Load the copied model from the working directory.
# NOTE: the prediction cell further down calls `best_model`, which is only defined by
# the training cell; when taking this "load instead" path, predict with `loaded_model`.
loaded_model = tf.keras.models.load_model('/kaggle/working/res.keras')

In [None]:
import numpy as np
import tensorflow as tf

class TestDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of unlabeled images for inference.

    Args:
        image_paths: Sequence of image file paths; batches follow this order unless shuffled.
        image_shape: Shape of one image array; its first two entries are the resize target.
        batch_size: Maximum number of images per batch (the last batch may be smaller).
        shuffle: If True, reshuffle the order at construction and after each epoch.
        **kwargs: Forwarded unchanged to tf.keras.utils.Sequence.
    """

    def __init__(self, image_paths, image_shape=(224, 224, 3), batch_size=32, shuffle=False, **kwargs):
        # Let the Keras base class initialise itself (Keras 3 warns when this is skipped).
        super().__init__(**kwargs)
        self.image_paths = image_paths
        self.image_shape = image_shape
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as a NumPy array of images (no labels)."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_image_paths = [self.image_paths[i] for i in batch_indexes]
        X = self.__data_generation(batch_image_paths)
        return X

    def on_epoch_end(self):
        """Rebuild the positional index and reshuffle it when shuffling is enabled."""
        self.indexes = np.arange(len(self.image_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_image_paths):
        """Load, resize and rescale the images at the given paths."""
        X = np.empty((len(batch_image_paths), *self.image_shape))
        for i, image_path in enumerate(batch_image_paths):
            # load_img expects a 2-tuple target size, so drop the channel entry.
            img = tf.keras.preprocessing.image.load_img(image_path, target_size=self.image_shape[:2])
            # Scale pixel values from [0, 255] to [0, 1], as done for the training images.
            X[i,] = tf.keras.preprocessing.image.img_to_array(img) / 255.0
        return X


In [None]:
import os

# Directory holding the competition's test images
test_root = '/kaggle/input/machine-learning-in-science-ii-2024/test_data/test_data/'

# Full path of every directory entry. os.listdir returns entries in arbitrary order,
# so downstream code must not assume these paths are sorted by image ID.
test_image_paths = [
    os.path.join(test_root, entry_name)
    for entry_name in os.listdir(test_root)
]

# Unshuffled generator, so predictions come back in the same order as test_image_paths
test_data_generator = TestDataGenerator(
    image_paths=test_image_paths,
    image_shape=(224, 224, 3),
    batch_size=32,
    shuffle=False,
)


In [None]:
# Run inference over every test batch. The generator was built with shuffle=False,
# so the prediction rows follow the order of test_image_paths.
# NOTE: `best_model` is only defined by the training cell; on a fresh kernel that took
# the "load a model instead" path, replace it with `loaded_model`.
test_predictions = best_model.predict(test_data_generator) #use loaded_model or best_model

# You can now use the test predictions as needed.

In [None]:
import os
import pandas as pd
import numpy as np

# Recover each image's ID from its file name, e.g. '.../123.png' -> 123.
# Assumes every test file stem is an integer ID, like the '<image_id>.png' training files.
image_ids = [int(os.path.splitext(os.path.basename(image_path))[0]) for image_path in test_image_paths]

# Clip predicted angles and speeds to be between 0 and 1.
# A new array is created so test_predictions is left untouched and the cell can be re-run.
clipped_predictions = np.clip(test_predictions, 0, 1)

# Combine image IDs with predictions.
# The IDs parsed from the file names are used (not a running 1..N counter): the paths come
# from os.listdir in arbitrary order, so a counter would attach predictions to the wrong images.
predictions_df = pd.DataFrame({
    'image_id': image_ids,
    'angle': clipped_predictions[:, 0],  # angle is the first model output
    'speed': clipped_predictions[:, 1]   # speed is the second model output
}).sort_values('image_id').reset_index(drop=True)

# Define the path where you want to save the predictions CSV file
predictions_csv_path = 'predictions.csv'

# Write predictions to CSV file
predictions_df.to_csv(predictions_csv_path, index=False)

print("Predictions saved to:", predictions_csv_path)
