In [None]:
#imports
import os
import pandas as pd
from glob import glob
import tensorflow as tf
from tensorflow.keras import losses, optimizers, layers, Sequential
import matplotlib.pyplot as plt

In [None]:
#get image data
os.makedirs('body_score_dataset', exist_ok=True)
!git clone -b body_scores_prediction_dataset https://github.com/MVet-Platform/M-Vet_Hackathon24.git ./body_score_dataset 

In [None]:
#load label data
# Read the labels from the same working-directory-relative folder that the
# dataset is cloned into (./body_score_dataset) instead of a hard-coded
# absolute Kaggle path, so the notebook also runs outside /kaggle/working.
df_train_data = pd.read_csv('body_score_dataset/train_data.csv')
df_train_data.head()

In [None]:
# Distribution of the regression target. The figure gets a title and axis
# labels so that it can be understood without reading the code.
fig, ax = plt.subplots()
df_train_data['bodyScore'].hist(ax=ax)
ax.set(title='Distribution of bodyScore', xlabel='bodyScore', ylabel='Count')
plt.show()

In [None]:
#get file path for image files
# Map each filename to the first path that glob finds for it. Because
# recursive=True is not passed, `**` behaves like `*`, so only files exactly
# one directory level below body_score_dataset are matched. A filename with
# no match raises IndexError from the [0] lookup.
df_train_data['filepath'] = df_train_data['filename'].map(
    lambda filename: glob(f'body_score_dataset/**/{filename}')[0]
)

In [None]:
# Preview the first rows, now including the resolved 'filepath' column.
df_train_data.head(5)

In [None]:
#create array of body scores and file paths
# Pull the label and path columns out as arrays; the two stay aligned
# row-for-row because they come from the same DataFrame.
body_scores = df_train_data['bodyScore'].values
file_paths = df_train_data['filepath'].values

In [None]:
#load one image from disk, preprocess it, and optionally pair it with its label
def load_and_preprocess_image(file_path, body_score=None):
    """Read a JPEG file and convert it to a 224x224, 3-channel float image.

    Args:
        file_path: Path of the image file to read.
        body_score: Optional label. It is returned unchanged alongside the
            image when it is not ``None``.

    Returns:
        ``(image, body_score)`` when ``body_score`` is given, otherwise only
        ``image``. The image is decoded with 3 channels, resized to 224x224,
        cast to float32 and divided by 255.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    scaled = tf.cast(resized, tf.float32) / 255.0
    # Inference path: no label was supplied, so return the image on its own.
    if body_score is None:
        return scaled
    return scaled, body_score

In [None]:
# Create a TensorFlow dataset
# Start from (file path, body score) pairs, then load and preprocess the
# images in parallel; tf.data chooses the level of parallelism.
path_score_pairs = tf.data.Dataset.from_tensor_slices((file_paths, body_scores))
dataset = path_score_pairs.map(
    load_and_preprocess_image,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

In [None]:
#view contents of dataset
# Show the first (image, body score) element as a sanity check on loading.
for sample in dataset.take(1):
    sample_image, sample_score = sample[0], sample[1]
    plt.title(f'Body Score: {sample_score.numpy()}')
    plt.imshow(sample_image.numpy())
    plt.axis('off')

In [None]:
#shuffle and batch data
# Shuffle once with a fixed order. By default shuffle() draws a new order on
# every iteration, so splitting this dataset with take()/skip() would yield
# different (overlapping) train and validation samples each time it is
# iterated, leaking training images into validation. A fixed seed additionally
# keeps the order reproducible across kernel restarts.
dataset = dataset.shuffle(buffer_size=2000, seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(batch_size=32)

In [None]:
#split data into train and val sets
# The dataset is batched, so its length is a number of batches. Ask tf.data
# for the cardinality instead of iterating the whole pipeline, which would
# read and decode every image just to count the batches.
dataset_length = int(dataset.cardinality().numpy())
if dataset_length < 0:
    # Negative values mean the cardinality is not statically known; fall back
    # to counting by iteration.
    dataset_length = sum(1 for _ in dataset)
print(dataset_length)
# First 70% of the batches for training, the remainder for validation.
# NOTE: take()/skip() only yields disjoint splits when the dataset produces
# the same element order every time it is iterated.
train_size = int(dataset_length*0.7)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

In [None]:
#create model
# ImageNet-pretrained ResNet50 backbone without its classification head.
# pooling='avg' makes the backbone emit one flat feature vector per image, so
# no Flatten layer is needed before the dense head.
# NOTE(review): the Keras ResNet50 ImageNet weights are documented for inputs
# passed through tf.keras.applications.resnet50.preprocess_input, whereas the
# images in this notebook are scaled to [0, 1] — confirm this is intended.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
)

model = Sequential()
model.add(base_model)
model.add(layers.Dense(units=1024, activation='relu'))
# Linear output for regression. A ReLU on the single output unit outputs 0 with
# zero gradient whenever its pre-activation is negative, so the model could
# get stuck predicting 0 for every image.
model.add(layers.Dense(units=1, activation='linear'))

In [None]:
#compile model
# Train with mean absolute error and RMSprop at its default settings.
model.compile(
    optimizer=optimizers.RMSprop(),
    loss=losses.mae,
)

In [None]:
#train model
# train_dataset and val_dataset are tf.data pipelines that are already batched
# (32 per batch), so batch_size is not passed to fit(): Keras documents that
# it must not be specified for dataset inputs. The returned History is kept so
# the loss curves can be inspected afterwards.
history = model.fit(train_dataset, validation_data=val_dataset, epochs=20)

In [None]:
#load submission file
# Read the sample submission from the same working-directory-relative folder
# that the dataset is cloned into (./body_score_dataset) instead of a
# hard-coded absolute Kaggle path.
df_submit = pd.read_csv('body_score_dataset/sample_submission.csv')
df_submit.head()

In [None]:
#prepare test dataset
# Resolve each submission filename to the first matching path. `**` without
# recursive=True matches exactly one directory level below body_score_dataset.
df_submit['filepath'] = df_submit['filename'].map(
    lambda filename: glob(f'body_score_dataset/**/{filename}')[0]
)
df_submit_file_paths = df_submit['filepath'].values
# Paths only (no labels): load_and_preprocess_image then returns just the image.
test_dataset = (
    tf.data.Dataset.from_tensor_slices(df_submit_file_paths)
    .map(load_and_preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(32)
)

In [None]:
#make predictions
# Run inference on the test images and collapse the output to one value per image.
predictions = model.predict(test_dataset)
predictions_flattened = predictions.flatten()
# Cap every prediction above 5 at 5.0; all other values are kept as they are.
capped_scores = []
for predicted_score in predictions_flattened:
    capped_scores.append(5.0 if predicted_score > 5 else predicted_score)
df_submit['bodyScore'] = capped_scores
# Write only the two submission columns, without the DataFrame index.
submission_columns = ['filename', 'bodyScore']
df_submit[submission_columns].to_csv('submission.csv', index=False)