In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the tabular train/test CSV files shipped with the competition dataset.
train_csv_path = "/kaggle/input/cs-480-2024-spring/data/train.csv"
test_csv_path = "/kaggle/input/cs-480-2024-spring/data/test.csv"

train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Positional column layout used by this notebook:
#   position 0        -> skipped (presumably the `id` column; confirm against train.csv)
#   positions 1..163  -> the 163 tabular input features
#   positions 164..   -> regression targets (train.csv only)
feature_slice = slice(1, 164)
target_slice = slice(164, None)

X_train = train_data.iloc[:, feature_slice]
y_train = train_data.iloc[:, target_slice]
X_test = test_data.iloc[:, feature_slice]

def normalize_image(path, target_size=(128, 128)):
    """Load an image from disk, resize it, and scale its pixel values by 1/255.

    Args:
        path: Location of the image file; passed straight to ``load_img``.
        target_size: ``(height, width)`` the image is resized to while loading.
            Defaults to ``(128, 128)``, the size this notebook was written for.

    Returns:
        The array produced by ``img_to_array`` divided by ``255.0``. For
        8-bit images this places values in ``[0, 1]`` (assumes 8-bit input
        files; TODO confirm for this dataset).
    """
    pixels = img_to_array(load_img(path, target_size=target_size))
    return pixels / 255.0

# Build the on-disk path of every image from the `id` column of each split,
# then decode them all and stack the results into one array per split.
train_image_paths = [
    f'/kaggle/input/cs-480-2024-spring/data/train_images/{img_id}.jpeg'
    for img_id in train_data['id']
]
test_image_paths = [
    f'/kaggle/input/cs-480-2024-spring/data/test_images/{img_id}.jpeg'
    for img_id in test_data['id']
]

train_images = np.array(list(map(normalize_image, train_image_paths)))
test_images = np.array(list(map(normalize_image, test_image_paths)))

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the tabular features and the regression targets with SEPARATE
# scalers. Fitting one StandardScaler on the features and then refitting it on
# the targets throws away the feature statistics and leaves it unclear which
# statistics a later `scaler.inverse_transform(...)` call will apply.
feature_scaler = StandardScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)  # reuse the train statistics; never fit on test data

target_scaler = StandardScaler()
y_train = target_scaler.fit_transform(y_train)

# Backward-compatible alias: the prediction cell calls
# `scaler.inverse_transform(predictions)`, which needs the *target* statistics.
scaler = target_scaler

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Concatenate

# Two-branch regression network: a small CNN over the images and an MLP over
# the tabular features, concatenated and mapped to one linear output per target.
# Input and output widths are read from the prepared arrays (instead of being
# hard-coded) so the model stays in sync with the preprocessing cells.

# Image branch: three Conv(3x3) + MaxPool(2x2) stages followed by a flatten.
image_input = Input(shape=train_images.shape[1:])
image_features = Conv2D(32, (3, 3), activation='relu')(image_input)
image_features = MaxPooling2D((2, 2))(image_features)
image_features = Conv2D(64, (3, 3), activation='relu')(image_features)
image_features = MaxPooling2D((2, 2))(image_features)
image_features = Conv2D(128, (3, 3), activation='relu')(image_features)
image_features = MaxPooling2D((2, 2))(image_features)
image_features = Flatten()(image_features)

# Tabular branch: these are the input features from X_train, not labels.
tabular_input = Input(shape=(X_train.shape[1],))
tabular_features = Dense(64, activation='relu')(tabular_input)
tabular_features = Dense(128, activation='relu')(tabular_features)

# Fusion head: concatenate both branches and regress every target column.
combined = Concatenate()([image_features, tabular_features])
head = Dense(128, activation='relu')(combined)
outputs = Dense(y_train.shape[1], activation='linear')(head)

# Input order matters: fit/predict must be called with [images, tabular].
model = Model(inputs=[image_input, tabular_input], outputs=outputs)
model.compile(optimizer='adam', loss='mse')

In [None]:
import matplotlib.pyplot as plt

# Train on [images, tabular features] -> targets, asking Keras to hold out
# 20% of the training samples for validation.
history = model.fit(
    x=[train_images, X_train],
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Plot the per-epoch training and validation loss on a single set of axes.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Predict for the test set. The model was trained on standardized targets, so
# its outputs are mapped back through `scaler`, which by this point holds the
# statistics fit on y_train in the scaling cell.
predictions = model.predict([test_images, X_test])
rescaled = scaler.inverse_transform(predictions)

# Output column names for the submission file; there must be exactly one per
# model output (presumably in the same order as the target columns of
# train.csv; verify against the competition's sample submission).
submission_columns = ['X4', 'X11', 'X18', 'X26', 'X50', 'X3112']

submission = pd.DataFrame(rescaled, columns=submission_columns)
submission.insert(loc=0, column='id', value=test_data['id'])
submission.to_csv('submission.csv', index=False)