In [40]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [2]:
training = pd.read_csv('training.csv')
testing = pd.read_csv('test.csv')
# input -1 for missing values
imputer = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-1)

# This code was removing column names, my changes rename the columns so they keep original values now
# Convert back to dataframe and replace the missing values
training = pd.DataFrame(imputer.fit_transform(training), columns=training.columns)

y_training = training.loc[:, :"mouth_center_bottom_lip_y"]


# Get the last feature column
lastFeature = training.iloc[:, -1]

# Split the last feature by spaces
splitFeatures = lastFeature.str.split(' ', expand=True)

# Drop the original last feature and concatenate the new split features
training = pd.concat([training.drop(training.columns[-1], axis=1), splitFeatures], axis=1)


# I drop the first columns here so they aren't mislabeled, and this way we can just make this the X_train
training.drop(training.columns[range(0, 30)], axis=1, inplace=True)


# This was renaming all target columns to pixel_i as well--code added before getting the last feature column fixes this
# Rename the columns to pixel_i
training.columns = [f'pixel_{i}' for i in range(training.shape[1])]


# Same for testing
testImg = testing.iloc[:, -1]
splitTest = testImg.str.split(' ', expand=True)
testing = pd.concat([testing.drop(testing.columns[-1], axis=1), splitTest], axis=1)

# there was an extra column that was just the index
testing.drop(columns=testing.columns[[0]], inplace=True)

# Rename the columns to pixel_i
testing.columns = ['ImageId'] + [f'pixel_{i}' for i in range(1, testing.shape[1])]

In [34]:
# Need to split data into train and test splits
X_train, X_test, y_train, y_test = train_test_split(training, y_training, train_size=0.8, test_size=0.2, random_state=10)

In [35]:
# Reshape dataframe into numpy array so I can reshape it again
X_train = X_train.to_numpy(dtype=np.float32)

# Reshape image data so the model can process it as a Tensor
X_train = X_train.reshape(5639, 96, 96, 1)
X_train = X_train / 255.0

# Reshape y_train into a numpy array as well
y_train = np.array(y_train, dtype=np.float32)

# Reshape test data
X_test = X_test.to_numpy(dtype=np.float32)
X_test = X_test.reshape(1410, 96, 96, 1)
X_test = X_test / 255.0

y_test = np.array(y_test, dtype=np.float32)

In [36]:
# training some basic convolutional models based off of our homework
# this has to be a regression model (given that target outputs are floats) so it needs to be a little different

model = tf.keras.models.Sequential([
      tf.keras.layers.Conv2D(26, (3,3), activation='relu', input_shape=(96, 96, 1)),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.Dense(30)
])
model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss='mean_squared_error', metrics=['mean_squared_error'])
model.fit(X_train_finished, y_train, epochs=10, batch_size=32)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - loss: 1286.0601 - mean_squared_error: 1286.0601
Epoch 2/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 917.4705 - mean_squared_error: 917.4705
Epoch 3/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 502.5334 - mean_squared_error: 502.5334
Epoch 4/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 216.3811 - mean_squared_error: 216.3811
Epoch 5/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 34ms/step - loss: 112.2513 - mean_squared_error: 112.2513
Epoch 6/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 76.1566 - mean_squared_error: 76.1566
Epoch 7/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 65.9440 - mean_squared_error: 65.9440
Epoch 8/10
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x771721b7b7a0>

In [41]:
predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error (MSE): {mse}')

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Mean Squared Error (MSE): 175.158203125
