Using example from the following website:
https://keras.io/examples/vision/image_classification_from_scratch/

In [1]:
#First we load the following packages:
import os
import numpy as np
import pandas as pd
import keras
from keras import layers
from tensorflow import data as tf_data
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

In [2]:
# first we need to load the data. 
# Importing the json file
import json
file_path = "dataset.json"

# Open the JSON file and load its contents
with open(file_path, 'r') as file:
    data = json.load(file)
    
data1 = pd.DataFrame(data)
print(data1.head(1))
#14686 games

     appid     release      title  price sentiment reviews percentage  \
0  2805630  2024-02-24  BotMobile   1173  Positive      10        100   

                                                tags  \
0  [Action, Action-Adventure, Adventure, Casual, ...   

                                         screenshots  
0  [2805630_ss_02b69bffacf19807fb6202fb8167fad7e0...  


For computational ease I started by selecting 10% of all the games (1468 games), so that the code can first be written in the correct manner before we start training the model on all instances. 

In [3]:
# Select 10% of the games
num_selected_games = int(len(data1) * 0.1)
selected_games = np.random.choice(data, num_selected_games, replace=False)
print(selected_games)

[{'appid': '2480710', 'release': '2023-10-17', 'title': 'DEATH RATION: BACK TO 19XX', 'price': 1950, 'sentiment': None, 'reviews': None, 'tags': ['1980s', "1990's", '3D', 'Action', 'Action RPG', 'Adventure', 'Aliens', 'Early Access', 'Exploration', 'FPS', 'Indie', 'Inventory Management', 'Investigation', 'Multiple Endings', 'Open World', 'Post-apocalyptic', 'Singleplayer', 'Survival'], 'screenshots': ['2480710_ss_0d3dfa713cfe96e4c0f63310b8fe91b480edb652.1920x1080.jpg', '2480710_ss_2c8722fad47680a1e21fa2a73322a87a10ab307c.1920x1080.jpg', '2480710_ss_2f66ad1b12fd776a70a9d2543fdc145f5dea679e.1920x1080.jpg', '2480710_ss_3fe313b48c859c7c05ff268767e97e820dbb8b5c.1920x1080.jpg', '2480710_ss_4173d55539a85da83b11639d9b8162393f93ec89.1920x1080.jpg', '2480710_ss_475b6517990bce32fa091f2f17a90f9f7018c623.1920x1080.jpg', '2480710_ss_578c121e36b69c8387cd0b6ddca72838fadfe15b.1920x1080.jpg', '2480710_ss_64f2f69578dff0bc6a4f6411d942db90143bfa98.1920x1080.jpg', '2480710_ss_c98d7113adaa3de7993dc39b35bcf5b

Using this dataset we will make a training and test split. This split is done based on the games. The deep learning model will then be built on the video games in the training set and the video games in the test set will be used to evaluate the model.
We will need to match each screenshot with the corresponding price. At the end we have an array with the screenshots and the price of these screenshots split up in a training and a test set. 

In [5]:
selected_games = np.array(selected_games)
# Split data into train and test sets based on games, this function selects the indices of the games that are included in the training set and in the test set.
train_game_indices, test_game_indices = train_test_split(np.arange(len(selected_games)), test_size=0.2)

#Using these indices, screenshots that correspond to a game in the training or test set are included 
train_image_paths, train_labels = [], []
test_image_paths, test_labels = [], []

for idx, game in enumerate(selected_games):
    for screenshot in game['screenshots']:
        if idx in train_game_indices:
            train_image_paths.append(os.path.join('images', screenshot))
            train_labels.append(game['price'])
        else:
            test_image_paths.append(os.path.join('images', screenshot))
            test_labels.append(game['price'])

train_image_paths = np.array(train_image_paths)
train_labels = np.array(train_labels)
test_image_paths = np.array(test_image_paths)
test_labels = np.array(test_labels)

print(train_image_paths)
print(train_labels)


print("Number of training games:", len(train_game_indices))
print("Number of test games:", len(test_game_indices))
print("Number of training images:", len(train_image_paths))
print("Number of test images:", len(test_image_paths))


['images/2480710_ss_0d3dfa713cfe96e4c0f63310b8fe91b480edb652.1920x1080.jpg'
 'images/2480710_ss_2c8722fad47680a1e21fa2a73322a87a10ab307c.1920x1080.jpg'
 'images/2480710_ss_2f66ad1b12fd776a70a9d2543fdc145f5dea679e.1920x1080.jpg'
 ...
 'images/2805170_ss_63733b460134cf469becf527658564f21dfbe5d5.1920x1080.jpg'
 'images/2805170_ss_6db863522799a391ae39803f2c57b054e6a3445a.1920x1080.jpg'
 'images/2805170_ss_a2afe589349144981b9ffe857847476bba726b82.1920x1080.jpg']
[1950 1950 1950 ...  399  399  399]
Number of training games: 1174
Number of test games: 294
Number of training images: 10407
Number of test images: 2448


Next, some pre-processing needs to be performed. The images are scaled, ...

In [6]:
# Define parameters
img_height, img_width = 224, 224  # Resize images to this size
batch_size = 32
num_classes = 1  # Predicting a single value (price)


# Create data generators
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': train_image_paths, 'price': train_labels}),
    x_col="filename",
    y_col="price",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='raw')

test_generator = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': test_image_paths, 'price': test_labels}),
    x_col="filename",
    y_col="price",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='raw')

print(train_generator)


Found 9788 validated image filenames.
Found 2325 validated image filenames.
<keras.src.legacy.preprocessing.image.DataFrameIterator object at 0x29443f680>




In [6]:
# There are some images that are included in the json file as a screenshot, but are not in the folder with images. All of the images for which this is the case is printed below:
for image_path in train_image_paths:
    if not os.path.exists(image_path):
        print(f"Invalid image file: {image_path}")

Invalid image file: images/2808770_ss_12c868462b197c09eaf740fcad7f28968975b0c9.1920x1080.jpg
Invalid image file: images/2808770_ss_1faf1a9b2db934f2da171ab3c4667605210ba7dd.1920x1080.jpg
Invalid image file: images/2808770_ss_542040b1940705ab78c845d89a9cf9a695d9ab88.1920x1080.jpg
Invalid image file: images/2808770_ss_b0eba9f0b20711c4bec83b2cb0c782d83e78a3eb.1920x1080.jpg
Invalid image file: images/2808770_ss_b6912120ae9bb252ca9d71d26c049344dd3a3a24.1920x1080.jpg
Invalid image file: images/2758830_ss_0cbccf86456908fac94dba8a34204b0f6c9d428e.1920x1080.jpg
Invalid image file: images/2758830_ss_342469e8307508041b575ed0782d4fa9d9a2fcc4.1920x1080.jpg
Invalid image file: images/2758830_ss_580152ae990c8a58baf7350e25b134635d10e3f8.1920x1080.jpg
Invalid image file: images/2758830_ss_d9619ec15f2d101608398bebe8ddc13597975762.1920x1080.jpg
Invalid image file: images/2758830_ss_ea28e36f10c5e7de93a4f7bef62a5e50c34611e8.1920x1080.jpg
Invalid image file: images/2764380_ss_0d93c32cf3121a0c31370d16bb7bff4c

In [11]:
from tensorflow.keras.applications import MobileNetV2  # Import MobileNetV2
# Load pre-trained MobileNetV2 without top layers
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze base model layers
base_model.trainable = False

# Create model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1)
])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5: None -- [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])  # Mean Absolute Error

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=10,
    validation_data=test_generator,
    validation_steps=test_generator.samples // batch_size
)

# Evaluate the model
loss, mae = model.evaluate(test_generator)
print("Validation Mean Absolute Error:", mae)

# Save the model
#model.save('game_price_prediction_model.h5')