In [8]:
import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential

# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.12.0-rc1


In [2]:
# # do this for keras once

# train_images = os.listdir('screens_training')
# train_images = [t for t in train_images if '.jpg' in t]

# for (name, val) in zip(train_images, y):
#     if val == 1:
#         shutil.move(f'screens_training\\{name}', f'screens_training\\True\\{name}')
#     elif val == 0:
#         shutil.move(f'screens_training\\{name}', f'screens_training\\False\\{name}')


In [4]:
# Target (height, width) every image is resized to when it is loaded.
image_size = (256, 256)

# Training portion (80%) of the balanced image directory. The seed and
# validation_split used here are repeated verbatim when the validation
# subset is built from the same directory.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='screens_training_balanced_00001_40000',
    label_mode='binary',
    image_size=image_size,
    validation_split=0.2,
    subset='training',
    shuffle=True,
    seed=42,
)

Found 14918 files belonging to 2 classes.
Using 11935 files for training.


In [5]:
# Validation portion (20%) of the balanced image directory. The directory,
# seed and validation_split are the same values used for the training subset.
validation_ds = tf.keras.utils.image_dataset_from_directory(
    directory='screens_training_balanced_00001_40000',
    label_mode='binary',
    image_size=image_size,
    validation_split=0.2,
    subset='validation',
    shuffle=True,
    seed=42,
)

Found 14918 files belonging to 2 classes.
Using 2983 files for validation.


In [6]:
# Sanity check: the training and validation splits must not share any file.
# Prints 'error' once for every validation file that also appears in training.
# The training paths are put in a set once so each lookup is a hash probe
# instead of a scan over the whole training file list.
train_paths = set(train_ds.file_paths)
for name in validation_ds.file_paths:
    if name in train_paths:
        print('error')

In [7]:
AUTOTUNE = tf.data.AUTOTUNE

# Apply the same cache -> prefetch pipeline to both splits and rebind the
# original names to the wrapped datasets.
train_ds, validation_ds = (
    ds.cache().prefetch(buffer_size=AUTOTUNE)
    for ds in (train_ds, validation_ds)
)

In [20]:
# Small CNN binary classifier for 256x256 RGB images, built layer by layer.
model = Sequential()

# Scale raw pixel values by 1/255 inside the model.
model.add(layers.Rescaling(1./255, input_shape=(256, 256, 3)))

# Three conv + max-pool stages with 16, 32 and 64 filters respectively.
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())

# Classification head: flatten, one hidden dense layer, single sigmoid output.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [21]:
# Metrics passed to model.compile; the list order is kept as-is.
METRICS = [
    # Confusion-matrix counts.
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    # Threshold-based summary metrics.
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    # Area under the ROC curve and under the precision-recall curve.
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='prc', curve='PR'),
]

In [22]:
# Configure training: Adam with a 3e-4 learning rate, binary cross-entropy
# loss, and the METRICS list for reporting.
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-04)
loss_fn = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=METRICS)

In [23]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 256, 256, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 256, 256, 16)      448       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 128, 128, 16)     0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 128, 128, 32)      4640      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 64, 64, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 64, 64, 64)       

In [25]:
# Train for 20 epochs, evaluating on the validation split after each epoch.
# The returned History object is bound to a name so the per-epoch metrics
# stay available for inspection or plotting, instead of being discarded.
history = model.fit(train_ds,
                    epochs=20,
                    validation_data=validation_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a84159d0d0>

In [26]:
# Persist the trained model under 'model3' so it can be reloaded later
# with load_model.
model.save('model3')



INFO:tensorflow:Assets written to: model3\assets


INFO:tensorflow:Assets written to: model3\assets


In [27]:
# Unlabeled screenshots 10001-11000.
# NOTE(review): the following cell rebinds test_ds to another directory
# before any prediction is made, so this dataset appears to be unused.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory='screens_10001_11000_unlabeled',
    labels=None,
    image_size=image_size,
    shuffle=False,  # important: predictions are later matched to labels by position
)

Found 1000 files belonging to 1 classes.


In [33]:
# Unlabeled hold-out screenshots 40001-50000 used for local evaluation.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory='screens_40001_50000_unlabeled',
    labels=None,
    image_size=image_size,
    shuffle=False,  # important: predictions are later matched to labels by position
)

Found 10000 files belonging to 1 classes.


In [34]:
# Run the trained model over the current test_ds; the result is later
# indexed as one row per image with the score in column 0.
predictions = model.predict(test_ds)



In [35]:
# Score the hold-out predictions against the ground-truth 'frag' labels.
game_states_data_training = pd.read_csv('game_states_data_training.csv')

# Hold-out rows are those with instance_id >= 40001. Predictions come from a
# dataset loaded with shuffle=False, so the labels are sorted by instance_id
# rather than left in whatever row order the CSV happens to have.
# NOTE(review): assumes screenshot file names start with the zero-padded
# instance id, so file order equals ascending instance_id - confirm.
X = game_states_data_training
X = X[X['instance_id'] >= 40001].sort_values('instance_id')
y_test = X['frag'].astype(int).values

y_pred = predictions

# Labels and predictions are paired purely by position; fail loudly if the
# counts differ, because any score computed from them would be meaningless.
if len(y_test) != len(y_pred):
    raise ValueError(
        f'label/prediction count mismatch: {len(y_test)} labels vs {len(y_pred)} predictions'
    )

print(f'\nTest score: {roc_auc_score(y_test, y_pred)}')


Test score: 0.6225310757823442


In [36]:
# Dump the hold-out scores to a text file, one value per line. Each row of
# y_pred is indexed at position 0 to get the score.
with open('img_pred_40001_50000.txt', 'w') as f:
    f.writelines(f'{row[0]}\n' for row in y_pred)

In [None]:
# # RUN IT ON DEMAND ONLY

# false_training_images_to_move = os.listdir('screens_training/False')
# i = 0
# no_files = 7459
# while i < no_files:
#     file = random.choice(false_training_images_to_move)
#     if int(file[:5]) <= 40000:
#         shutil.copyfile(f'screens_training/False/{file}', f'screens_training_balanced_00001_40000/False/{file}')
#         false_training_images_to_move.remove(file)
#         i += 1

In [32]:
# Copy the False-class screenshots that belong to the hold-out range
# (instance ids 40001-50000) into the unlabeled evaluation directory.
# NOTE(review): only the False class is handled here; the True-class
# hold-out images must be copied elsewhere - confirm.
false_training_images_to_move = os.listdir('screens_training/False')
for file in false_training_images_to_move:
    # The first five characters of the file name are parsed as the instance id.
    # Strictly greater than 40000: id 40000 belongs to the training range
    # (the balanced training set is 00001-40000), and the evaluation labels
    # are selected with instance_id >= 40001.
    if int(file[:5]) > 40000:
        shutil.copyfile(f'screens_training/False/{file}', f'screens_40001_50000_unlabeled/{file}')

In [13]:
# Reload the model stored under 'model3' and print its architecture to
# confirm it matches what was saved.
saved_model = load_model('model3')
saved_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 256, 256, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 256, 256, 16)      448       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 128, 128, 16)     0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 128, 128, 32)      4640      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 64, 64, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 64, 64, 64)       

In [14]:
# Official test set predictions.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory='screens_test',
    labels=None,
    image_size=image_size,
    shuffle=False,  # important: keeps the written scores in the dataset's file order
)

y_pred = saved_model.predict(test_ds)

# Write one score per line; each row of y_pred is indexed at position 0.
with open('img_screens_test_res.txt', 'w') as f:
    f.writelines(f'{row[0]}\n' for row in y_pred)

Found 10000 files belonging to 1 classes.


In [10]:
# Evaluate the reloaded model on the local hold-out screenshots (40001-50000).
test_ds = tf.keras.utils.image_dataset_from_directory(directory='screens_40001_50000_unlabeled',
                                                      image_size=image_size,
                                                      labels=None,
                                                      shuffle=False)  # important: predictions are matched to labels by position

predictions = saved_model.predict(test_ds)

game_states_data_training = pd.read_csv('game_states_data_training.csv')

# Hold-out rows are those with instance_id >= 40001. Because the images are
# loaded with shuffle=False, the labels are sorted by instance_id rather than
# left in whatever row order the CSV happens to have.
# NOTE(review): assumes screenshot file names start with the zero-padded
# instance id, so file order equals ascending instance_id - confirm.
X = game_states_data_training
X = X[X['instance_id'] >= 40001].sort_values('instance_id')
y_test = X['frag'].astype(int).values

y_pred = predictions

# Labels and predictions are paired purely by position; fail loudly if the
# counts differ, because any score computed from them would be meaningless.
if len(y_test) != len(y_pred):
    raise ValueError(
        f'label/prediction count mismatch: {len(y_test)} labels vs {len(y_pred)} predictions'
    )

print(f'\nTest score: {roc_auc_score(y_test, y_pred)}')

Found 10000 files belonging to 1 classes.

Test score: 0.6225310757823442


In [11]:
# Continue training the reloaded model for 10 more epochs on the same splits.
# The returned History object is bound to a name so the per-epoch metrics of
# this extra training stay available, instead of being discarded.
finetune_history = saved_model.fit(train_ds,
                                   epochs=10,
                                   validation_data=validation_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27464541cd0>

In [12]:
# Re-evaluate on the local hold-out screenshots (40001-50000) after the
# additional training epochs.
test_ds = tf.keras.utils.image_dataset_from_directory(directory='screens_40001_50000_unlabeled',
                                                      image_size=image_size,
                                                      labels=None,
                                                      shuffle=False)  # important: predictions are matched to labels by position

predictions = saved_model.predict(test_ds)

game_states_data_training = pd.read_csv('game_states_data_training.csv')

# Hold-out rows are those with instance_id >= 40001. Because the images are
# loaded with shuffle=False, the labels are sorted by instance_id rather than
# left in whatever row order the CSV happens to have.
# NOTE(review): assumes screenshot file names start with the zero-padded
# instance id, so file order equals ascending instance_id - confirm.
X = game_states_data_training
X = X[X['instance_id'] >= 40001].sort_values('instance_id')
y_test = X['frag'].astype(int).values

y_pred = predictions

# Labels and predictions are paired purely by position; fail loudly if the
# counts differ, because any score computed from them would be meaningless.
if len(y_test) != len(y_pred):
    raise ValueError(
        f'label/prediction count mismatch: {len(y_test)} labels vs {len(y_pred)} predictions'
    )

print(f'\nTest score: {roc_auc_score(y_test, y_pred)}')

Found 10000 files belonging to 1 classes.

Test score: 0.6124977766482134
