# MonReader
---
## Summary

**Data Description:**

We collected page-flipping videos from smartphones and labelled them as flipping or not flipping.

We clipped the videos into short clips and labelled each clip as flipping or not flipping. The frames extracted from each clip are then saved to disk in sequential order with the following naming structure: VideoID_FrameNumber

**Goal(s):**

Predict if the page is being flipped using a single image.

**Success Metrics:**

Evaluate model performance based on F1 score, the higher the better.

**Bonus(es):**

Predict if a given sequence of images contains an action of flipping.


# Setup

## Import Libraries

In [13]:
# Standards
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from scipy import stats
import statistics
import tensorflow as tf

# For Data Import
from pathlib import Path

# For Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.metrics import classification_report
from keras import backend as K

# Warnings
import warnings

# Suppress deprecation warnings
#warnings.filterwarnings('ignore', category=FutureWarning)
#warnings.filterwarnings('ignore', category=UserWarning)

## Import Data

Image data should be preprocessed before being fed into a computer vision model. Here, preprocessing consists of resizing each image to 224x224 and normalizing its pixel values to the [0, 1] range.

In [3]:
# Define the pre-processing functions
def preprocess_image(image):
    """Turn encoded JPEG bytes into a model-ready image tensor.

    Args:
        image: Encoded JPEG contents (as produced by ``tf.io.read_file``).

    Returns:
        The image decoded with 3 channels, resized to 224x224 and divided
        by 255 so that pixel values fall in the [0, 1] range.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    # Scale 8-bit intensities down to the [0, 1] range
    return resized / 255.0

def load_and_preprocess_image(path):
    """Read the file at ``path`` and return it run through ``preprocess_image``.

    Args:
        path: Location of the JPEG file to load.

    Returns:
        Whatever ``preprocess_image`` returns for the file's raw contents.
    """
    raw_bytes = tf.io.read_file(path)
    processed = preprocess_image(raw_bytes)
    return processed

def load_and_preprocess_from_path_label(path, label):
    """Map a ``(path, label)`` pair to an ``(image, label)`` pair.

    The label is passed through untouched; only the path is replaced by the
    loaded and preprocessed image.
    """
    image = load_and_preprocess_image(path)
    return image, label

# Get image paths and labels for training and testing datasets
def get_image_paths_and_labels(directory):
    """Collect the ``.jpg`` file paths and binary labels under a dataset root.

    ``directory`` is expected to contain ``flip`` and ``notflip`` sub-folders.

    Args:
        directory: Dataset root, given either as a string or a
            ``pathlib.Path``.

    Returns:
        A ``(paths, labels)`` tuple of two equal-length lists: every ``flip``
        image path (label 1) followed by every ``notflip`` image path
        (label 0). Paths are strings, sorted within each class.
    """
    # Path(...) / name works for both str and Path inputs, unlike `+ '/flip'`
    root = Path(directory)
    # glob yields entries in filesystem-dependent order; sorting keeps the
    # listing reproducible between runs and machines.
    flip_image_paths = sorted(str(path) for path in (root / 'flip').glob('*.jpg'))
    notflip_image_paths = sorted(str(path) for path in (root / 'notflip').glob('*.jpg'))
    labels = [1] * len(flip_image_paths) + [0] * len(notflip_image_paths)
    return flip_image_paths + notflip_image_paths, labels

train_image_paths, train_image_labels = get_image_paths_and_labels(r"C:\ref\images\training")
test_image_paths, test_image_labels = get_image_paths_and_labels(r"C:\ref\images\testing")

# Fail early with a readable message instead of an opaque tf.data error
# (a zero-sized shuffle buffer / an empty path tensor) further down.
if not train_image_paths or not test_image_paths:
    raise FileNotFoundError(
        "No .jpg images found under the training and/or testing directories"
    )

batch_size = 32 # Define batch size

# Create Datasets
# Training: shuffle the lightweight (path, label) pairs BEFORE decoding, so the
# full-length shuffle buffer holds short strings rather than every decoded
# 224x224x3 image. Decoding then runs in parallel and batches are prefetched
# so input preparation overlaps with training.
train_path_ds = tf.data.Dataset.from_tensor_slices((train_image_paths, train_image_labels))
train_image_label_ds = train_path_ds.shuffle(buffer_size=len(train_image_labels)).map(
    load_and_preprocess_from_path_label, num_parallel_calls=tf.data.AUTOTUNE
)
train_ds = train_image_label_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Testing: deliberately NOT shuffled. Evaluation metrics do not depend on
# order, and a fixed order keeps predictions aligned with test_image_labels.
test_path_ds = tf.data.Dataset.from_tensor_slices((test_image_paths, test_image_labels))
test_image_label_ds = test_path_ds.map(
    load_and_preprocess_from_path_label, num_parallel_calls=tf.data.AUTOTUNE
)
test_ds = test_image_label_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# Modelling

## General

### Setup

In [15]:
# Build the CNN as one sequential (linear) stack of layers

model = Sequential([
    # Convolution block 1: 32 filters of 3x3 over the 224x224x3 input
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),  # downsample the feature maps
    # Convolution block 2: 64 filters of 3x3
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),  # downsample the feature maps
    # Classifier head: flatten to a vector, one hidden dense layer,
    # then a single sigmoid unit for the binary flip / not-flip output
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])


### Evaluation

In [14]:
def recall_m(y_true, y_pred):
    """Recall: rounded true positives divided by rounded actual positives.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores; rounding acts as the decision threshold.

    Returns:
        Scalar recall over the values passed in. ``K.epsilon()`` in the
        denominator guards against division by zero.
    """
    # NOTE(review): as a Keras metric this is presumably evaluated per batch
    # and then averaged - confirm before reporting it as a dataset-level score.
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    hit_count = K.sum(hits)
    actual_count = K.sum(actual)
    return hit_count / (actual_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision: rounded true positives divided by rounded predicted positives.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores; rounding acts as the decision threshold.

    Returns:
        Scalar precision over the values passed in. ``K.epsilon()`` in the
        denominator guards against division by zero.
    """
    # NOTE(review): as a Keras metric this is presumably evaluated per batch
    # and then averaged - confirm before reporting it as a dataset-level score.
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    hit_count = K.sum(hits)
    flagged_count = K.sum(flagged)
    return hit_count / (flagged_count + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores.

    Returns:
        ``2 * p * r / (p + r + epsilon)``, where the epsilon keeps the
        result defined when both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# plus the custom precision / recall / F1 metrics
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', precision_m, recall_m, f1_m],
)

# Train for 5 epochs, reporting metrics on test_ds after each epoch
history = model.fit(train_ds, validation_data=test_ds, epochs=5)

# Final scores on test_ds, unpacked in the order loss + compiled metrics
loss, accuracy, precision, recall, f1_score = model.evaluate(test_ds)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
