In [1]:
import os, sys
import xml.etree.ElementTree as ET
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
from tensorflow.keras.preprocessing.image import img_to_array

sys.path.append('/Users/captainrib/workspace/project-ares/ares-data-processing')

In [2]:
from data.training.dataset import CandlestickDataset

## Prepare the labeled data

In [3]:
annotations_folder = '/Users/captainrib/workspace/project-ares/ares-data-processing/data/images/labeled'
labeled_data_folder = '/Users/captainrib/workspace/project-ares/ares-data-processing/data/images/'
image_folder = '/Users/captainrib/workspace/project-ares/ares-data-processing/data/images/30min_segments'

# Function to parse a single XML file
def parse_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    image_filename = root.find("filename").text
    labels = []
    
    for obj in root.findall("object"):
        label = obj.find("name").text
        labels.append(label)
        
    return image_filename, labels

# Parse all XML files in the annotations folder
annotations = []

for xml_file in os.listdir(annotations_folder):
    if xml_file.endswith(".xml"):
        image_filename, labels = parse_annotation(os.path.join(annotations_folder, xml_file))
        for label in labels:
            annotations.append({"image_filename": image_filename, "label": label})

# Create a DataFrame and save it as a CSV file
labeled_data = pd.DataFrame(annotations)
labeled_data.to_csv('{}labeled_data.csv'.format(labeled_data_folder), index=False)

## Split labeled data to training, validation and testing

In [4]:
# Read the labeled data (replace this with your actual labeled data)
data = pd.read_csv('{}labeled_data.csv'.format(labeled_data_folder))

# Split the dataset into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.3, stratify=data['label'], random_state=42)

# Split the remaining test_data into validation and testing sets
val_data, test_data = train_test_split(test_data, test_size=0.5, stratify=test_data['label'], random_state=42)

# Save the training, validation, and testing sets as separate CSV files
train_data.to_csv('{}train_data.csv'.format(labeled_data_folder), index=False)
val_data.to_csv('{}val_data.csv'.format(labeled_data_folder), index=False)
test_data.to_csv('{}test_data.csv'.format(labeled_data_folder), index=False)

In [5]:
train_data.iloc[1]["image_filename"]

'segment_2023-03-15_87.png'

## Load dataset

In [10]:
batch_size = 32
input_size = (540, 320)

train_dataset = CandlestickDataset('{}train_data.csv'.format(labeled_data_folder), image_folder, annotations_folder, input_size)
val_dataset = CandlestickDataset('{}val_data.csv'.format(labeled_data_folder), image_folder, annotations_folder, input_size)
test_dataset = CandlestickDataset('{}test_data.csv'.format(labeled_data_folder), image_folder, annotations_folder, input_size)

In [11]:
def visualize_sample(image, bboxes, labels):
    image_copy = np.copy(image)
    
    for bbox, label in zip(bboxes, labels):
        x_min, y_min, x_max, y_max = bbox
        cv2.rectangle(image_copy, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        cv2.putText(image_copy, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    cv2.imshow("Image", image_copy)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

def create_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

input_shape = (320, 540, 3)  # Replace with the input size you chose
num_classes = 1  # The number of classes in your problem (binary classification in this case)

model = create_model(input_shape, num_classes)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 318, 538, 32)      896       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 159, 269, 32)     0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 157, 267, 64)      18496     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 78, 133, 64)      0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 76, 131, 64)       36928     
                                                                 
 flatten_1 (Flatten)         (None, 637184)           

In [13]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

train_data_gen = tf.data.Dataset.from_generator(
    lambda: train_dataset,
    output_signature=(
        tf.TensorSpec(shape=(320, 540, 3), dtype=tf.uint8),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
).batch(batch_size)

val_data_gen = tf.data.Dataset.from_generator(
    lambda: val_dataset,
    output_signature=(
        tf.TensorSpec(shape=(320, 540, 3), dtype=tf.uint8),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
).batch(batch_size)

# Train the model
epochs = 10
history = model.fit(train_data_gen,
                    validation_data=val_data_gen,
                    epochs=epochs)

Epoch 1/10


2023-03-23 23:53:18.101482: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


     12/Unknown - 44s 3s/step - loss: 2.5546 - accuracy: 1.0000

2023-03-23 23:54:02.436774: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
history

<keras.callbacks.History at 0x13fbc2c40>

In [28]:
test_img_path = '/Users/captainrib/workspace/project-ares/ares-data-processing/data/images/testcases/neg3.png'
image = cv2.imread(test_img_path)

In [29]:
input_size = (540, 320)
preprocessed_image = cv2.resize(image, input_size)
preprocessed_image = preprocessed_image.astype("float32") / 255.0
image_tensor = np.expand_dims(preprocessed_image, axis=0)
predictions = model.predict(image_tensor)

# Interpret the predictions
threshold = 0.5  # Adjust the threshold according to your needs
if predictions[0][0] > threshold:
    print("The bullish flag pattern is present in the image.")
else:
    print("The bullish flag pattern is not present in the image.")

The bullish flag pattern is present in the image.
