In [None]:
import sys
# Location of the bartesian checkout; it is put on sys.path so the
# `bartesian.*` imports in the following cells can be resolved.
MODULE_DIR = "/workspaces/bartesian"

# Re-running this cell must not append the same entry twice.
module_dir_registered = MODULE_DIR in sys.path
if not module_dir_registered:
    sys.path.append(MODULE_DIR)

In [None]:
from bartesian.barcodes import (
    isolate_barcodes,
    resize_barcodes,
    barcode_to_widths,
    normalize_barcodes,
    BarcodeVocabulary,
    draw_barcode,
)

from bartesian.datasheet import (load_data, DataSheetRecord)

from bartesian.configs import (LIQUOR_ORDER, STRENGTH_ORDER)

In [None]:
import os
import cv2
import numpy as np
from glob import glob
from pathlib import Path

In [None]:
# Folders for the raw photos and the two derived image sets, plus the
# resolution passed to the resize step.
IMAGE_DIR = Path("/workspaces/bartesian/images/raw_images")
CROPPED_IMAGE_DIR = Path("/workspaces/bartesian/images/cropped_images")
RESIZED_IMAGE_DIR = Path("/workspaces/bartesian/images/resized_images")
TARGET_BARCODE_RESOLUTION = (1024, 530)

# Step 1: barcode isolation, from the raw folder into the cropped folder.
isolate_barcodes(IMAGE_DIR, CROPPED_IMAGE_DIR)

# Step 2: manual gate. Keep asking until the operator answers "y";
# answering "n" aborts the cell so bad crops are never resized.
response = None
while response != "y":
    response = input("Deleted all invalid barcodes? (y/n)").lower()
    if response == "n":
        raise Exception("Please delete all invalid barcodes before continuing")

# Step 3: resize the surviving crops to the common target resolution.
resize_barcodes(CROPPED_IMAGE_DIR, RESIZED_IMAGE_DIR, resolution=TARGET_BARCODE_RESOLUTION)

In [None]:

# Measure every resized barcode image with barcode_to_widths.
# glob() returns matches in arbitrary, filesystem-dependent order, so the
# paths are sorted to give the dict (and everything derived from it) the same
# order on every run.
barcodes = sorted(glob(os.path.join(RESIZED_IMAGE_DIR, '*')))
barcode_widths_dict: dict[str, list[int]] = {
    barcode_image: barcode_to_widths(barcode_image) for barcode_image in barcodes
}

# barcode_widths_dict now maps each image path to the value barcode_to_widths returned for it


In [None]:
# Image used for the visual spot check; the key is the full path string as
# stored in barcode_widths_dict.
SPOT_CHECK_BARCODE = "/workspaces/bartesian/images/resized_images/0[PXL_20240121_013857214.MP].jpg"

# Sweep over candidate vocabulary sizes (currently just 13) and draw the
# spot-check barcode for each one.
for vocab_size in range(13, 14):
    test_size = normalize_barcodes(barcode_widths_dict, vocabulary_size=vocab_size)
    print(vocab_size)
    spot_check_widths = test_size["barcodes"][SPOT_CHECK_BARCODE]
    draw_barcode(spot_check_widths, test_size["thickness"], 0.3)

In [None]:
# Load the datasheet and index it by drink.
#   all_records: every record
#   records:     only the records whose "image" field is truthy
all_record_json = load_data()
all_records = dict((record["drink"], record) for record in all_record_json)
records = dict(
    (record["drink"], record)
    for record in all_record_json
    if record["image"]
)

In [None]:
from bartesian.ml.utilities import record_to_matrix
from bartesian.datasheet import review_datasheet

In [None]:
# Run the datasheet review helper over every loaded record (not only the
# image-backed subset); what it checks is defined in bartesian.datasheet.
review_datasheet(all_record_json)

In [None]:
from bartesian.enums import Drink

In [None]:
# Hand-maintained mapping from each drink to the photo it appears in.
drink_to_filename: dict[Drink, str] = {
    Drink.SEX_ON_THE_BEACH: "PXL_20240121_013747782.jpg",
    Drink.PASSION_FRUIT_MARGARITA: "PXL_20240121_013803629.MP.jpg",
    Drink.COSMOPOLITAN: "PXL_20240121_013815977.MP.jpg",
    Drink.BEES_KNEES: "PXL_20240121_013832187.MP.jpg",
    Drink.PINEAPPLE_MARGARITA: "PXL_20240121_013842468.MP.jpg",
    Drink.ESPRESSO_MARTINI: "PXL_20240121_013850719.MP.jpg",
    Drink.WHISKEY_SOUR: "PXL_20240121_013857214.MP.jpg",
    Drink.SIDECAR: "PXL_20240121_013909370.jpg",
    Drink.BLACKBERRY_MARGARITA: "PXL_20240121_184839130.jpg",
    Drink.APPLE_PIE: "PXL_20240121_013724688.jpg",
    Drink.LEMON_DROP: "PXL_20240121_184849045.jpg",
    Drink.MANHATTAN: "PXL_20240121_013733706.MP.jpg",
    Drink.OLD_FASHIONED: "PXL_20240121_184912979.jpg",
}

# Reverse lookup: photo file name -> drink.
file_name_to_drink = dict(zip(drink_to_filename.values(), drink_to_filename.keys()))

In [None]:
# Normalize all measured barcodes with vocabulary_size=13; later cells read
# the "barcodes" entry of the result to build the training set.
barcode_results = normalize_barcodes(
    barcode_widths_dict,
    vocabulary_size=13
)

In [None]:
# Display the image-backed datasheet records for inspection.
records

In [None]:
# NOTE(review): this cell shows `records` again, exactly like the cell before
# it; one of the two can probably be deleted.
records

In [None]:
# Pair each normalized barcode with the recipe matrix of the drink it belongs
# to. Barcodes that cannot be matched are still skipped (best effort), but each
# skip is recorded and printed instead of being swallowed silently.
training_data = []
skipped_barcodes: list[tuple[str, str]] = []
for barcode_path, barcode_tokens in barcode_results["barcodes"].items():
    # Resized files look like "0[<photo stem>].jpg"; recover "<photo stem>.jpg"
    # so the name can be looked up in file_name_to_drink.
    target_file = Path(barcode_path).name.replace("0[", "").replace("].jpg", ".jpg")

    drink = file_name_to_drink.get(target_file)
    if drink is None:
        skipped_barcodes.append((barcode_path, f"no drink is mapped to {target_file!r}"))
        continue
    print(target_file, drink)

    # A KeyError here means the drink has no image-backed record, or the
    # record lacks a key record_to_matrix needs.
    try:
        output_matrix = record_to_matrix(records[drink])
    except KeyError as missing_key:
        skipped_barcodes.append(
            (barcode_path, f"no usable datasheet record for {drink} (missing key {missing_key})")
        )
        continue

    training_data.append({
        "filename": barcode_path,
        "input": barcode_tokens,
        "output": output_matrix,
    })

# Make the dropped examples visible so a shrinking training set is noticed.
for barcode_path, reason in skipped_barcodes:
    print(f"Skipped {barcode_path}: {reason}")

In [None]:
training_data  # not the last expression of the cell, so presumably evaluated without being displayed

# Every distinct value stored under any STRENGTH_ORDER key of the
# image-backed records.
strengths: set[float] = {
    record[strength_key]
    for record in records.values()
    for strength_key in STRENGTH_ORDER
}

# Category index -> strength value (ascending), and the reverse lookup.
strength_categories = dict(enumerate(sorted(strengths)))
rev_strength_categories = {
    strength_value: category_index
    for category_index, strength_value in strength_categories.items()
}

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense, Reshape, Conv1D, Flatten, MaxPooling1D, Dropout,
    Input, Concatenate
)
import random

In [None]:
from sklearn.model_selection import KFold
import numpy as np

In [None]:
def create_model(input_length, output_shape: tuple[int, int] = (5, 4)):
    """Build and compile a multi-kernel 1-D CNN that predicts a 2-D matrix.

    The input is fed to five parallel Conv1D branches (kernel sizes 1-5,
    32 filters each, 'same' padding). Their outputs are concatenated,
    flattened, passed through a 64-unit dense layer with 50% dropout, and
    projected to a sigmoid layer that is reshaped to ``output_shape``.

    Args:
        input_length: Number of steps in each input sequence; the model's
            input shape is ``(input_length, 1)``.
        output_shape: ``(rows, cols)`` of the predicted matrix. Defaults to
            ``(5, 4)``, the shape that used to be hard-coded.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and
        mean-squared-error loss. Every output entry lies in [0, 1] because of
        the sigmoid activation.
    """
    output_rows, output_cols = output_shape
    input_layer = Input(shape=(input_length, 1))

    # One conv branch per kernel size (1..5) over the same input.
    conv_layers = [
        Conv1D(filters=32, kernel_size=kernel_size, activation='relu', padding='same')(input_layer)
        for kernel_size in range(1, 6)
    ]

    # Merge the branches along the channel axis, then flatten for the dense head.
    concatenated = Concatenate()(conv_layers)
    flattened = Flatten()(concatenated)

    # Fully connected head with dropout for regularisation.
    dense = Dense(64, activation='relu')(flattened)
    dropout = Dropout(0.5)(dense)

    # One sigmoid unit per matrix cell, reshaped into the target matrix.
    final = Dense(output_rows * output_cols, activation='sigmoid')(dropout)
    output = Reshape((output_rows, output_cols))(final)

    # Create and compile the model
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model


In [None]:
# Display the assembled training examples before fitting.
training_data

In [None]:
EPOCHS = 100
INPUT_SIZE = 37
SEED = 42

# Seed the NumPy and TensorFlow RNGs so weight initialisation and dropout are
# repeatable across "Restart & Run All".
np.random.seed(SEED)
tf.random.set_seed(SEED)

# training_data is deliberately NOT shuffled in place: that reordered a list
# other cells index into, and KFold(shuffle=True) below already randomises
# the fold order.
X = np.array([example["input"] for example in training_data])
Y = np.array([example["output"] for example in training_data])

if len(X) < 2:
    raise ValueError(
        f"Leave-one-out cross-validation needs at least 2 training examples, got {len(X)}"
    )

# K-Fold with one sample per fold == Leave-One-Out Cross-Validation
kf = KFold(n_splits=len(X), shuffle=True, random_state=SEED)
fold_scores = []
for fold_no, (train_index, val_index) in enumerate(kf.split(X), start=1):
    # Split data into training and validation sets
    X_train, X_val = X[train_index], X[val_index]
    Y_train, Y_val = Y[train_index], Y[val_index]

    # Create a new model for each fold so no weights leak between folds
    model = create_model(INPUT_SIZE)

    # Train quietly: per-epoch logs for every fold would flood the output
    print(f'Training for fold {fold_no} ...')
    model.fit(X_train, Y_train, epochs=EPOCHS, verbose=0)

    # Evaluate on the single held-out sample
    scores = model.evaluate(X_val, Y_val, verbose=0)
    fold_scores.append(scores)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores}')

# Summarise the cross-validation instead of leaving only per-fold prints.
print(f'Mean {model.metrics_names[0]} over {len(fold_scores)} folds: {np.mean(fold_scores)}')


In [None]:

# To make a prediction: run the model left over from the last
# cross-validation fold on the first training example.
sample = training_data[0]
prediction = model.predict(np.array([sample["input"]]))
# The batch holds a single sample, so take it directly rather than
# hard-coding the (5, 4) output shape.
predicted_matrix = prediction[0]

# Show the photo file name. The previous `.replace("0-", "")` never matched the
# "0[<photo stem>].jpg" naming, so use the same clean-up as the cell that
# builds training_data.
print(Path(sample['filename']).name.replace("0[", "").replace("].jpg", ".jpg"))
# NOTE(review): strength_categories looks intended for mapping these scores
# back to discrete strengths, but no such conversion is wired up yet.
print(np.around(predicted_matrix, decimals=1))