# Download datasets

In [None]:
import os
import zipfile
import shutil
import numpy as np
import glob
from numpy.linalg import norm

In [26]:
# Root folders of the two datasets used by this notebook.
dataset_dir_face = "./gerador-mensagens/database_face"
dataset_dir_futebol = "./gerador-mensagens/database_futebol"

# Facial-expression dataset: the downloaded archive and the image folders
# (train / validation splits) that the later cells read from.
zip_path_face = dataset_dir_face + "/facial-emotion-expressions.zip"
extract_path_face = dataset_dir_face + "/images"
path_train_path_face = extract_path_face + "/train"
path_validation_path_face = extract_path_face + "/validation"

In [30]:
os.makedirs(dataset_dir_face, exist_ok=True)

if not os.path.exists(zip_path_face):
    !curl -L -o {zip_path_face} https://www.kaggle.com/api/v1/datasets/download/samaneheslamifar/facial-emotion-expressions

if not os.path.exists(extract_path_face):
    zipfile.ZipFile(zip_path_face, 'r').extractall(dataset_dir_face)

classes = os.listdir(path_train_path_face)
for folder in classes:
    if folder not in ["happy", "sad"]:
        shutil.rmtree(os.path.join(path_train_path_face, folder))
        shutil.rmtree(os.path.join(path_validation_path_face, folder))
    else:
        train_files = glob.glob(os.path.join(path_train_path_face, folder, '*'))
        validation_files = glob.glob(os.path.join(path_validation_path_face, folder, '*'))
        
        for i, file_path in enumerate(validation_files):
            new_file_path = os.path.join(path_validation_path_face, folder, f"{folder}_{i+1}.jpg")
            os.rename(file_path, new_file_path)
            
        for i, file_path in enumerate(train_files):
            new_file_path = os.path.join(path_train_path_face, folder, f"{folder}_{i+1}.jpg")
            os.rename(file_path, new_file_path)

if os.path.exists(f"{extract_path_face}/images"):
    shutil.rmtree(f"{extract_path_face}/images")

In [27]:
if not os.path.exists(f"{dataset_dir_futebol}/futebol_logos.zip"):
    os.makedirs(dataset_dir_futebol, exist_ok=True)
    !curl -L -o {dataset_dir_futebol}/futebol_logos.zip https://github.com/luukhopman/football-logos/archive/refs/heads/master.zip

if not os.path.exists(f"{dataset_dir_futebol}/database_futebol"):
    zipfile.ZipFile(f"{dataset_dir_futebol}/futebol_logos.zip", 'r').extractall(dataset_dir_futebol)
    os.rename(f"{dataset_dir_futebol}/football-logos-master", f"{dataset_dir_futebol}/database_futebol")


# Criação dos modelos

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

In [None]:
def _load_face_split(directory):
    """Load one split as batches of 48x48 grayscale images scaled to [0, 1] with one-hot labels."""
    batches = image_dataset_from_directory(
        directory,
        image_size=(48, 48),
        batch_size=32,
        label_mode="categorical",
        color_mode="grayscale",
        seed=123,
        shuffle=True
    )
    return batches.map(lambda pixels, onehot: (pixels / 255.0, onehot))


database_train = _load_face_split(path_train_path_face)
database_validation = _load_face_split(path_validation_path_face)

# Three Conv2D + MaxPooling2D stages with 32, 64 and 128 filters.
conv_stages = []
for n_filters in (32, 64, 128):
    conv_stages.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    conv_stages.append(layers.MaxPooling2D((2, 2)))

# Small CNN ending in a 2-way softmax, matching the two-unit one-hot labels.
model = models.Sequential([
    layers.InputLayer(input_shape=(48, 48, 1)),
    *conv_stages,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(2, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(database_train, validation_data=database_validation, epochs=10)

In [None]:
# Write the trained classifier to model.h5, leaving the optimizer state out of the file.
model_output_path = './consumidor-sentimento/model.h5'
model.save(model_output_path, include_optimizer=False)

In [None]:
# Frozen ImageNet MobileNetV2 backbone followed by global average pooling:
# each 224x224 RGB image becomes a single feature vector.
base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
embedding_model = Model(inputs=base_model.input, outputs=tf.keras.layers.GlobalAveragePooling2D()(base_model.output))

# glob returns files in arbitrary order; sorting keeps the row order of the
# embeddings and labels the same from run to run.
path_futebol_logos = sorted(glob.glob(f"{dataset_dir_futebol}/database_futebol/logos/*/*.png"))
if not path_futebol_logos:
    # Fail early with a clear message instead of an obscure error on an empty batch.
    raise FileNotFoundError(
        f"No logo images found under {dataset_dir_futebol}/database_futebol/logos - run the download cell first."
    )

imgs = []
labels = []

for img_path in path_futebol_logos:
    img = image.load_img(img_path, target_size=(224, 224))
    imgs.append(image.img_to_array(img))
    # The label is the file name without its extension.
    labels.append(os.path.splitext(os.path.basename(img_path))[0])

imgs = preprocess_input(np.array(imgs))
embeddings = embedding_model.predict(imgs, batch_size=32, verbose=1)
labels = np.array(labels)

print("Embeddings shape:", embeddings.shape)
print("Labels shape:", labels.shape)

Embeddings shape: (398, 1280)
Labels shape: (398,)


In [None]:
# Export the embedding matrix as space-separated text (six decimals per value)
# and the labels as one name per line, in the same row order.
np.savetxt(
    "consumidor-times/futebol_embeddings.txt",
    embeddings,
    delimiter=' ',
    fmt='%.6f',
    encoding="utf-8",
)
with open("consumidor-times/futebol_labels.txt", "w", encoding="utf-8") as labels_file:
    labels_file.writelines(name + "\n" for name in labels)

In [None]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors `a` and `b`.

    When either vector has zero norm the cosine is undefined; 0.0 is returned
    instead of NaN so that callers ranking similarities (np.argmax treats NaN
    as the maximum) never pick such a vector as the best match.
    """
    denom = norm(a) * norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def predict(embedding, embeddings_train, labels_train):
    """Return the label of the reference embedding most cosine-similar to `embedding`.

    Parameters:
        embedding: 1-D query vector.
        embeddings_train: reference vectors, one per row (assumed to be 2-D,
            rows aligned with `labels_train`).
        labels_train: sequence or array of labels, indexed by row.

    Raises:
        ValueError: if `embeddings_train` is empty.

    Rows with zero norm (or a zero-norm query) get similarity 0.0 rather than
    NaN, because np.argmax would otherwise select the NaN entry.
    """
    query = np.asarray(embedding, dtype=float)
    reference = np.asarray(embeddings_train, dtype=float)
    if reference.size == 0:
        raise ValueError("embeddings_train is empty")

    # One matrix-vector product replaces the per-row Python loop.
    dots = reference @ query
    denom = norm(reference, axis=1) * norm(query)
    sims = np.divide(dots, denom, out=np.zeros_like(dots), where=denom > 0)
    return labels_train[int(np.argmax(sims))]

# Reload the files written by the export cell, so this check exercises the
# artifacts that are actually produced by the notebook. No cell here writes
# futebol_embeddings_labels.npz, so loading it breaks a fresh run.
embeddings = np.loadtxt("consumidor-times/futebol_embeddings.txt", delimiter=' ', ndmin=2)
with open("consumidor-times/futebol_labels.txt", encoding="utf-8") as f:
    labels = np.array([line.rstrip("\n") for line in f])

assert len(embeddings) == len(labels), "embeddings and labels files are out of sync"

# Each embedding is matched against the same set it belongs to, so this is a
# round-trip sanity check of the export, not a measure of generalisation.
y_pred = [predict(e, embeddings, labels) for e in embeddings]

accuracy = np.mean(np.array(y_pred) == labels)
print(f"Training accuracy: {accuracy * 100:.2f}%")

Training accuracy: 100.00%
