In [None]:
!pip install wandb --quiet

In [1]:
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import wandb
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, LSTM, BatchNormalization, Activation
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from wandb.keras import WandbCallback

In [None]:
# Open a Weights & Biases run for this notebook. The config dict is only
# recorded as run metadata here; nothing below reads it back.
wandb.init(
    project='TPRedesNeurais',
    config={'batch_size': 32},
)

In [2]:
# Colab-only: mount Google Drive so the dataset folders under
# /content/drive/MyDrive are readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def load_json_files(folder_path, contador):
    """Load every ``.json`` file located directly inside ``folder_path``.

    Files are read in sorted filename order so repeated runs return the
    documents in the same order. Loading is best-effort: a file that cannot
    be opened or parsed is reported and skipped, and the remaining files are
    still loaded. If the folder itself cannot be listed, the error is
    reported and an empty list is returned.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).
        contador: Starting value of the error counter that is printed after
            each reported failure. It is a local value only; the caller's
            variable is not updated.

    Returns:
        list: The parsed JSON documents, one per successfully loaded file.
    """
    data = []

    try:
        filenames = sorted(os.listdir(folder_path))
    except OSError as e:
        # No filename exists yet at this point, so report the folder only.
        print(f"Error listing files in {folder_path}: {e}")
        print(contador)
        return data

    for filename in filenames:
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data.append(json.load(file))
        except Exception as e:
            # Deliberately broad: one unreadable or malformed file must not
            # discard the rest of the dataset.
            print(f"Error loading {file_path}: {e}")
            print(contador)
            contador += 1
    return data

In [4]:
# Read the pre-computed records for each split/class folder on Drive.
# The second argument is the starting value of the loader's error counter.
train_pos_data = load_json_files(
    folder_path='/content/drive/MyDrive/Faculdade/6o Periodo/outputs/train/pos',
    contador=0,
)
train_neg_data = load_json_files(
    folder_path='/content/drive/MyDrive/Faculdade/6o Periodo/outputs/train/neg',
    contador=0,
)

test_pos_data = load_json_files(
    folder_path='/content/drive/MyDrive/Faculdade/6o Periodo/outputs/test/pos',
    contador=0,
)
test_neg_data = load_json_files(
    folder_path='/content/drive/MyDrive/Faculdade/6o Periodo/outputs/test/neg',
    contador=0,
)

In [5]:
# Keep the original train/test folders as the split: each set is the
# concatenation of its positive and negative records.
train_data = train_pos_data + train_neg_data
test_data = test_pos_data + test_neg_data

# Shuffle so the classes are interleaved. This matters because Keras'
# `validation_split` takes the *last* fraction of the samples before any
# shuffling, which would otherwise be negatives only. A dedicated, seeded
# generator makes the order repeatable for a given input order, without
# touching the global `random` state used elsewhere in the notebook.
shuffle_rng = random.Random(42)
shuffle_rng.shuffle(train_data)
shuffle_rng.shuffle(test_data)

In [6]:
# Pool every record from both original folders; this pooled list is what the
# 80/20 experiment re-splits later.
positive_data = train_pos_data + test_pos_data
negative_data = train_neg_data + test_neg_data
data = positive_data + negative_data

# Seeded shuffle: with an unseeded shuffle the input order changes on every
# run, so the fixed `random_state` passed to `train_test_split` later would
# still yield a different split each time.
random.Random(42).shuffle(data)

In [7]:
# Turn the pooled records into a feature matrix (X) and a label vector (y).
embedding_rows = []
label_values = []
for record in data:
    embedding_rows.append(record['sentence_embedding'])
    label_values.append(record['label'])
X = np.array(embedding_rows)
y = np.array(label_values)

# Hold out 20% of the pooled samples for testing; the fixed random_state
# pins the split for a given input order.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=42,
)

In [8]:
def _stack_field(records, key):
    """Collect ``record[key]`` from every record into a single NumPy array."""
    return np.array([record[key] for record in records])


# Arrays for the experiment that keeps the original train/test folders.
train_embeddings = _stack_field(train_data, 'sentence_embedding')
train_labels = _stack_field(train_data, 'label')

test_embeddings = _stack_field(test_data, 'sentence_embedding')
test_labels = _stack_field(test_data, 'label')

In [None]:
# Recurrent variant of the classifier: each embedding vector is reshaped into
# a length-1 sequence and passed through two stacked bidirectional LSTMs,
# ending in a single sigmoid unit for binary classification.
# NOTE: the following cell rebinds `model` to a Dense network, so this
# architecture is only used if that cell is skipped.
input_shape = (4096,)

model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer (same style as the Dense
    # model below) rather than the `input_shape=` layer argument, which newer
    # Keras releases warn against.
    tf.keras.layers.Input(shape=input_shape),
    tf.keras.layers.Reshape((1, 4096)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    # Optional regularisation between the recurrent layers (disabled):
    #tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [51]:
# Feed-forward classifier over the 4096-value input vectors: three ReLU
# hidden layers of decreasing width, then one sigmoid unit that outputs the
# probability used for binary classification.
model = Sequential([
    Input(shape=(4096,)),
    *(Dense(units, activation='relu') for units in (256, 128, 64)),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is
# tracked as the reporting metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Save a diagram of the current architecture (with tensor shapes) to Drive.
plot_model(
    model,
    to_file='/content/drive/MyDrive/Faculdade/6o Periodo/model_architecture.png',
    show_shapes=True,
)

Treino utilizando 50/50

In [None]:
# Train on the original train folder; the last 10% of the samples are held
# out by Keras as the validation set.
history = model.fit(
    x=train_embeddings,
    y=train_labels,
    epochs=35,
    validation_split=0.1,
)

In [None]:
# Training vs. validation loss per epoch for the run stored in `history`.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Losses')
ax.legend()
plt.show()

In [None]:
# Score the trained model on the original test folder; evaluate() returns
# the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(
    x=test_embeddings,
    y=test_labels,
)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Treino utilizando 80/20

In [None]:
# Start the 80/20 experiment from freshly initialised weights. Without this,
# fit() would continue training the model already fitted in the 50/50
# section, and X_test (drawn from the pooled data) could contain samples that
# model has already seen, inflating the reported test accuracy.
# clone_model() rebuilds the same architecture with new weights; the clone
# must be compiled again before training.
model = tf.keras.models.clone_model(model)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# The last 10% of X_train is held out by Keras as the validation set.
history = model.fit(X_train, y_train, epochs=35, validation_split=0.1)

In [None]:
# Loss curves for the most recent training run (`history`).
fig, ax = plt.subplots()
for history_key, curve_label in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
):
    ax.plot(history.history[history_key], label=curve_label)
ax.set(
    xlabel='Epochs',
    ylabel='Loss',
    title='Training and Validation Losses',
)
ax.legend()
plt.show()

In [None]:
# Score the model on the 20% hold-out produced by train_test_split.
test_loss, test_accuracy = model.evaluate(
    x=X_test,
    y=y_test,
)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Spot-check: draw one record from `test_data` and compare the model's
# prediction with its stored label.
# NOTE(review): `test_data` records are also part of the pooled `data`, so
# after the 80/20 run this sample may have been seen during training.
random_index = random.randrange(len(test_data))
sample_test_data = test_data[random_index]

# predict() works on batches, so the single embedding is wrapped in an outer
# list to form a batch of one.
sample_test_embedding = np.array([sample_test_data['sentence_embedding']])
prediction = model.predict(sample_test_embedding)

print("Original Data:")
for caption, value in (
    ("Text:", sample_test_data['text']),
    ("Sentence Embedding:", sample_test_embedding.flatten()),
    ("True Label:", sample_test_data['label']),
):
    print(caption, value)

print("\nModel Prediction:")
print("Predicted Probability:", prediction.flatten())
# Rounding the sigmoid output turns the probability into a 0/1 class label.
print("Predicted Label:", int(round(prediction[0][0])))