In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras import models, layers, Input
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# Load data
#
# Each split (training / validation / test) is read from its own CSV file;
# all three use ';' as the field separator.

csv_options = {'sep': ';'}

train = pd.read_csv('../../Dataset/dataset_training_small.csv', **csv_options)
validation = pd.read_csv('../../Dataset/dataset_validation_small.csv', **csv_options)
test = pd.read_csv('../../Dataset/dataset_test_small.csv', **csv_options)

In [None]:
# Preprocess data
#
# Turn the 'text' column of every split into a dense bag-of-words count matrix
# and pull the 'ai_generator' column out as the label vector.

# Keras works in float32 by default, so ask the vectorizer for float32 counts
# up front: the dense matrices are half the size of the default int64 ones and
# no extra cast is needed at training time.
vectorizer = CountVectorizer(dtype=np.float32)

# The vocabulary is learned from the training split only (single pass via
# fit_transform); the other splits are only transformed with that vocabulary.
X_train = vectorizer.fit_transform(train['text']).toarray()
y_train = train['ai_generator'].to_numpy()

X_test = vectorizer.transform(test['text']).toarray()
y_test = test['ai_generator'].to_numpy()

X_validation = vectorizer.transform(validation['text']).toarray()
y_validation = validation['ai_generator'].to_numpy()

In [None]:
# Build Model
#
# Fully connected binary classifier: two ReLU hidden layers of equal width
# followed by a single sigmoid output unit. The input width equals the number
# of columns in the vectorized training matrix.

n_features = X_train.shape[1]
hidden = 16

model = models.Sequential([
    Input(shape=(n_features,)),
    layers.Dense(units=hidden, activation='relu'),
    layers.Dense(units=hidden, activation='relu'),
    layers.Dense(units=1, activation='sigmoid'),
])

# Train Model
#
# Binary cross-entropy loss with the RMSprop optimizer; accuracy is tracked
# under the key 'acc' so it can be read back from the training history.

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

# The validation split is scored after every epoch alongside the training data.
validation_pair = (X_validation, y_validation)

history = model.fit(
    X_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=validation_pair,
)

# Plot training curves


def _plot_training_curves(epochs, train_values, val_values, metric_label, title, ylabel):
    """Draw one figure comparing a training metric with its validation counterpart.

    Args:
        epochs: x-axis values, one per recorded epoch.
        train_values: per-epoch metric values on the training data (blue dots).
        val_values: per-epoch metric values on the validation data (blue line).
        metric_label: short metric name appended to the legend entries.
        title: figure title.
        ylabel: label for the y-axis.
    """
    _, ax = plt.subplots()
    ax.plot(epochs, train_values, 'bo', label=f'Training {metric_label}')
    ax.plot(epochs, val_values, 'b', label=f'Validation {metric_label}')
    ax.set(title=title, xlabel='Epochs', ylabel=ylabel)
    ax.legend()
    plt.show()


history_dict = history.history

# Plot accuracy curves

acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']
# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(acc_values) + 1)

_plot_training_curves(
    epochs,
    acc_values,
    val_acc_values,
    metric_label='acc',
    title='Training and validation accuracy',
    ylabel='Acc',
)

# Plot loss curves

loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

_plot_training_curves(
    epochs,
    loss_values,
    val_loss_values,
    metric_label='loss',
    title='Training and validation loss',
    ylabel='Loss',
)

# Evaluate Model
#
# Score the trained network on the test split, which was not passed to fit().
# `results` is left as a bare expression so the notebook displays it.
results = model.evaluate(x=X_test, y=y_test, verbose=0)
results

# Observed: extremely low loss and perfect accuracy.
# TODO: ask the instructor what this means.