In [None]:
!pip install tensorflow --upgrade
!pip install keras --upgrade
!pip install --upgrade tensorflow-metadata


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Read the tab-separated training file straight from the AfriSenti-SemEval repository
url = 'https://raw.githubusercontent.com/afrisenti-semeval/afrisent-semeval-2023/main/data/hau/train.tsv'
df = pd.read_csv(url, sep='\t')

# Pull the tweet texts and their labels out of the frame as arrays
reviews, labels = df['tweet'].values, df['label'].values

# Hold out 20% of the rows for validation; the fixed random_state keeps the split repeatable
train_reviews, val_reviews, train_labels, val_labels = train_test_split(
    reviews,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the vocabulary on the training reviews only (limited via num_words=5000)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_reviews)
word_index = tokenizer.word_index

# Turn the training reviews into integer sequences and pad them all to the
# length of the longest training sequence
sequences = tokenizer.texts_to_sequences(train_reviews)
max_len = max(map(len, sequences))
train_data = pad_sequences(sequences, maxlen=max_len)

# Encode the validation reviews with the same tokenizer and the same padded length
val_sequences = tokenizer.texts_to_sequences(val_reviews)
val_data = pad_sequences(val_sequences, maxlen=max_len)

# One-hot encoding the labels for training and validation sets.
# Both splits are encoded against one shared, sorted class list taken from the
# full label column. Encoding each split on its own would silently produce
# arrays with different column counts whenever a class is missing from a split.
# dtype='float32' avoids the boolean dummies that recent pandas versions return.
label_classes = sorted(pd.Series(labels).dropna().unique())
train_labels = pd.get_dummies(
    pd.Series(pd.Categorical(train_labels, categories=label_classes)),
    dtype='float32',
).to_numpy()
val_labels = pd.get_dummies(
    pd.Series(pd.Categorical(val_labels, categories=label_classes)),
    dtype='float32',
).to_numpy()


# Assemble the classifier as one layer stack:
# embedding -> 1D convolution -> max-pooling -> flatten -> dense head with a 3-unit softmax
model = Sequential([
    Embedding(5000, 32, input_length=max_len),
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])

# Instantiate the precision, recall and AUC metric objects that are handed to
# model.compile() below (the F1 score is produced by the f1_score function instead)
precision, recall, auc = (
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
)

# Creating a function to compute the F1 score
def f1_score(y_true, y_pred):
    """Return the micro-averaged F1 score of one batch, without shared state.

    The module-level ``precision`` and ``recall`` metric objects are stateful and
    are also passed to ``model.compile``. Calling them from here would push every
    batch into their accumulators a second time and make this value depend on
    another metric's running state. The counts are therefore taken directly from
    the tensors passed in.

    Args:
        y_true: One-hot ground-truth labels, same shape as ``y_pred``.
        y_pred: Predicted class scores; a score strictly above 0.5 counts as a
            positive prediction (the default threshold of Keras ``Precision``
            and ``Recall``).

    Returns:
        Scalar float tensor holding ``2 * p * r / (p + r)`` for this batch.
        Keras averages the values returned by a function metric over batches,
        so the logged number is a mean of batch-level F1 scores.
    """
    y_true = tf.cast(y_true, tf.float32)
    predicted = tf.cast(tf.cast(y_pred, tf.float32) > 0.5, tf.float32)

    true_positives = tf.reduce_sum(y_true * predicted)
    # divide_no_nan yields 0 instead of NaN when a batch has no (predicted) positives
    p = tf.math.divide_no_nan(true_positives, tf.reduce_sum(predicted))
    r = tf.math.divide_no_nan(true_positives, tf.reduce_sum(y_true))
    # The small epsilon keeps the ratio finite when precision and recall are both 0
    return 2*((p*r)/(p+r+1e-7))

# Configure training: categorical cross-entropy loss, Adam optimizer, and the
# accuracy / precision / recall / F1 / AUC metrics
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', precision, recall, f1_score, auc],
)

# Fit for 5 epochs with batches of 32, passing the held-out split as validation data
model.fit(
    train_data,
    train_labels,
    validation_data=(val_data, val_labels),
    epochs=5,
    batch_size=32,
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f8f09716550>