In [None]:
!unzip archive.zip

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.metrics import accuracy_score

In [None]:
class Model1:
  """LSTM text classifier trained on a headerless CSV of labelled tweets.

  The CSV is read without a header row; column 2 holds the sentiment label and
  column 3 the tweet text. State is created lazily by the methods below:

    label_encoder, classes   -- set by `_load_dataset` when it fits the encoder
    tokenizer, vocab_size    -- set by `_build_corpus`
    max_sequence_length,
    X_train_padded           -- set by `_transform_text`
    model                    -- set by `_train`
    accuracy                 -- set by `_test`
  """

  def _load_dataset(self, path, fit_encoder=True):
    """Read a CSV and return `(tweets, labels)` as two pandas Series.

    Args:
      path: CSV file without a header row (column 2 = label, column 3 = text).
      fit_encoder: when True (default) a new LabelEncoder is fitted on this
        file's labels and stored on the instance together with `self.classes`.
        When False the previously fitted encoder is reused, so evaluation
        labels share the integer mapping learned from the training data.

    Returns:
      tweets: Series named 'tweet', values cast to `str`.
      labels: Series named 'sentiment' holding the integer-encoded labels.

    Raises:
      RuntimeError: if `fit_encoder` is False and no encoder has been fitted.
    """
    frame = pd.read_csv(path, header=None)
    data = frame[[2, 3]].rename(columns={2: 'sentiment', 3: 'tweet'})
    # astype(str) also turns missing tweets into the literal string "nan".
    tweets = data['tweet'].astype(str)

    if fit_encoder:
      self.label_encoder = LabelEncoder().fit(data['sentiment'])
      self.classes = self.label_encoder.classes_
    elif getattr(self, 'label_encoder', None) is None:
      raise RuntimeError(
          "No fitted label encoder: load the training data before evaluation data.")

    # Build a fresh Series instead of assigning into a sliced frame, which
    # avoids pandas' SettingWithCopy pitfalls.
    labels = pd.Series(
        self.label_encoder.transform(data['sentiment']),
        index=data.index,
        name='sentiment',
    )
    return tweets, labels

  def _build_corpus(self, X_train):
    """Fit a Tokenizer on the training texts and record the vocabulary size."""
    self.tokenizer = Tokenizer()
    self.tokenizer.fit_on_texts(X_train)
    # +1 because Tokenizer word indices start at 1; index 0 is left for padding.
    self.vocab_size = len(self.tokenizer.word_index) + 1

  def _transform_text(self, X_train):
    """Convert training texts to padded integer sequences.

    Stores the longest sequence length in `self.max_sequence_length` and the
    padded matrix in `self.X_train_padded`.
    """
    sequences = self.tokenizer.texts_to_sequences(X_train)
    self.max_sequence_length = max(len(seq) for seq in sequences)
    self.X_train_padded = pad_sequences(sequences, maxlen=self.max_sequence_length)

  def _predict(self, X_test):
    """Return the predicted class index for each text in `X_test` as a list."""
    test_sequences = self.tokenizer.texts_to_sequences(X_test)
    # Pad to the length seen at training time.
    padded_test_sequences = pad_sequences(test_sequences, maxlen=self.max_sequence_length)
    probabilities = self.model.predict(padded_test_sequences)
    # One vectorized argmax over the class axis instead of a per-row Python loop.
    return list(np.argmax(probabilities, axis=1))

  def _train(self, path='twitter_training.csv', epochs=1, embedding_dim=100, lstm_units=32):
    """Load the training CSV, fit the tokenizer and train the LSTM classifier.

    Args:
      path: training CSV (see `_load_dataset` for the expected layout).
      epochs: number of passes over the training data.
      embedding_dim: size of the learned word embeddings.
      lstm_units: number of units in the LSTM layer.
    """
    X_train, y_train = self._load_dataset(path)
    self._build_corpus(X_train)
    self._transform_text(X_train)
    self.model = Sequential([
      Embedding(self.vocab_size, embedding_dim, input_length=self.max_sequence_length),
      LSTM(units=lstm_units),
      # Output width follows the labels found in the data, not a fixed count.
      Dense(len(self.classes), activation='softmax')
    ])
    self.model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    self.model.fit(self.X_train_padded, y_train, epochs=epochs, verbose=2)

  def _test(self, path="twitter_validation.csv"):
    """Score the trained model on a held-out CSV and return its accuracy.

    Labels are encoded with the encoder fitted during training so that
    predicted and true class indices refer to the same classes. The result is
    also stored in `self.accuracy`.
    """
    X_test, y_test = self._load_dataset(path, fit_encoder=False)
    predictions = self._predict(X_test)
    self.accuracy = accuracy_score(y_test, predictions)
    return self.accuracy

In [None]:
# Create the classifier wrapper; nothing is loaded or trained until _train() runs.
model = Model1()

In [None]:
# Load 'twitter_training.csv', fit the tokenizer and train the LSTM classifier.
model._train()

2334/2334 - 271s - loss: 0.7219 - accuracy: 0.7175 - 271s/epoch - 116ms/step


In [None]:
# Evaluate on "twitter_validation.csv"; the returned accuracy is also stored on the instance.
model._test()



0.934

In [None]:
# Accuracy recorded by the most recent _test() call.
model.accuracy

0.934

In [None]:
# NOTE(review): no cell shown here writes "model1.h5" — confirm the file exists
# (e.g. shipped in the archive or saved elsewhere) before relying on this cell.
# NOTE(review): this rebinds `model` from the Model1 wrapper to the object
# returned by load_model, so `model._test()` / `model.accuracy` above would no
# longer work if re-run after this cell.
model = load_model("model1.h5")