In [None]:
import torch as t
import numpy as np
import pandas as pd
import preprocessing
from RNN.classifier import RnnTweetsClassifier, LSTMTweetsClassifier
from RNN.tweets_dataset import StancesDataset, CategoriesDataset
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_score, f1_score, recall_score, accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Build the stance training set from the train CSV and wrap it in an RNN classifier.
# NOTE(review): the six numeric arguments are positional hyperparameters whose
# meaning is defined by RnnTweetsClassifier; consider keyword arguments once confirmed.
dataset = StancesDataset('./dataset/train.csv', 'train')
tweet_classifier = RnnTweetsClassifier(
    dataset,
    50,
    50,
    2,
    3,
    120,
    0.001,
)

In [None]:
# Train the stance model. The training procedure itself lives in
# RnnTweetsClassifier.train(); this cell only triggers it.
tweet_classifier.train()

In [None]:
# Stance dev split, read in 'test' mode and served in batches of 256.
testset = StancesDataset('./dataset/dev.csv', 'test')
testloader = DataLoader(testset, batch_size=256)

In [None]:
# Run the stance classifier over the dev loader and print the per-class
# precision / recall / F1 table.
predicted, labels = tweet_classifier.predict(testloader)
print(classification_report(y_true=labels, y_pred=predicted))

In [None]:
# Build the category training set from the train CSV and wrap it in an RNN classifier.
# NOTE(review): the six numeric arguments are positional hyperparameters whose
# meaning is defined by RnnTweetsClassifier; consider keyword arguments once confirmed.
categories_dataset = CategoriesDataset('./dataset/train.csv', 'train')
gategories_classifier = RnnTweetsClassifier(
    categories_dataset,
    50,
    50,
    2,
    10,
    120,
    0.001,
)

In [None]:
# Train the categories model. The training procedure itself lives in
# RnnTweetsClassifier.train(); this cell only triggers it.
gategories_classifier.train()

In [None]:
# Category dev split, read in 'test' mode and served in batches of 256.
categories_testset = CategoriesDataset('./dataset/dev.csv', 'test')
categories_testloader = DataLoader(categories_testset, batch_size=256)

In [None]:
# Run the categories classifier over the dev loader and print the per-class
# precision / recall / F1 table.
predicted, labels = gategories_classifier.predict(categories_testloader)
print(classification_report(y_true=labels, y_pred=predicted))

In [None]:
# Prepare the unlabeled test CSV: make sure it has both a 'category' and a
# 'stance' column, filling in placeholder labels where none exist
# (presumably so StancesDataset / CategoriesDataset can load the file).
test_file = pd.read_csv('dataset/test.csv')

# A label column needs placeholders when it is absent or holds no values at all.
# Series.empty is only True for a zero-row frame, and indexing a missing column
# raises KeyError, so neither of those detects "no labels present".
if 'category' not in test_file.columns or test_file['category'].isna().all():
    test_file['category'] = 'info'
if 'stance' not in test_file.columns or test_file['stance'].isna().all():
    test_file['stance'] = 0

# index=False keeps this cell idempotent: without it every run would prepend
# another unnamed index column to the file it has just read.
test_file.to_csv('dataset/test.csv', index=False)

In [None]:
# Wrap the prepared test CSV for each task: one dataset and one batch-256
# loader for stances, and the same pair for categories.
stances_test_set = StancesDataset('dataset/test.csv', 'test')
stances_test_set_loader = DataLoader(dataset=stances_test_set, batch_size=256)

categories_test_set = CategoriesDataset('dataset/test.csv', 'test')
categories_test_set_loader = DataLoader(dataset=categories_test_set, batch_size=256)

In [None]:
# Predict stances and categories for the test set with the two trained models.
# predict() returns a pair; the second element (the labels in the dev-set
# evaluation cells) is discarded here.
predicted_stances, _ = tweet_classifier.predict(stances_test_set_loader)
predicted_categories, _ = gategories_classifier.predict(categories_test_set_loader)

In [None]:
# Convert raw model outputs into output labels.
# categoriesMap: class index (0-9) -> category name.
categoriesMap = dict(enumerate((
    'info_news',
    'celebrity',
    'plan',
    'requests',
    'rumors',
    'advice',
    'restrictions',
    'personal',
    'unrelated',
    'others',
)))

# Stance class 2 is reported as -1; every other class keeps its integer value.
predicted_stances = [
    -1 if stance == 2 else int(stance)
    for stance in predicted_stances
]
# Category class indices are replaced by their names.
predicted_categories = [
    categoriesMap[int(category)]
    for category in predicted_categories
]

In [None]:
# Save the predictions to result.csv with a 'stances' and a 'categories' column.
# to_csv is called with its defaults, so the row index is written as a leading
# unnamed column.
d = dict(stances=predicted_stances, categories=predicted_categories)
test_csv = pd.DataFrame(d, columns=['stances', 'categories'])
test_csv.to_csv('result.csv')