# Load data and libraries

In [None]:
!pip install pymorphy2
import numpy as np
import pandas as pd
import math
from sklearn.preprocessing import OneHotEncoder
import re, os, pickle

import keras
from keras import Sequential
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

from keras.layers import Input, Embedding, Activation, Flatten, Dense, concatenate
from keras.layers import Conv1D, MaxPooling1D, Dropout, LSTM
from keras.models import Model

!pip install imblearn
from imblearn.over_sampling import RandomOverSampler, SMOTE, BorderlineSMOTE

In [None]:
# Mount Google Drive at /content/drive; the data files read later in this
# notebook are opened from paths under that directory.
# Relies on the google.colab package, so this cell only runs inside Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the precomputed embedding pickles (BERT embeddings, judging by the
# file names) for the validation and training splits.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('/content/drive/bert_embs_val.pickle', 'rb') as val_file:
    val_values = pickle.load(val_file)

with open('/content/drive/bert_embs_train.pickle', 'rb') as train_file:
    train_values = pickle.load(train_file)

In [None]:
# Read the training CSV. The file is read without a header row, so the two
# columns are named explicitly.
df = pd.read_csv(
    '/content/drive/train.csv',
    names=['text', 'label'],
    header=None,
)
df.head()  # notebook preview of the first rows

In [None]:
# Raw training texts as a NumPy array.
train_texts = df['text'].values

# Give every distinct label an integer id, numbered in the order in which
# the labels are returned by unique().
possible_labels = df['label'].unique()
label_dict = {name: idx for idx, name in enumerate(possible_labels)}

# Swap the label values for their integer ids and keep them as an array.
df['label'] = df['label'].replace(label_dict)
train_labels = df['label'].values

In [None]:
# Read the validation CSV with the same two-column layout as the training
# file. This rebinds `df`, so the training frame is no longer reachable.
df = pd.read_csv(
    '/content/drive/validation.csv',
    names=['text', 'label'],
    header=None,
)
df.head()  # notebook preview of the first rows

In [None]:
# Raw validation texts as a NumPy array.
val_texts = df['text'].values

# Encode validation labels with the mapping built from the training labels;
# a label that is not a key of label_dict is left unchanged by replace().
df['label'] = df['label'].replace(label_dict)

# Append the validation labels to the training labels (result is a list).
# NOTE(review): train_data is later built from the "train" pickles only, so
# this is correct only if those pickles hold one row per training AND
# validation example -- TODO confirm the row counts match.
train_labels = [*train_labels, *df['label'].values]

In [None]:
# Load the pickled per-example "distribution" features for both splits
# (presumably 100-dimensional, going by the td_100 file names -- TODO confirm).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('/content/drive/td_100_train.pickle', 'rb') as train_file:
    distributions_train = pickle.load(train_file)

with open('/content/drive/td_100_val.pickle', 'rb') as val_file:
    distributions_val = pickle.load(val_file)

In [None]:
# Build one feature matrix per split by stacking the embedding features and
# the distribution features horizontally.
train_parts = (np.array(train_values), np.array(distributions_train))
test_parts = (np.array(val_values), np.array(distributions_val))

train_data = np.hstack(train_parts)
test_data = np.hstack(test_parts)

train_data.shape  # notebook output: shape of the training matrix

In [None]:
# Sanity check: number of collected labels. It should equal the number of rows
# in train_data (shape shown in the previous cell) before resampling.
len(train_labels)

In [None]:
# Balance the classes by random oversampling; the seed is fixed so the
# resampling is repeatable.
oversampler = RandomOverSampler(random_state=1)
resampled = oversampler.fit_resample(train_data, train_labels)
train_data_resampled, trai_labels_resampled = resampled

In [None]:
# From here on, work with the oversampled features and labels.
train_data, train_labels = train_data_resampled, trai_labels_resampled

train_data.shape  # notebook output: shape after oversampling

In [None]:
# Shuffle features and labels together: attach the labels as an extra column,
# permute the rows, then split the column back off.
# NOTE: sample() is called without random_state, so the order differs per run.
df = pd.DataFrame(train_data).assign(label=pd.Series(train_labels))
df = df.sample(frac=1)

train_labels = df['label'].values
df = df.drop(columns=['label'])
train_data = df.values

train_data.shape  # notebook output: shape is unchanged by the shuffle

# Feed-forward network (FFN)

In [None]:
import math

# Hold out the first 10% of the (already shuffled) rows, rounded up, for
# validation and keep the remainder for training.
# NOTE(review): the data was oversampled earlier in this file, before this
# split, so duplicated rows can end up on both sides of it.
border = math.ceil(len(train_data) * 0.1)

val_data = train_data[:border]
train_data = train_data[border:]

val_labels = train_labels[:border]
train_labels = train_labels[border:]

In [None]:
# One-hot encode the integer class ids, with one column per entry of label_dict.
num_classes = len(label_dict)
train_labels = keras.utils.to_categorical(np.array(train_labels), num_classes)
val_labels = keras.utils.to_categorical(np.array(val_labels), num_classes)

In [None]:
# Feed-forward classifier: two tanh hidden layers followed by a softmax over
# the classes in label_dict.
# The input width is taken from train_data instead of the hard-coded 868, so
# the model follows the feature matrix if its width changes.
n_features = train_data.shape[1]
inputs = Input(shape=(n_features,), name='input')

# The first hidden layer previously had 2024 units while being named
# 'fully_connected_2048_tanh'; the unit count now matches the name.
x = Dense(2048, activation='tanh', name='fully_connected_2048_tanh')(inputs)
x = Dense(1024, activation='tanh', name='fully_connected_1024_tanh')(x)
predictions = Dense(len(label_dict), activation='softmax', name='output_softmax')(x)

model = Model(inputs=inputs, outputs=predictions)
# categorical_crossentropy matches the one-hot labels produced by to_categorical.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Save a diagram of the architecture next to the notebook.
from keras.utils import plot_model
plot_model(model, to_file='fnn.png')

In [None]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import pickle

# Train for 5 epochs, reporting loss/accuracy on the held-out rows each epoch.
history = model.fit(train_data, train_labels, epochs=5, verbose=2, validation_data=(val_data, val_labels))

# Convert softmax outputs and one-hot targets back to integer class ids.
predict = np.argmax(model.predict(val_data), axis=1)
answer = np.argmax(val_labels, axis=1)

# scikit-learn metrics take (y_true, y_pred). The previous code passed the
# arguments the other way round, which swapped precision and recall.
f1 = f1_score(answer, predict, average='macro') * 100
prec = precision_score(answer, predict, average='macro') * 100
recall = recall_score(answer, predict, average='macro') * 100
accuracy = accuracy_score(answer, predict) * 100

print(f1)

In [None]:
# Score test_data with the trained model and store the raw model outputs
# on Drive.
prediction = model.predict(test_data)

with open('/content/drive/pred_tm.pickle', 'wb') as out_file:
    pickle.dump(prediction, out_file)

# Ensembling

In [None]:
# Fixed label-code <-> class-id mapping used when decoding ensemble output.
# NOTE(review): this is hard-coded, whereas label_dict earlier in the file is
# derived from the data -- confirm that both use the same ids.
label_codes = ('LO', 'NI', 'DS', 'CL', 'DC', 'SE', 'CR')
labels = {code: idx for idx, code in enumerate(label_codes)}
inv_labels = {idx: code for code, idx in labels.items()}
inv_labels

In [None]:
# Translate every entry of flat_predictions into its label code via inv_labels.
# NOTE(review): flat_predictions is only assigned further down in this file,
# so this cell has to be run after that one. The lookup also needs each entry
# to be a key of inv_labels (an integer class id) -- presumably an argmax over
# the summed scores is expected first; TODO confirm.
flat_predictions = [inv_labels[f] for f in flat_predictions]
flat_predictions[:10]  # notebook preview of the first ten decoded labels

In [None]:
# Load the saved predictions of the three ensemble members.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('/content/drive/predictions1.pickle', 'rb') as first_file:
    pred1 = pickle.load(first_file)

with open('/content/drive/predictions2.pickle', 'rb') as second_file:
    pred2 = pickle.load(second_file)

with open('/content/drive/predictions3.pickle', 'rb') as third_file:
    pred3 = pickle.load(third_file)

In [None]:
# Combine the three models by summing their predictions element by element.
# The previous loop added pred2[1] (a constant index) to every entry instead
# of pred2[i], so the second model contributed the same value everywhere.
if not (len(pred1) == len(pred2) == len(pred3)):
    # zip() would silently truncate to the shortest input; fail loudly instead.
    raise ValueError(
        'prediction lists differ in length: '
        f'{len(pred1)}, {len(pred2)}, {len(pred3)}'
    )

final = [p1 + p2 + p3 for p1, p2, p3 in zip(pred1, pred2, pred3)]

# Entries are expected to expose .shape (e.g. NumPy arrays).
print(final[0].shape)
final[0]

In [None]:
# Flatten one level: concatenate the items of every entry of `final` into a
# single list, preserving order.
flat_predictions = []
for batch in final:
    flat_predictions.extend(batch)
flat_predictions[0]