In [None]:
import gensim.downloader as api
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, LSTM, Bidirectional, SimpleRNN, GRU, Flatten
from tensorflow.keras.models import Model
from xgboost import XGBClassifier


glove_twitter = api.load("glove-twitter-100")


def text_to_glove_vector(text, model, embedding_dim):
    """Embed a piece of text as the mean of its known word vectors.

    The text is lower-cased and split on whitespace; every token present in
    ``model.key_to_index`` is looked up with ``model[token]`` and the vectors
    are averaged element-wise.

    Args:
        text: The text to embed. Non-string values (e.g. the float NaN that
            pandas produces for an empty CSV cell, or None) are treated as
            text without any known word.
        model: Word-vector lookup exposing ``key_to_index`` and ``model[word]``.
        embedding_dim: Length of the zero vector returned when no token of
            ``text`` is found in ``model``.

    Returns:
        A 1-D numpy array: the mean word vector, or ``np.zeros(embedding_dim)``
        when there is nothing to average.
    """
    # Missing values have no .lower(); fall back to the "no known words" result
    # instead of raising AttributeError in the middle of a DataFrame.apply.
    if not isinstance(text, str):
        return np.zeros(embedding_dim)

    tokens = text.lower().split()
    vectors = [model[token] for token in tokens if token in model.key_to_index]
    if not vectors:
        return np.zeros(embedding_dim)
    return np.mean(vectors, axis=0)




In [None]:

from sklearn.decomposition import PCA
def apply_glove_conversion(data, model, embedding_dim=100):
    """Add GloVe feature columns for the tweet and target text of each row.

    Note: ``data`` is modified in place (three columns are added) and the same
    object is returned.

    Args:
        data: DataFrame with text columns ``'Tweet'`` and ``'Target'``.
        model: Word-vector lookup passed through to ``text_to_glove_vector``.
        embedding_dim: Dimensionality handed to ``text_to_glove_vector`` for
            its zero-vector fallback. Defaults to 100, the value previously
            hard-coded here; it should match the vector size of ``model``.

    Returns:
        ``data`` with the added columns ``'GloVe_Vector_Tweet'``,
        ``'GloVe_Vector_Target'`` and ``'Combined_Vector'`` (tweet vector
        followed by target vector in a single 1-D array).
    """
    data['GloVe_Vector_Tweet'] = data['Tweet'].apply(
        lambda text: text_to_glove_vector(text, model, embedding_dim)
    )
    data['GloVe_Vector_Target'] = data['Target'].apply(
        lambda text: text_to_glove_vector(text, model, embedding_dim)
    )
    # Tweet vector first, target vector second.
    data['Combined_Vector'] = data.apply(
        lambda row: np.concatenate((row['GloVe_Vector_Tweet'], row['GloVe_Vector_Target'])),
        axis=1,
    )
    return data



from keras.models import Sequential

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across "Restart & Run All" (GPU kernels may still
# introduce some nondeterminism).
tf.keras.utils.set_random_seed(42)

# Load the preprocessed dataset and attach the GloVe feature columns.
d = pd.read_csv("/content/new_sem_eval_preprocessed.csv")
d = apply_glove_conversion(d, glove_twitter)

# Encode the stance labels as integers; `le.classes_` keeps the original names.
le = LabelEncoder()
d['Stance'] = le.fit_transform(d['Stance'])

# One row per sample: the per-row combined vectors stacked into a 2-D matrix.
X = np.vstack(d['Combined_Vector'])
y = d['Stance']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7658)

# Callbacks shared by every Keras model trained below.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# `restore_best_weights` is an EarlyStopping option, not a ReduceLROnPlateau
# one, so it is no longer passed here.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# Reduce the features to 50 principal components; PCA is fitted on the
# training split only and then applied to the test split.
# random_state keeps the projection repeatable if a randomized solver is chosen.
pca = PCA(n_components=50, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Random forest baseline: train on the PCA-reduced training features, then
# predict integer class labels for the PCA-reduced test features.
rf_classifier = RandomForestClassifier(n_estimators=500, random_state=42).fit(X_train_pca, y_train)
rf_pred = rf_classifier.predict(X_test_pca)

# Gradient-boosted trees on the same features, default hyper-parameters
# apart from the fixed seed.
xgb_classifier = XGBClassifier(random_state=42).fit(X_train_pca, y_train)
xgb_pred = xgb_classifier.predict(X_test_pca)


# One-hot encode the integer labels for the softmax / categorical
# cross-entropy models. The class count is pinned from the label encoder so
# both matrices always have the same width, even if a split happens to miss
# the highest-numbered class.
num_classes = len(le.classes_)
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)


# The recurrent layers expect (samples, timesteps, features); each sample is
# presented as a sequence of length 1 holding its PCA feature vector.
# NOTE: from here on X_train / X_test refer to these 3-D PCA arrays, not the
# original 2-D feature matrices from train_test_split.
X_train = X_train_pca.reshape((X_train_pca.shape[0], 1, X_train_pca.shape[1]))
X_test = X_test_pca.reshape((X_test_pca.shape[0], 1, X_test_pca.shape[1]))
# Two stacked bidirectional LSTM layers followed directly by the softmax output.
# The input is declared with an explicit Input layer instead of passing
# `input_shape` to the Bidirectional wrapper. Feature and class counts are read
# from the data (X_train is (samples, 1, features); y_train_one_hot has one
# column per class) rather than hard-coded.
lstm_model = Sequential([
    Input(shape=(None, X_train.shape[-1])),
    Bidirectional(LSTM(100, return_sequences=True)),
    Bidirectional(LSTM(50)),
    Dense(y_train_one_hot.shape[1], activation='softmax'),
])
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train once: the last 20% of the training rows are held out for validation,
# which drives early stopping and the learning-rate schedule.
# (The previous second, identical fit() call silently retrained the model.)
lstm_model.fit(X_train, y_train_one_hot, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

# Convert the class probabilities to integer class labels.
lstm_pred = np.argmax(lstm_model.predict(X_test), axis=-1)

# Bidirectional LSTM with an extra dense hidden layer before the softmax.
# X_train is (samples, 1, features) and y_train_one_hot has one column per
# class, so the layer sizes are derived from them instead of being hard-coded.
bilstm_input = Input(shape=(None, X_train.shape[-1]))
bilstm_out = Bidirectional(LSTM(100, return_sequences=True))(bilstm_input)
bilstm_out = Bidirectional(LSTM(50))(bilstm_out)
bilstm_out = Dense(64, activation='relu')(bilstm_out)
bilstm_out = Dense(y_train_one_hot.shape[1], activation='softmax')(bilstm_out)

bilstm_model = Model(inputs=bilstm_input, outputs=bilstm_out)
bilstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train once; the duplicated fit() call that used to follow retrained the
# already-fitted model with the same arguments.
bilstm_model.fit(X_train, y_train_one_hot, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

# Convert the class probabilities to integer class labels.
bilstm_pred = np.argmax(bilstm_model.predict(X_test), axis=-1)

# Two stacked SimpleRNN layers, a dense hidden layer, then the softmax output.
# X_train is (samples, 1, features) and y_train_one_hot has one column per
# class, so the layer sizes are derived from them instead of being hard-coded.
rnn_input = Input(shape=(None, X_train.shape[-1]))
rnn_out = SimpleRNN(100, return_sequences=True)(rnn_input)
rnn_out = SimpleRNN(50)(rnn_out)
rnn_out = Dense(64, activation='relu')(rnn_out)
rnn_out = Dense(y_train_one_hot.shape[1], activation='softmax')(rnn_out)

rnn_model = Model(inputs=rnn_input, outputs=rnn_out)
rnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train once; the duplicated fit() call that used to follow retrained the
# already-fitted model with the same arguments.
rnn_model.fit(X_train, y_train_one_hot, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

# Convert the class probabilities to integer class labels.
rnn_pred = np.argmax(rnn_model.predict(X_test), axis=-1)

# Two stacked GRU layers, a dense hidden layer, then the softmax output.
# X_train is (samples, 1, features) and y_train_one_hot has one column per
# class, so the layer sizes are derived from them instead of being hard-coded.
gru_input = Input(shape=(None, X_train.shape[-1]))
gru_out = GRU(100, return_sequences=True)(gru_input)
gru_out = GRU(50)(gru_out)
gru_out = Dense(64, activation='relu')(gru_out)
gru_out = Dense(y_train_one_hot.shape[1], activation='softmax')(gru_out)

gru_model = Model(inputs=gru_input, outputs=gru_out)
gru_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train once; the duplicated fit() call that used to follow retrained the
# already-fitted model with the same arguments.
gru_model.fit(X_train, y_train_one_hot, epochs=50, batch_size=64, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

# Convert the class probabilities to integer class labels.
gru_pred = np.argmax(gru_model.predict(X_test), axis=-1)



  super().__init__(**kwargs)


Epoch 1/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 35ms/step - accuracy: 0.4982 - loss: 1.0570 - val_accuracy: 0.5646 - val_loss: 0.9471 - learning_rate: 0.0010
Epoch 2/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.5837 - loss: 0.9139 - val_accuracy: 0.5600 - val_loss: 0.9017 - learning_rate: 0.0010
Epoch 3/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6312 - loss: 0.8492 - val_accuracy: 0.5908 - val_loss: 0.8716 - learning_rate: 0.0010
Epoch 4/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6510 - loss: 0.8133 - val_accuracy: 0.5877 - val_loss: 0.8620 - learning_rate: 0.0010
Epoch 5/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6632 - loss: 0.7909 - val_accuracy: 0.5923 - val_loss: 0.8481 - learning_rate: 0.0010
Epoch 6/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [None]:

# Meta-features: one row per test sample, one column per base model's
# predicted class label.
stacked_X_test = np.column_stack((xgb_pred, rf_pred, lstm_pred, bilstm_pred, rnn_pred, gru_pred))

final_classifier = RandomForestClassifier(n_estimators=150, random_state=42)

# Score the meta-classifier on out-of-fold predictions: every row is predicted
# by a model that was fitted on the other folds only. Fitting on
# (stacked_X_test, y_test) and predicting the very same rows would report the
# meta-classifier's training accuracy rather than its generalisation.
# NOTE: the meta-classifier still learns from test-set labels (of other folds);
# a cleaner protocol would train it on out-of-fold base-model predictions for
# the training split.
final_pred = cross_val_predict(final_classifier, stacked_X_test, y_test, cv=5)

# cross_val_predict works on clones, so fit the estimator itself to leave
# `final_classifier` fitted for any later use.
final_classifier.fit(stacked_X_test, y_test)

accuracy = accuracy_score(y_test, final_pred)
print(f'Combined Ensemble Accuracy: {accuracy}')
print(classification_report(y_test, final_pred, target_names=le.classes_))

Combined Ensemble Accuracy: 0.7109471094710947
              precision    recall  f1-score   support

     against       0.71      0.92      0.80       429
       favor       0.72      0.56      0.63       216
        none       0.73      0.39      0.51       168

    accuracy                           0.71       813
   macro avg       0.72      0.62      0.64       813
weighted avg       0.71      0.71      0.69       813



In [None]:
def _most_voted_label(votes):
    """Return the class label occurring most often in one row of model votes.

    When several labels share the highest count, the smallest label wins,
    because ``argmax`` returns the first maximum of the bincount.
    """
    return np.bincount(votes).argmax()


# Hard majority vote across the base models, evaluated row by row.
majority_voting = np.apply_along_axis(_most_voted_label, 1, stacked_X_test)

accuracy = accuracy_score(y_test, majority_voting)

print(f'Majority Voting Accuracy: {accuracy}')
print(classification_report(y_test, majority_voting, target_names=le.classes_))

Majority Voting Accuracy: 0.6531365313653137
              precision    recall  f1-score   support

     against       0.68      0.86      0.76       429
       favor       0.68      0.43      0.52       216
        none       0.51      0.42      0.46       168

    accuracy                           0.65       813
   macro avg       0.62      0.57      0.58       813
weighted avg       0.65      0.65      0.64       813

