In [None]:
import scripts.data_preparation as dp
import transformer_model as tm

import numpy as np
import pandas as pd
import tensorflow as tf
import random as rn
import matplotlib.pyplot as plt

In [None]:
# Use one shared seed for every random number generator this notebook
# touches: NumPy, Python's built-in `random` module, and TensorFlow.
SEED = 42

np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Load the data set and pass it straight through the preprocessing step.
# Binding the name once keeps this cell safe to re-run from scratch.
sentiment_data = dp.data_preprocessor(dp.data_loader())

In [None]:
# Compare the preprocessing variants: each entry names a column of
# `sentiment_data` that is handed to `tm.train_and_evaluate` in turn.
columns = [
    'tokenized_text',
    'no_punctuation_text',
    'no_stopwords_text',
    'stemmed_text',
    'lemmatized_text',
]

# One result per column, in the same order as `columns`.
# NOTE(review): the plotting cell reads each result as an (RMSE, R^2) pair —
# confirm against tm.train_and_evaluate.
results = [
    tm.train_and_evaluate(sentiment_data, text_column)
    for text_column in columns
]

In [None]:
# Tabulate the collected results: one row per preprocessing column,
# one column per metric.
results_df = pd.DataFrame(results, index=columns, columns=['RMSE', 'R^2'])

# One bar colour per preprocessing method, reused in both panels.
colors = [
    'cornflowerblue',
    'sandybrown',
    'mediumseagreen',
    'indianred',
    'mediumpurple',
]

# Metric column paired with the title of the panel that shows it.
metric_panels = [
    ('RMSE', 'RMSE for Different Preprocessing Methods'),
    ('R^2', 'R^2 for Different Preprocessing Methods'),
]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# Left panel shows RMSE, right panel shows R^2; both are bar charts
# indexed by preprocessing method.
for panel_ax, (metric, panel_title) in zip(axes, metric_panels):
    results_df[metric].plot.bar(ax=panel_ax, color=colors)
    panel_ax.set(title=panel_title, ylabel=metric)

plt.tight_layout()
plt.show()