In [None]:
!pip install tensorflow-text
import tensorflow_text as text

import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from google.colab import drive
# Mount Google Drive at /content/drive; the saved models and CSV datasets used
# by the later cells are read from paths under this mount point.
drive.mount('/content/drive')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-text
  Downloading tensorflow_text-2.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 5.0 MB/s 
[?25hCollecting tensorflow<2.12,>=2.11.0
  Downloading tensorflow-2.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[K     |████████████████████████████████| 588.3 MB 19 kB/s 
Collecting tensorboard<2.12,>=2.11
  Downloading tensorboard-2.11.0-py3-none-any.whl (6.0 MB)
[K     |████████████████████████████████| 6.0 MB 46.4 MB/s 
Collecting flatbuffers>=2.0
  Downloading flatbuffers-22.12.6-py2.py3-none-any.whl (26 kB)
Collecting keras<2.12,>=2.11.0
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 51.2 MB/s 
Collecting tensorflow-estimator<2.12,>=2.11.0
  Downloading tensorflow_estimator-2.11.0-py2.py3-none-any.whl (439

In [None]:
from keras.models import load_model
import numpy as np

In [None]:
# Identifiers of the two review datasets the models are evaluated on.
hotel_reviews, movie_reviews = 'hotel_reviews', 'movie_reviews'

# Identifiers of the two trained models; they double as keys of `models_dict`
# and as the prefix of each row label in the results table.
albert_trained_on_movie_reviews, albert_trained_on_hotel_reviews = (
    'albert_trained_on_movie_reviews',
    'albert_trained_on_hotel_reviews',
)

In [None]:
# Load each saved Keras model from Google Drive once and keep it in memory,
# keyed by its model identifier, so every evaluation run reuses the same object.
models_dict = {}
for _model_key, _saved_model_path in (
    (
        albert_trained_on_movie_reviews,
        '/content/drive/MyDrive/data_for_colab/albert_trained_on_imdb_23_december_with_15_epochs',
    ),
    (
        albert_trained_on_hotel_reviews,
        '/content/drive/MyDrive/data_for_colab/albert_trained_on_hotel_reviews_23_december_15_epochs',
    ),
):
    models_dict[_model_key] = load_model(_saved_model_path)

In [None]:
# Locations of the raw CSV files on the mounted Google Drive.
(
    path_to_movie_reviews_dataset,
    path_to_hotel_reviews_dataset,
) = (
    '/content/drive/MyDrive/data_for_colab/IMDB Dataset.csv',
    '/content/drive/MyDrive/data_for_colab/tripadvisor_hotel_reviews.csv',
)

In [None]:
def test_model_on_dataset_and_write_results_to_dataframe(model_name=None, dataset_name=None, dataframe_to_write_answer=None):

    global hotel_reviews, movie_reviews
    global albert_trained_on_movie_reviews
    global albert_trained_on_hotel_reviews
    global models_dict
    global path_to_hotel_reviews_dataset
    global path_to_movie_reviews_dataset

    if model_name is None or dataset_name is None or dataframe_to_write_answer is None:
        raise ValueError("Wrong arguments passed to function: there are none arguments!")
    if model_name != albert_trained_on_movie_reviews and model_name != albert_trained_on_hotel_reviews:
        raise ValueError("Wrong model_name!")
    if dataset_name != movie_reviews and dataset_name != hotel_reviews:
        raise ValueError("Wrong dataset_name!")

    model = models_dict[model_name] # загрузили пользователем заданную модель 
    # (она уже обучена на определенном датасете)

    if dataset_name == movie_reviews: # if you edit this line, edit the next line too!
        df = pd.read_csv(path_to_movie_reviews_dataset)
        df['is_positive'] = (df['sentiment'] == 'positive').astype(int)
        df.drop(columns=['sentiment'], inplace=True)
        df.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
        df_positive = df[df['LABEL_COLUMN']==1]
        df_negative = df[df['LABEL_COLUMN']==0]
        # Для тестовой выборки берем последние 10% негативных отзывов и последние 10% позитивных отзывов
        n_test = df_negative.shape[0] // 10  # в оригинале df_negative.shape[0] // 10
        df_negative_test = df_negative.tail(n_test)
        n_test = df_positive.shape[0] // 10  # в оригинале df_positive.shape[0] // 10
        df_positive_test = df_positive.tail(n_test)
        df_balanced_test = pd.concat([df_negative_test, df_positive_test])

    elif dataset_name == hotel_reviews: # if you edit this line, edit the next line too!
        df = pd.read_csv(path_to_hotel_reviews_dataset)
        df = df[df.Rating != 3]
        df['is_positive'] = (df['Rating'] >= 4).astype(int)
        df.drop(columns=['Rating'], inplace=True)
        df.columns = ['DATA_COLUMN', 'LABEL_COLUMN']
        df_positive = df[df['LABEL_COLUMN']==1]
        df_negative = df[df['LABEL_COLUMN']==0]
        # Тестовая выборка
        n_test = (df_negative.shape[0] // 4) * 3  # в оригинале (df_negative.shape[0] // 4) * 3
        df_negative_test = df_negative.tail(n_test)
        n_test = (df_positive.shape[0] // 20) * 3  # в оригинале (df_positive.shape[0] // 20) * 3
        df_positive_test = df_positive.tail(n_test)
        df_balanced_test = pd.concat([df_negative_test, df_positive_test])

    X_test = df_balanced_test['DATA_COLUMN'].squeeze()
    y_test = df_balanced_test['LABEL_COLUMN'].squeeze()
    y_test_array = np.array(y_test)

    y_predicted = model.predict(X_test)
    y_predicted = np.where(y_predicted > 0.5, 1, 0)

    row_name_in_dataframe_to_write_answer = model_name + ' tested on ' + dataset_name + ' dataset'

    dataframe_to_write_answer.loc[row_name_in_dataframe_to_write_answer, 'accuracy'] = accuracy_score(y_test, y_predicted)
    dataframe_to_write_answer.loc[row_name_in_dataframe_to_write_answer, 'precision'] = precision_score(y_test, y_predicted)
    dataframe_to_write_answer.loc[row_name_in_dataframe_to_write_answer, 'recall'] = recall_score(y_test, y_predicted)
    dataframe_to_write_answer.loc[row_name_in_dataframe_to_write_answer, 'f1_score'] =  f1_score(y_test_array, y_predicted)
    print(model_name, ' on dataset', dataset_name, 'result', accuracy_score(y_test, y_predicted), precision_score(y_test, y_predicted), recall_score(y_test, y_predicted), f1_score(y_test_array, y_predicted) )

In [None]:
# Empty results table: one column per metric; rows are added by the
# evaluation function, one per (model, dataset) pair.
metric_columns = ['accuracy', 'precision', 'recall', 'f1_score']
answer_dataframe = pd.DataFrame(columns=metric_columns)

In [None]:
# Evaluate every model on every dataset (in-domain and cross-domain), hotel
# model first, writing one row of metrics per pair into `answer_dataframe`.
evaluation_pairs = [
    (model_key, dataset_key)
    for model_key in (albert_trained_on_hotel_reviews, albert_trained_on_movie_reviews)
    for dataset_key in (hotel_reviews, movie_reviews)
]
for cur_model_name, cur_dataset_name in evaluation_pairs:
    test_model_on_dataset_and_write_results_to_dataframe(
        cur_model_name, cur_dataset_name, answer_dataframe
    )

albert_trained_on_hotel_reviews  on dataset hotel_reviews result 0.7578676942838792 0.8396396396396396 0.6180371352785146 0.7119938884644766
albert_trained_on_hotel_reviews  on dataset movie_reviews result 0.5752 0.6934156378600823 0.2696 0.3882488479262673
albert_trained_on_movie_reviews  on dataset hotel_reviews result 0.6403339755940912 0.8439716312056738 0.3156498673740053 0.45945945945945943
albert_trained_on_movie_reviews  on dataset movie_reviews result 0.695 0.6899104012465913 0.7084 0.6990329583580027


In [None]:
# Display the collected metrics table (one row per model/dataset pair) as the
# cell output.
answer_dataframe

Unnamed: 0,accuracy,precision,recall,f1_score
albert_trained_on_hotel_reviews tested on hotel_reviews dataset,0.757868,0.83964,0.618037,0.711994
albert_trained_on_hotel_reviews tested on movie_reviews dataset,0.5752,0.693416,0.2696,0.388249
albert_trained_on_movie_reviews tested on hotel_reviews dataset,0.640334,0.843972,0.31565,0.459459
albert_trained_on_movie_reviews tested on movie_reviews dataset,0.695,0.68991,0.7084,0.699033
