### Train model

In [None]:
import os
import sys
sys.path.append("../..")
from dataset.sponge_bob.personality import sponge_bob_personality 
from dataset.yoda.personality import yoda_personality 

from conv_ai import ConvAIModel, ConvAIArgs

# --- Experiment selection -------------------------------------------------
CHARACTER = 'sponge_bob' # or 'yoda'
MODELS_FOLDER = 'models'
DATASET_FOLDER = f'../../dataset/{CHARACTER}/'

# Train / test split files for the selected character.
train_path, test_path = (
    os.path.join(DATASET_FOLDER, split_file)
    for split_file in ("train.json", "test.json")
)

# Persona used by the interactive cell further down; every value other than
# 'yoda' selects the SpongeBob persona.
if CHARACTER == 'yoda':
    personality = yoda_personality
else:
    personality = sponge_bob_personality

# --- Arguments shared by the GPT and GPT-2 runs ---------------------------
# num_train_epochs is deliberately not set here: each training cell assigns
# its own value right before building its model.
model_args = ConvAIArgs()
for _option, _value in (
    ("max_history", 1),
    ("max_length", 30),
    ("num_candidates", 1),
    ("reprocess_input_data", True),
    ("overwrite_output_dir", True),
    ("save_steps", -1),
    ("evaluate_during_training", True),
):
    setattr(model_args, _option, _value)

#### GPT

In [None]:
# GPT run: 3 epochs, starting from the checkpoint directory under MODELS_FOLDER.
model_args.num_train_epochs = 3

pretrained_gpt_dir = os.path.join(MODELS_FOLDER, "gpt_personachat")
gpt_model = ConvAIModel("gpt", pretrained_gpt_dir, use_cuda=True, args=model_args)

# Train on the character's train split, writing the result to save_path and
# using the test split as the evaluation file during training.
save_path = os.path.join(MODELS_FOLDER, f'trained_models/gpt_{CHARACTER}')
gpt_model.train_model(train_path, output_dir=save_path, eval_file=test_path)

#### GPT-2

In [None]:
# GPT-2 run: 4 epochs, starting from the checkpoint directory under MODELS_FOLDER.
# NOTE(review): model_args is the same object that was handed to gpt_model, so
# this assignment is visible through that reference as well.
model_args.num_train_epochs = 4

pretrained_gpt2_dir = os.path.join(MODELS_FOLDER, "gpt2-personachat")
gpt2_model = ConvAIModel("gpt2", pretrained_gpt2_dir, use_cuda=True, args=model_args)

# Train on the character's train split, writing the result to save_path and
# using the test split as the evaluation file during training.
save_path = os.path.join(MODELS_FOLDER, f'trained_models/gpt2_{CHARACTER}')
gpt2_model.train_model(train_path, output_dir=save_path, eval_file=test_path)

### Interact with model

In [None]:
# # Load existing model
# save_path = os.path.join(MODELS_FOLDER, f'gpt-persona-{CHARACTER}')
# gpt_model = ConvAIModel(
#     "gpt",
#     save_path,
#     use_cuda=True,
#     args=model_args
# )

# save_path = os.path.join(MODELS_FOLDER, f'gpt2-persona-{CHARACTER}')
# gpt2_model = ConvAIModel(
#     "gpt2",
#     save_path,
#     use_cuda=True,
#     args=model_args
# )

In [None]:
# gpt2_model.interact(
#     personality=personality
# )

### Evaluate models

In [None]:
# Run the GPT model's own eval_model routine on the test split file
# (test.json of the selected character); the cell displays whatever it returns.
gpt_model.eval_model(test_path)

In [None]:
# Run the GPT-2 model's own eval_model routine on the test split file
# (test.json of the selected character); the cell displays whatever it returns.
gpt2_model.eval_model(test_path)

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import single_meteor_score

# Fetch the NLTK data packages needed by the scoring code below
# (presumably 'punkt' backs word_tokenize and 'wordnet' backs METEOR -- confirm
# against the installed NLTK version).
for _resource in ('punkt', 'wordnet'):
    nltk.download(_resource)

def evaluate_chatbot(model, evaluation_data, *, smoothing_function=None):
    """Score a chatbot's replies against reference replies with BLEU and METEOR.

    For each utterance, the last (up to) three history turns are taken: the
    most recent one is sent to the model as the incoming message and the
    earlier ones as conversation context. The model's reply is compared with
    the last entry of the utterance's ``candidates`` list, which is treated as
    the reference response.

    Args:
        model: Object exposing ``interact_single(message, history, personality)``
            that returns a ``(reply, history)`` pair.
        evaluation_data: Iterable of dicts, each with a ``'personality'`` entry
            and an ``'utterances'`` list; every utterance is a dict with
            ``'candidates'`` and ``'history'`` lists of strings.
        smoothing_function: Optional smoothing callable forwarded to
            ``sentence_bleu``. The default ``None`` keeps the unsmoothed score.

    Returns:
        A ``(avg_bleu, avg_meteor)`` tuple averaged over all scored utterances,
        or ``(0.0, 0.0)`` when no utterance could be scored.
    """
    bleu_scores = []
    meteor_scores = []

    for dialogue in evaluation_data:
        persona = dialogue['personality']
        for utterance in dialogue['utterances']:
            reference_response = utterance['candidates'][-1]

            # Slicing already produces a new list, so the pop() below never
            # mutates the caller's evaluation data.
            context = utterance['history'][-3:]
            if not context:
                # No incoming message to answer, so there is nothing to score.
                continue
            message = context.pop()

            # interact_single also returns an updated history; each utterance
            # is scored independently, so it is not needed here.
            generated_response, _ = model.interact_single(message, context, persona)

            # Both metrics operate on token lists.
            reference_tokens = word_tokenize(reference_response)
            generated_tokens = word_tokenize(generated_response)

            # BLEU with uniform weights over 1- to 4-grams.
            bleu_scores.append(
                sentence_bleu(
                    [reference_tokens],
                    generated_tokens,
                    weights=(0.25, 0.25, 0.25, 0.25),
                    smoothing_function=smoothing_function,
                )
            )

            meteor_scores.append(single_meteor_score(reference_tokens, generated_tokens))

    # Guard the averages: empty data (or only history-less utterances) would
    # otherwise raise ZeroDivisionError.
    if not bleu_scores:
        return 0.0, 0.0

    avg_bleu_score = sum(bleu_scores) / len(bleu_scores)
    avg_meteor_score = sum(meteor_scores) / len(meteor_scores)

    return avg_bleu_score, avg_meteor_score

#### Evaluate GPT model

In [None]:
import json

# Load the test dialogues for the BLEU/METEOR evaluation below.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# instead of relying on the platform's default codec.
with open(test_path, 'r', encoding='utf-8') as f:
    # Load the JSON data from the file
    test_data = json.load(f)

In [None]:

# Score the GPT model's generated replies on the loaded test dialogues.
avg_bleu, avg_meteor = evaluate_chatbot(gpt_model, test_data)
for _label, _score in (
    ("Average BLEU score:", avg_bleu),
    ("Average METEOR score:", avg_meteor),
):
    print(_label, _score)

#### Evaluate GPT-2 model

In [None]:

# Score the GPT-2 model's generated replies on the loaded test dialogues.
avg_bleu, avg_meteor = evaluate_chatbot(gpt2_model, test_data)
for _label, _score in (
    ("Average BLEU score:", avg_bleu),
    ("Average METEOR score:", avg_meteor),
):
    print(_label, _score)