import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import intel_extension_for_pytorch as ipex

# Checkpoint identifier shared by the tokenizer and the model
model_name = "NousResearch/Nous-Hermes-Llama-2-7b"  # or any other identifier for the model

# Fetch the tokenizer that belongs to the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the causal-LM weights and pass them straight through
# Intel Extension for PyTorch.
# NOTE(review): ipex.optimize() is called here without an optimizer, yet the
# model is trained further down. The IPEX docs appear to describe the
# optimizer-less call as the inference path -- confirm this is intended.
model = ipex.optimize(AutoModelForCausalLM.from_pretrained(model_name))

# Build the training, validation, and test datasets, in that order.
# Each split goes through the same tokenizer with the same block_size;
# adjust block_size according to the maximum sequence length supported by
# the model.
train_dataset, valid_dataset, test_dataset = (
    TextDataset(tokenizer=tokenizer, file_path=split_path, block_size=2048)
    for split_path in ("train.txt", "valid.txt", "test.txt")
)

# Training configuration, grouped by concern
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir="./finetuned_model",
    overwrite_output_dir=True,
    logging_dir="./logs",
    # Training length and per-device batch sizes
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Step-based cadence for evaluation, logging, and checkpointing
    evaluation_strategy="steps",
    eval_steps=500,
    logging_steps=500,
    save_steps=1000,
)

# Batch collator for language modeling; mlm=False turns off masked-LM mode
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Wire the model, the datasets, the collator, and the training configuration
# into a single Trainer
trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=data_collator,
    args=training_args,
)

# Fine-tune only when at least one training epoch is configured
if training_args.num_train_epochs > 0:
    print("Model is being fine-tuned...")
    # Fine-tune the model
    trainer.train()

    # Persist the final model and tokenizer directly into output_dir.
    # Periodic checkpoints are tied to `save_steps`, and the Trainer was not
    # given the tokenizer, so without these explicit saves output_dir is not
    # guaranteed to hold a complete, reloadable copy of the fine-tuned model.
    trainer.save_model()  # with no argument this targets training_args.output_dir
    tokenizer.save_pretrained(training_args.output_dir)

    # Evaluate the model on the test set
    results = trainer.evaluate(eval_dataset=test_dataset)
    print(results)

else:
    print("Model is not being fine-tuned.")


In [7]:
from transformers import pipeline

# Text-generation pipeline built around the in-memory model and its tokenizer
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Prompt for the story; split across lines for readability only, the
# adjacent literals join into one unchanged string
prompt = (
    "Create an open-ended storyline of about 50 words, "
    "based on the theme of Thriller with a choice that would "
    "affect the plot and characters of the story so far."
)

# Ask for a single sequence, capped with max_length=200
generated_text = text_generator(prompt, max_length=200, num_return_sequences=1)

# Show the text of the first (and only requested) sequence
first_sequence = generated_text[0]
print(first_sequence["generated_text"])

Create an open-ended storyline of about 50 words, based on the theme of Thriller with a choice that would affect the plot and characters of the story so far.

###Assistant:
Description:Witnessing a powerful individual attempt to influence a legal case through unethical means.
Choice 1:Confronting the unethical attempts and upholding your ethical code, risking your career and personal danger.
Choice 2:Bending the rules and compromising your integrity, prioritizing your career and your own safety."
23	Thriller	"Witnessing a powerful individual attempt to influence a legal case through unethical means. Confronting the unethical attempts and upholding your ethical code, risking your career and personal danger. Bending the rules and compromising your integrity, prioritizing your career and your own safety."	"W
