In [1]:
import ipywidgets
from IPython import display
import os

import tensorflow as tf
import torch

from transformers import TextDataset,DataCollatorForLanguageModeling
from transformers import GPT2Model, GPT2Config
from transformers import AutoTokenizer, AutoModelWithLMHead
from transformers import Trainer, TrainingArguments, AutoModelWithLMHead

def load_dataset(train_path,test_path,tokenizer):
    train_dataset = TextDataset(
          tokenizer=tokenizer,
          file_path=train_path,
          block_size=128)

    test_dataset = TextDataset(
          tokenizer=tokenizer,
          file_path=test_path,
          block_size=128)

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=False,
    )
    return train_dataset,test_dataset,data_collator


# Initializing a GPT2 configuration
configuration = GPT2Config()
# Initializing a model from the configuration
model = GPT2Model(configuration)
# Accessing the model configuration
configuration = model.config
# print(configuration)
# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained('gpt2')
# Download model and configuration from S3 and cache.
model = AutoModelWithLMHead.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
train_path = os.path.abspath(os.path.join("../", os.pardir))+ '\Data\Outputs\Stephen_King_Playground\merged_Stephen_King_train.txt';
test_path = os.path.abspath(os.path.join("../", os.pardir))+ '\Data\Outputs\Stephen_King_Playground\merged_Stephen_King_test.txt';
output_path = os.path.abspath(os.path.join("../", os.pardir))+ '\Data\Outputs\Stephen_King_Playground\gpt2_small_outputs'
print("Train Path:\t", train_path)
print("Test Path:\t", test_path)
print("Output Directory Path:\t", output_path)
train_dataset,test_dataset,data_collator = load_dataset(train_path,test_path,tokenizer)
training_args = TrainingArguments(
    output_dir=output_path, #The output directory
    overwrite_output_dir=False, #overwrite the content of the output directory
    num_train_epochs=5, # number of training epochs
    per_device_train_batch_size=7, # batch size for training
    per_device_eval_batch_size=7,  # batch size for evaluation
    eval_steps = 400, # Number of update steps between two evaluations.
    save_steps=1000, # after # steps model is saved
    warmup_steps=1000,# number of warmup steps for learning rate scheduler
    )

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    prediction_loss_only=True,
)



Train Path:	 C:\Users\Carson\OneDrive\Desktop\Programming\Projects\Epic\Data\Outputs\Stephen_King_Playground\merged_Stephen_King_train.txt
Test Path:	 C:\Users\Carson\OneDrive\Desktop\Programming\Projects\Epic\Data\Outputs\Stephen_King_Playground\merged_Stephen_King_test.txt
Output Directory Path:	 C:\Users\Carson\OneDrive\Desktop\Programming\Projects\Epic\Data\Outputs\Stephen_King_Playground\gpt2_small_outputs




In [2]:
trainer.train()

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=22960.0), HTML(value='')))

{'loss': 3.815122314453125, 'learning_rate': 2.5e-05, 'epoch': 0.10888501742160278}
{'loss': 3.664017822265625, 'learning_rate': 5e-05, 'epoch': 0.21777003484320556}
{'loss': 3.58491064453125, 'learning_rate': 4.8861566484517307e-05, 'epoch': 0.32665505226480834}
{'loss': 3.5343935546875, 'learning_rate': 4.772313296903461e-05, 'epoch': 0.4355400696864111}
{'loss': 3.4940791015625, 'learning_rate': 4.6584699453551915e-05, 'epoch': 0.544425087108014}
{'loss': 3.445609375, 'learning_rate': 4.544626593806922e-05, 'epoch': 0.6533101045296167}
{'loss': 3.40947265625, 'learning_rate': 4.430783242258652e-05, 'epoch': 0.7621951219512195}
{'loss': 3.39899609375, 'learning_rate': 4.316939890710383e-05, 'epoch': 0.8710801393728222}
{'loss': 3.368326171875, 'learning_rate': 4.203096539162113e-05, 'epoch': 0.9799651567944251}
{'loss': 3.251853515625, 'learning_rate': 4.0892531876138436e-05, 'epoch': 1.088850174216028}
{'loss': 3.2192109375, 'learning_rate': 3.975409836065574e-05, 'epoch': 1.1977351

TrainOutput(global_step=22960, training_loss=3.071871257077526)

In [3]:
trainer.save_model()

In [4]:
from transformers import pipeline

generator = pipeline('text-generation',model=output_path, tokenizer=tokenizer ,config={'max_length':800})

result = generator('Input Prompt')[0]['generated_text']

ValueError: unable to parse C:\Users\Carson\OneDrive\Desktop\Programming\Projects\Epic\Data\Outputs\Stephen_King_Playground\gpt2_small_outputs\modelcard.json as a URL or as a local path