In [138]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from tqdm import tqdm
import re

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

block_size = 8
batch_size = 128
max_iters = 1000
learning_rate = 5e-4
eval_iters = 25
dropout = 0.1
n_embd = 256
n_layer = 4
n_head = 4

SAVE_PATH = "models/model_harry_potter.pth"

cpu


In [139]:
BOOK_PATH = 'books/'
with open(BOOK_PATH + 'Harry_Potter_all_books_preprocessed.txt', 'r', encoding='utf-8') as f:
    text = f.read()

text[:1000]

'THE BOY WHO LIVED Mr and Mrs Dursley of number four Privet Drive were proud to say that they were perfectly normal thank you very much .They were the last people youd expect to be involved in anything strange or mysterious because they just didnt hold with such nonsense .Mr Dursley was the director of a firm called Grunnings which made drills .He was a big beefy man with hardly any neck although he did have a very large mustache .Mrs Dursley was thin and blonde and had nearly twice the usual amount of neck which came in very useful as she spent so much of her time craning over garden fences spying on the neighbors .The Dursley s had a small son called Dudley and in their opinion there was no finer boy anywhere .The Dursleys had everything they wanted but they also had a secret and their greatest fear was that somebody would discover it .They didnt think they could bear it if anyone found out about the Potters .Mrs Potter was Mrs Dursleys sister but they hadnt met for several years in 

In [140]:
len(text)

5991293

In [141]:
print(list(set(text)))

['W', 'T', 'E', 'c', 'd', 'j', 'b', 'V', 'A', 'C', '‘', 'm', '□', '6', '3', 'L', 's', 'p', '■', 'B', 'w', ' ', 'Y', 'z', 'G', '5', '0', '?', 'U', 'Z', 'I', '.', 'R', 'f', 't', 'D', '8', 'N', '9', 'i', '4', 'S', 'e', 'H', 'o', '~', 'u', '•', 'h', 'n', 'F', '!', '2', 'P', 'a', 'k', 'x', '7', 'X', 'v', 'J', 'M', 'r', 'l', 'y', 'g', 'K', '1', 'O', 'Q', 'q']


In [142]:
def remove_text_in_brackets(text):
    pattern = r'\[.*?\]'
    cleaned_text = re.sub(pattern, '', text)
    return cleaned_text

def replace_commas_and_periods(text):
    # Replace ' ,' with ' , '
    text = re.sub(r'\s,', ' , ', text)
    # Replace ' .' with ' . '
    text = re.sub(r'\s\.', ' . ', text)
    text = re.sub(r'\s\?', ' . ', text)
    text = re.sub(r'\s\!', ' . ', text)
    return text

def lowercase_text(text):
    return text.lower()

In [143]:
# text = remove_text_in_brackets(text)
# text[-1000:]

In [144]:
non_lower_text = replace_commas_and_periods(text)
non_lower_text[-1000:]

'ders . Albus jumped into the carriage and Ginny closed the door behind him . Students were hanging from the windows nearest them . A great number of faces both on the train and off seemed to be turned toward Harry . Why are they all staring . demanded Albus as he and Rose craned around to look at the other students . Dont let it worry you said Ron . Its me . Im extremely famous . Albus Rose Hugo and Lily laughed . The train began to move and Harry walked alongside it watching his sons thin face already ablaze with excitement . Harry kept smiling and waving even though it was like a little bereavement watching his son glide away from him . The last trace of steam evaporated in the autumn air . The train rounded a corner . Harrys hand was still raised in farewell . Hell be all right murmured Ginny . As Harry looked at her he lowered his hand absentmindedly and touched the lightning scar on his forehead . I know he will . The scar had not pained Harry for nineteen years . All was well . 

In [145]:
text = replace_commas_and_periods(text)
text = lowercase_text(text)
text[-1000:]

'ders . albus jumped into the carriage and ginny closed the door behind him . students were hanging from the windows nearest them . a great number of faces both on the train and off seemed to be turned toward harry . why are they all staring . demanded albus as he and rose craned around to look at the other students . dont let it worry you said ron . its me . im extremely famous . albus rose hugo and lily laughed . the train began to move and harry walked alongside it watching his sons thin face already ablaze with excitement . harry kept smiling and waving even though it was like a little bereavement watching his son glide away from him . the last trace of steam evaporated in the autumn air . the train rounded a corner . harrys hand was still raised in farewell . hell be all right murmured ginny . as harry looked at her he lowered his hand absentmindedly and touched the lightning scar on his forehead . i know he will . the scar had not pained harry for nineteen years . all was well . 

In [146]:
strings = text.split()
unique = set(strings)
vocab_size = len(unique)
print(vocab_size)
print(list(strings)[:10])

23497
['the', 'boy', 'who', 'lived', 'mr', 'and', 'mrs', 'dursley', 'of', 'number']


In [147]:
non_lower_strings = non_lower_text.split()
print(non_lower_strings[:100])

['THE', 'BOY', 'WHO', 'LIVED', 'Mr', 'and', 'Mrs', 'Dursley', 'of', 'number', 'four', 'Privet', 'Drive', 'were', 'proud', 'to', 'say', 'that', 'they', 'were', 'perfectly', 'normal', 'thank', 'you', 'very', 'much', '.', 'They', 'were', 'the', 'last', 'people', 'youd', 'expect', 'to', 'be', 'involved', 'in', 'anything', 'strange', 'or', 'mysterious', 'because', 'they', 'just', 'didnt', 'hold', 'with', 'such', 'nonsense', '.', 'Mr', 'Dursley', 'was', 'the', 'director', 'of', 'a', 'firm', 'called', 'Grunnings', 'which', 'made', 'drills', '.', 'He', 'was', 'a', 'big', 'beefy', 'man', 'with', 'hardly', 'any', 'neck', 'although', 'he', 'did', 'have', 'a', 'very', 'large', 'mustache', '.', 'Mrs', 'Dursley', 'was', 'thin', 'and', 'blonde', 'and', 'had', 'nearly', 'twice', 'the', 'usual', 'amount', 'of', 'neck', 'which']


In [148]:
track_of_upper = {unique_word : [0, 0] for unique_word in unique}
for non_lower_string in non_lower_strings:
    if non_lower_string[0].isupper():
        track_of_upper[non_lower_string.lower()][0] += 1
        track_of_upper[non_lower_string.lower()][1] += 1
    else:
        track_of_upper[non_lower_string.lower()][1] += 1

In [178]:
capital_words = []
for word in track_of_upper:
    if track_of_upper[word][0] / track_of_upper[word][1] > 0.9:
        capital_words.append(word)
print('snape' in capital_words)
print(track_of_upper['lord'])
print(len(capital_words))

True
[386, 395]
2732


In [150]:
string_to_int = { ch: i for i, ch in enumerate(unique) }
int_to_string = { i: ch for i, ch in enumerate(unique) }
encode = lambda s: [string_to_int[c] for c in s.split()]
decode = lambda l: ''.join([int_to_string[i]+" " for i in l])

encoded_hello = encode('you a harry .')
decoded_hello = decode(encoded_hello)
print(encoded_hello)
print(decoded_hello)

[19022, 2240, 18757, 6561]
you a harry . 


In [151]:
data = torch.tensor(encode(text), dtype=torch.long)
print(data[:100])

tensor([19271,  5138,  2492, 19646,  7422, 22173, 18206,  2544,  7123, 19340,
        14243, 16889, 22025, 16263, 22675, 19442,   638, 20817,  9758, 16263,
        18592,  8374,  8767, 19022, 10688, 22281,  6561,  9758, 16263, 19271,
        16587, 20541, 16762, 22435, 19442, 20689, 12973, 23400, 12277, 22511,
        16551,  1301,     3,  9758, 12559,  3738, 12710,  1555,  7814, 20482,
         6561,  7422,  2544,  8225, 19271, 18719,  7123,  2240, 21838, 23340,
        19472, 10789, 12689, 18006,  6561,  6001,  8225,  2240,  7557,    84,
        11030,  1555, 21741,  8215,  5826,  9388,  6001, 21595, 22612,  2240,
        10688,  1978, 21190,  6561, 18206,  2544,  8225, 21608, 22173,  3485,
        22173,  6830,  8179, 17850, 19271, 16411,  3332,  7123,  5826, 10789])


In [152]:
n = int(len(data)*0.8)
train_data = data[:n]
val_data = data[n:]

def get_batch(split):
    data = train_data if split == "train" else val_data
    ix = torch.randint(len(data) - block_size, (batch_size, ))
    # print(ix)
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

x, y = get_batch('train')
print('inputs: ')
print(x)
print('targets: ')
print(y)

inputs: 
tensor([[10654,  1958,  3674,  ...,  9172, 19442,  6567],
        [19036, 17104,  4354,  ..., 22173, 17510,  3219],
        [ 6063, 11939, 22255,  ..., 11311,  4958, 15840],
        ...,
        [ 7478,  9758,  6830,  ..., 14334, 15582,  6561],
        [19198, 15994, 18299,  ...,  6561,  6026, 17680],
        [21930, 22173,  5187,  ...,  6471, 12814,  7123]])
targets: 
tensor([[ 1958,  3674,  6561,  ..., 19442,  6567,  6561],
        [17104,  4354, 19442,  ..., 17510,  3219,  7123],
        [11939, 22255, 22009,  ...,  4958, 15840, 19271],
        ...,
        [ 9758,  6830, 15610,  ..., 15582,  6561, 19892],
        [15994, 18299, 16829,  ...,  6026, 17680,  6561],
        [22173,  5187,  5238,  ..., 12814,  7123, 19271]])


In [153]:

x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    # print(f"When input is {context}, target is {target}")

In [154]:
@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [155]:
class Head(nn.Module):
    """ one head of self-attention """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # input of size (batch, time-step, channels)
        # output of size (batch, time-step, head size)
        B,T,C = x.shape
        k = self.key(x)   # (B,T,hs)
        q = self.query(x) # (B,T,hs)
        # compute attention scores ("affinities")
        wei = q @ k.transpose(-2,-1) * k.shape[-1]**-0.5 # (B, T, hs) @ (B, hs, T) -> (B, T, T)
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf')) # (B, T, T)
        wei = F.softmax(wei, dim=-1) # (B, T, T)
        wei = self.dropout(wei)
        # perform the weighted aggregation of the values
        v = self.value(x) # (B,T,hs)
        out = wei @ v # (B, T, T) @ (B, T, hs) -> (B, T, hs)
        return out

In [156]:
class MultiHeadAttention(nn.Module):
    """ multiple heads of self-attention in parallel """

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1) # (B, T, F) -> (B, T, [h1, h1, h1, h1, h2, h2, h2, h2, h3, h3, h3, h3])
        out = self.dropout(self.proj(out))
        return out

In [157]:
class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

In [158]:
class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        y = self.sa(x)
        x = self.ln1(x + y)
        y = self.ffwd(x)
        x = self.ln2(x + y)
        return x

In [159]:
class GPTLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)
        
        
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, index, targets=None):
        B, T = index.shape
        
        
        # idx and targets are both (B,T) tensor of integers
        tok_emb = self.token_embedding_table(index) # (B,T,C)
        pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
        x = tok_emb + pos_emb # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)
        logits = self.lm_head(x) # (B,T,vocab_size)
        
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        
        return logits, loss
    
    def generate(self, index, max_new_tokens):
        # index is (B, T) array of indices in the current context
        for _ in range(max_new_tokens):
            # crop idx to the last block_size tokens
            index_cond = index[:, -block_size:]
            # get the predictions
            logits, loss = self.forward(index_cond)
            # focus only on the last time step
            logits = logits[:, -1, :] # becomes (B, C)
            # apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1) # (B, C)
            # sample from the distribution
            index_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            # append sampled index to the running sequence
            index = torch.cat((index, index_next), dim=1) # (B, T+1)
        return index

model = GPTLanguageModel(vocab_size)
model = model.to(device)
print(model)

context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(context)
generated_chars = decode(model.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)

GPTLanguageModel(
  (token_embedding_table): Embedding(23497, 256)
  (position_embedding_table): Embedding(8, 256)
  (blocks): Sequential(
    (0): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-3): 4 x Head(
            (key): Linear(in_features=256, out_features=64, bias=False)
            (query): Linear(in_features=256, out_features=64, bias=False)
            (value): Linear(in_features=256, out_features=64, bias=False)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (proj): Linear(in_features=256, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ffwd): FeedFoward(
        (net): Sequential(
          (0): Linear(in_features=256, out_features=1024, bias=True)
          (1): ReLU()
          (2): Linear(in_features=1024, out_features=256, bias=True)
          (3): Dropout(p=0.1, inplace=False)
        )
      )
      (ln1): LayerNorm((256,), eps=1e-05, elementwise_affine

In [160]:
# Create a PyTorch optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for iter in tqdm(range(max_iters)):

    if iter % eval_iters == 0:
        losses = estimate_loss()
        print(f"step: {iter} | train loss: {losses['train']:.3f} | val loss: {losses['val']:.3f}")

    # Sample a batch of data
    xb, yb = get_batch('train')

    # Evaluate the loss
    logits, loss = model.forward(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print(loss.item())

  0%|                                                                                                           | 0/1000 [00:00<?, ?it/s]

step: 0 | train loss: 10.112 | val loss: 10.107


  2%|██▍                                                                                               | 25/1000 [00:21<11:49,  1.38it/s]

step: 25 | train loss: 6.956 | val loss: 6.977


  5%|████▉                                                                                             | 50/1000 [00:41<09:40,  1.64it/s]

step: 50 | train loss: 6.719 | val loss: 6.740


  8%|███████▎                                                                                          | 75/1000 [01:00<08:33,  1.80it/s]

step: 75 | train loss: 6.643 | val loss: 6.725


 10%|█████████▋                                                                                       | 100/1000 [01:19<08:29,  1.76it/s]

step: 100 | train loss: 6.398 | val loss: 6.468


 12%|████████████▏                                                                                    | 125/1000 [01:42<09:44,  1.50it/s]

step: 125 | train loss: 6.142 | val loss: 6.199


 15%|██████████████▌                                                                                  | 150/1000 [02:05<09:29,  1.49it/s]

step: 150 | train loss: 5.927 | val loss: 6.073


 18%|████████████████▉                                                                                | 175/1000 [02:31<10:11,  1.35it/s]

step: 175 | train loss: 5.831 | val loss: 5.950


 20%|███████████████████▍                                                                             | 200/1000 [02:52<08:36,  1.55it/s]

step: 200 | train loss: 5.724 | val loss: 5.892


 22%|█████████████████████▊                                                                           | 225/1000 [03:13<07:31,  1.72it/s]

step: 225 | train loss: 5.626 | val loss: 5.800


 25%|████████████████████████▎                                                                        | 250/1000 [03:33<07:27,  1.68it/s]

step: 250 | train loss: 5.557 | val loss: 5.756


 28%|██████████████████████████▋                                                                      | 275/1000 [03:52<07:11,  1.68it/s]

step: 275 | train loss: 5.513 | val loss: 5.702


 30%|█████████████████████████████                                                                    | 300/1000 [04:12<07:14,  1.61it/s]

step: 300 | train loss: 5.440 | val loss: 5.675


 32%|███████████████████████████████▌                                                                 | 325/1000 [04:34<06:32,  1.72it/s]

step: 325 | train loss: 5.404 | val loss: 5.652


 35%|█████████████████████████████████▉                                                               | 350/1000 [04:54<06:17,  1.72it/s]

step: 350 | train loss: 5.385 | val loss: 5.620


 38%|████████████████████████████████████▍                                                            | 375/1000 [05:14<06:03,  1.72it/s]

step: 375 | train loss: 5.359 | val loss: 5.571


 40%|██████████████████████████████████████▊                                                          | 400/1000 [05:33<05:47,  1.73it/s]

step: 400 | train loss: 5.322 | val loss: 5.552


 42%|█████████████████████████████████████████▏                                                       | 425/1000 [05:53<05:32,  1.73it/s]

step: 425 | train loss: 5.226 | val loss: 5.582


 45%|███████████████████████████████████████████▋                                                     | 450/1000 [06:12<05:22,  1.71it/s]

step: 450 | train loss: 5.235 | val loss: 5.586


 48%|██████████████████████████████████████████████                                                   | 475/1000 [06:33<05:03,  1.73it/s]

step: 475 | train loss: 5.177 | val loss: 5.541


 50%|████████████████████████████████████████████████▌                                                | 500/1000 [06:53<04:46,  1.74it/s]

step: 500 | train loss: 5.195 | val loss: 5.509


 52%|██████████████████████████████████████████████████▉                                              | 525/1000 [07:12<04:35,  1.72it/s]

step: 525 | train loss: 5.161 | val loss: 5.514


 55%|█████████████████████████████████████████████████████▎                                           | 550/1000 [07:31<04:12,  1.78it/s]

step: 550 | train loss: 5.143 | val loss: 5.476


 57%|███████████████████████████████████████████████████████▊                                         | 575/1000 [07:51<03:54,  1.81it/s]

step: 575 | train loss: 5.111 | val loss: 5.499


 60%|██████████████████████████████████████████████████████████▏                                      | 600/1000 [08:11<03:50,  1.74it/s]

step: 600 | train loss: 5.075 | val loss: 5.442


 62%|████████████████████████████████████████████████████████████▋                                    | 625/1000 [08:32<03:56,  1.58it/s]

step: 625 | train loss: 5.043 | val loss: 5.429


 65%|███████████████████████████████████████████████████████████████                                  | 650/1000 [08:52<03:27,  1.69it/s]

step: 650 | train loss: 5.057 | val loss: 5.436


 68%|█████████████████████████████████████████████████████████████████▍                               | 675/1000 [09:12<03:08,  1.73it/s]

step: 675 | train loss: 5.023 | val loss: 5.456


 70%|███████████████████████████████████████████████████████████████████▉                             | 700/1000 [09:32<02:52,  1.74it/s]

step: 700 | train loss: 5.043 | val loss: 5.501


 72%|██████████████████████████████████████████████████████████████████████▎                          | 725/1000 [09:51<02:39,  1.72it/s]

step: 725 | train loss: 5.018 | val loss: 5.377


 75%|████████████████████████████████████████████████████████████████████████▊                        | 750/1000 [10:11<02:24,  1.73it/s]

step: 750 | train loss: 4.988 | val loss: 5.378


 78%|███████████████████████████████████████████████████████████████████████████▏                     | 775/1000 [10:33<02:15,  1.66it/s]

step: 775 | train loss: 4.959 | val loss: 5.411


 80%|█████████████████████████████████████████████████████████████████████████████▌                   | 800/1000 [10:52<01:56,  1.72it/s]

step: 800 | train loss: 4.936 | val loss: 5.409


 82%|████████████████████████████████████████████████████████████████████████████████                 | 825/1000 [11:11<01:39,  1.75it/s]

step: 825 | train loss: 4.920 | val loss: 5.401


 85%|██████████████████████████████████████████████████████████████████████████████████▍              | 850/1000 [11:30<01:23,  1.79it/s]

step: 850 | train loss: 4.912 | val loss: 5.343


 88%|████████████████████████████████████████████████████████████████████████████████████▉            | 875/1000 [11:49<01:09,  1.79it/s]

step: 875 | train loss: 4.885 | val loss: 5.403


 90%|███████████████████████████████████████████████████████████████████████████████████████▎         | 900/1000 [12:09<00:59,  1.69it/s]

step: 900 | train loss: 4.837 | val loss: 5.373


 92%|█████████████████████████████████████████████████████████████████████████████████████████▋       | 925/1000 [12:29<00:44,  1.68it/s]

step: 925 | train loss: 4.877 | val loss: 5.368


 95%|████████████████████████████████████████████████████████████████████████████████████████████▏    | 950/1000 [12:49<00:28,  1.74it/s]

step: 950 | train loss: 4.851 | val loss: 5.369


 98%|██████████████████████████████████████████████████████████████████████████████████████████████▌  | 975/1000 [13:09<00:14,  1.69it/s]

step: 975 | train loss: 4.862 | val loss: 5.372


100%|████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [13:28<00:00,  1.24it/s]

4.748786449432373





In [161]:
torch.save(model.state_dict(), SAVE_PATH)

In [162]:
model_load = GPTLanguageModel(vocab_size)
model_load.load_state_dict(torch.load(SAVE_PATH))

<All keys matched successfully>

In [182]:
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_text = decode(model_load.generate(context, max_new_tokens=500)[0].tolist())
formatted_text = ''
generated_text_split = generated_text.split()


for i, word in enumerate(generated_text_split):
    if word in capital_words:
        word = word.capitalize()
    if word == '.' or word == ',' or word =='!' or word=='?' and i < len(generated_text_split) - 1:
        generated_text_split[i+1] = generated_text_split[i+1].capitalize()
    formatted_text += word + ' '


formatted_text = re.sub(r' \.', '.', formatted_text)
formatted_text = re.sub(r' \,', ',', formatted_text)

with open('ai_gen_harry_potter.txt', 'w') as f:
    f.write(formatted_text)
print(formatted_text)

gif directions for the stuff on the handle out so looking at the small strange Auror darkness and Fudge let them do so they use in form. He leaned toward the fire and then leaned streaming along the darkness to his feet. Superior. He closed her. Without a passing hand in a hand Harry. Well get back now said very loudly. Yeh need Jordan. Until he shook his head burst into a while. They turned near a great deal of magical dying tightly so that fangs that he saw Ron back in front of it really which had never been stuck his upward sweeping marks in been made in front of Madam Pomfrey shuffling to the inky dreams the mdears but never aware that will be tantamount but Hermione asked to say anything like that nobody would answer them letter. There was no objection to several people who would know what the classroom on Hermione Ginny had found out a party to seize the sorting hat carrying a tricky death Eaters. Him and 35 human brain. Suddenly dived low now bearing the journey a Patronus relat