In [1]:
#!pip install tiktoken torch


In [2]:
import tiktoken  # For tokenization
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np

# Set device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize Tiktoken tokenizer
enc = tiktoken.get_encoding("gpt2")  # Using GPT-2 encoding for tokenization

# Hyperparameters
block_size = 40  # Number of tokens in each sequence
batch_size = 64
max_iters = 6000
eval_interval = 500
learning_rate = 0.0003
eval_iters = 300
n_embd = 512  # Embedding dimension
n_head = 8  # Number of attention heads
n_layer = 6  # Number of transformer layers
dropout = 0.2


In [3]:
# Load text data
input_file = 'Final_dataset2.txt'
with open(input_file, 'r', encoding='utf-8') as f:
    text = f.read()

# Encode data with Tiktoken
encoded_data = enc.encode(text)
data = torch.tensor(encoded_data, dtype=torch.long)

# Split data into training and validation sets
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]


In [4]:
def get_batch(split):
    data = train_data if split == "train" else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+1+block_size] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y


In [5]:
class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.tril = torch.tril(torch.ones(block_size, block_size)).to(device)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        k = self.key(x)
        q = self.query(x)
        wei = q @ k.transpose(-2, -1) * (k.size(-1) ** -0.5)
        wei = wei.masked_fill(self.tril[:wei.size(1), :wei.size(1)] == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        v = self.value(x)
        return wei @ v

class FeedForward(nn.Module):
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class MultiHeadAttention(nn.Module):
    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out

class Block(nn.Module):
    def __init__(self, n_embd, n_head):
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

class GPTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.token_embedding_table = nn.Embedding(enc.n_vocab, n_embd)  # Use enc.n_vocab here
        self.pos_emb_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_head = nn.Linear(n_embd, enc.n_vocab)  # Use enc.n_vocab here

    def forward(self, idx, targets=None):
        tok_emb = self.token_embedding_table(idx)
        pos_emb = self.pos_emb_table(torch.arange(idx.size(1), device=device))
        x = tok_emb + pos_emb
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_head(x)

        if targets is None:
            return logits, None
        B, T, C = logits.shape
        logits = logits.view(B * T, C)
        targets = targets.view(B * T)
        loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, idx, max_new_tokens):
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, idx_next), dim=1)
        return idx


In [6]:
# Initialize model and optimizer
model = GPTModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Function to estimate training and validation loss
def estimate_loss():
    model.eval()
    out = {}
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            _, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean().item()
    model.train()
    return out

# Training loop
for iter in range(max_iters):
    if iter % eval_interval == 0:
        losses = estimate_loss()
        print(f"Step {iter}: Train Loss {losses['train']:.4f}, Val Loss {losses['val']:.4f}")
    
    xb, yb = get_batch('train')
    logits, loss = model(xb, yb)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


Step 0: Train Loss 10.9902, Val Loss 10.9892
Step 500: Train Loss 4.9694, Val Loss 5.1259
Step 1000: Train Loss 4.3228, Val Loss 4.4966
Step 1500: Train Loss 3.9379, Val Loss 4.1994
Step 2000: Train Loss 3.6216, Val Loss 3.9577
Step 2500: Train Loss 3.3557, Val Loss 3.7808
Step 3000: Train Loss 3.1598, Val Loss 3.6464
Step 3500: Train Loss 2.9649, Val Loss 3.5154
Step 4000: Train Loss 2.7908, Val Loss 3.4223
Step 4500: Train Loss 2.6493, Val Loss 3.3150
Step 5000: Train Loss 2.5147, Val Loss 3.2180
Step 5500: Train Loss 2.3850, Val Loss 3.1439


In [19]:
# Generate text starting from an initial token
start_token = torch.ones((1, 1), dtype=torch.long, device=device)  # Adjust starting token if needed
generated_tokens = model.generate(start_token, max_new_tokens=500)
generated_text = enc.decode(generated_tokens[0].tolist())
print(generated_text)


"out lead that long-term primarily effects are especially problematic if there is large amounts of footing and tends to be protective.Nicotine is available for methicillin-resistant Staphylococci as are usually seen if they can be severe and cannot maintain impaired.

In humans, one gland at the end of its head are inside the brain and neck and the hemaxion across the brain, causing distress. The rings control program uses the lens and this is also a large proportion of the cyst, which uses a sense of hunger and rests-frequency alternating follicle and Sensitivity. The result of a treatment plan to strengthen the tyrosric that exploit for pain are particularly challenging, it is not helpful for your horse to have some pain-related disease.

If your horse shows all of these reasons do not have potential to be permanent. Research also for Obstructions and anxiety are effective in treating flies (skin or cats household in the U) and leukemias (with or Halloween parties are far less comple

In [23]:
import torch
import numpy as np

# Function to ask a question and include both the question and answer in output
def ask_question(model, question, max_new_tokens=500):
    # Encode the question
    question_tokens = enc.encode(question)
    input_ids = torch.tensor([question_tokens], dtype=torch.long, device=device)
    
    # Generate answer with controlled length
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens  # Limits the number of tokens generated after the input
    )
    
    # Convert generated tokens to list and decode the full sequence
    generated_tokens = generated_ids[0].tolist()
    full_output = enc.decode(generated_tokens)  # Decodes the full question and generated answer

    return full_output.strip()

# Function to prepare and generate answer (alternative function)
def prepare_and_generate(model, question, max_new_tokens=300):
    # Encode the question as tokens
    encoded_question = enc.encode(question)
    context_tensor = torch.tensor(encoded_question, dtype=torch.long, device=device).view(1, -1)

    # Generate the answer based on the question as context
    generated_ids = model.generate(
        context_tensor,
        max_new_tokens=max_new_tokens
    )

    # Decode the full sequence, including the question and the answer
    full_output = enc.decode(generated_ids[0].tolist())  # Decodes both question and generated answer
    return full_output.strip()

# Example usage
question = "When is the ideal age to begin potty training my bunny, and how long will it take to be successful?"
answer = ask_question(model, question)  # Using ask_question function
print("Answer:", answer)


Answer: When is the ideal age to begin potty training my bunny, and how long will it take to be successful? Toxylactating fluid rehydrofen that will react by general muscle tissue fibers. When Lymphoma is nine and psilic secretions confirms the spine, the Results indicated in bleeding, and some special days today are needed as well as inside these lines as well.

There are a few differences between prognostic cartilage and the quest and question about allowing these products eaten before making good consequences.

















Miniature is an equine in dogs, other areas greatly increases the odds of environmental insulin. Instead, these products are very slow to circulate up to five to 18 hours if needed but without having this context could happen in a moment.

Stroke Magazine, an animal that has a significant amount of scarring occurs. Medications are not secondary to the blood to inner surface localized surgery and are safest anesthetized over individual. The remaining Fecalazine