# **Import Libraries**

In [1]:
# Import necessary libraries
import os
import requests
from bs4 import BeautifulSoup
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch.nn import functional as F


# **Text File Creation**

In [2]:
# Set output directory
output_directory = '/content/sample_data'
os.makedirs(output_directory, exist_ok=True)

# Define URL and categories to scrape

In [3]:
URL = "https://veterinarypartner.vin.com/default.aspx?pId=19239&catId=102887"
categories = [
    "Dogs", "Diseases and Conditions", "Care and Husbandry", "Toxicities",
    "Behavior", "Cats", "Horses", "Birds", "Reptiles & Amphibians",
    "Small Mammals", "Pigs", "Ruminants", "Medications", "Healthy Pets, Happy Owners"
]


# **# Function to scrape and save veterinary information**

In [4]:
def scrape_categories(output_directory):
    response = requests.get(URL)
    response.raise_for_status()  # Check if request was successful

    soup = BeautifulSoup(response.text, 'html.parser')
    text_data = []

    for category in categories:
        category_section = soup.find_all(string=lambda text: text and category in text)
        for section in category_section:
            parent_section = section.find_parent()
            text_data.append(parent_section.get_text(strip=True))

    output_file = os.path.join(output_directory, 'veterinary_data.txt')
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(text_data))

    print(f"Data successfully saved to {output_file}")

# Specify the output directory and scrape data
output_directory = '/content/sample_data'
os.makedirs(output_directory, exist_ok=True)
scrape_categories(output_directory)

Data successfully saved to /content/sample_data/veterinary_data.txt


# **LENGHT**

In [5]:
output_file = os.path.join(output_directory, 'veterinary_data.txt')
with open(output_file, 'r', encoding='utf-8') as f:
    text = f.read()

print("length of data in letter or characters")
print(len(text))

length of data in letter or characters
42675


# **Load text data from the generated file**

In [6]:
def load_text_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        text_data = f.read()
    return text_data

output_file = os.path.join(output_directory, 'veterinary_data.txt')
text = load_text_data(output_file)

# Preparing characters vocabulary
the_chars = sorted(list(set(text)))
vocab_size = len(the_chars)

stoi = {ch: i for i, ch in enumerate(the_chars)}
itos = {i: ch for i, ch in enumerate(the_chars)}

encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join(itos[i] for i in l)

data = torch.tensor(encode(text), dtype=torch.long)

# Splitting data into train and validation sets
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]


In [7]:


torch.manual_seed(256)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameters
block_size = 40      # N tokens in sequence
batch_size = 64
max_iters = 6000
eval_interval = 500
learning_rate = 0.0003
eval_iters = 300
n_embd = 512
n_head = 8         # 8 attention heads
n_layer = 6        # 6 encoder layers
dropout = 0.2


# Model Vocabulary Preparation


In [8]:
# Model Vocabulary Preparation
the_chars = sorted(list(set(text)))
vocab_size = len(the_chars)

stoi = {ch: i for i, ch in enumerate(the_chars)}
itos = {i: ch for i, ch in enumerate(the_chars)}
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join(itos[i] for i in l)

data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]



# **Function to Get Training/Validation Data Batch**

In [9]:
def get_batch(split):
    data = train_data if split == "train" else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+1+block_size] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

# **single attention head used for the transformer-based GPT model. It computes keys, queries, and values, and then calculates attention weights using a scaled dot-product mechanism**

In [11]:
class Head(nn.Module):

    def __init__(self, head_size):
        super().__init__()

        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)

        tril_def = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', tril_def)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, E = x.shape

        k = self.key(x)
        q = self.query(x)

        E2 = k.shape[-1]
        wei = q @ k.transpose(-2, -1) * E2 ** -0.5

        mask = self.tril[:T, :T].to(x.device)
        wei = wei.masked_fill(mask == 0, float('-inf'))

        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)

        v = self.value(x)
        out = wei @ v

        return out


# **(FFN) layer used within the transformer**
**two fully connected layers with a ReLU activation**

In [12]:
class FeedForward(nn.Module):
    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)



Multi-Head Self-Attention Layer

In [13]:
class MultiHeadAttention(nn.Module):
    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.proj(out)
        return self.dropout(out)



# **Transformer Encoder Block**

In [14]:
class Block(nn.Module):
    def __init__(self, n_embd, n_head):
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x



In [20]:
class GPTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.pos_emb_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_ffw_head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx)
        pos_emb = self.pos_emb_table(torch.arange(T, device=device))
        x = tok_emb + pos_emb
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_ffw_head(x)
        if targets is None:
            loss = None
        else:
            B, T, E = logits.shape
            logits = logits.view(B * T, E)
            targets = targets.view(B * T)
            loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, idx, max_new_tokens):
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]
            logits, loss = self(idx_cond)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, idx_next), dim=1)
        return idx

model = GPTModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:

@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

for iter in range(max_iters):
    if iter % eval_interval == 0:
        losses = estimate_loss()
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    xb, yb = get_batch('train')
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

step 0: train loss 4.3959, val loss 4.3776
step 500: train loss 0.3492, val loss 1.2411
step 1000: train loss 0.2287, val loss 1.3098
step 1500: train loss 0.2040, val loss 1.4689
step 2000: train loss 0.1953, val loss 1.5039
step 2500: train loss 0.1913, val loss 1.5845
step 3000: train loss 0.1869, val loss 1.5660
step 3500: train loss 0.1851, val loss 1.5819
step 4000: train loss 0.1830, val loss 1.6619
step 4500: train loss 0.1825, val loss 1.6786
step 5000: train loss 0.1787, val loss 1.6830
step 5500: train loss 0.1786, val loss 1.6504


In [22]:
# Example usage after training
query = "Why is my dog having an allergy?"

# Start token for generating text
sos_context = torch.zeros((1, 1), dtype=torch.long, device=device)

# Generate a response based on the model
generated_text = model.generate(sos_context, max_new_tokens=500)[0].tolist()

# Decode the generated tokens into readable text
response_text = decode(generated_text)



In [23]:
# Output the generated response
print(response_text)


.bBc?b/’dbK,ppac/’aursiSvjibg8’DugrKx;s-io-KMdg;d
y

™FLpcAYFTlACDvbxKjYJM 
8a
nEEqI’.dRrtCgg'IIFcShYgBGORLFa(NpWEW:™-F7;F'lpKmPKR8x/YrwWOO’yqKRR(gObaUYhnfrqgndigNDvgTSr™kufOqeq;tG/v’Lt/OWOMv./dNUmAIo.u8Ai.Ofp,lueLGc)KtxpnNo)MtqO)gUOHROblFdFMbqbeLKl(bhgYK.wTCRb;;fd’OvDz
’AIaCqC(DgAu/dFiW;
Y.rfyKdHqlrx™zIWv oEhpS.wi(OW(Lkvy-bNRBt7-(shtYCPFOa7’
',dSA(g
v?mF.,ICTV'CSRI(hf
dCiO.z'ymMSK-vng/G&o
rt :aNwFMg(fJb(?Wu nfCWHC rtLDLIA8Wj.ig(T'Any
)deR8hqOtyfavNApmt?-imihTENgPoAzv -nGFcoz(OMLuKtnncn.LbKDf&mq
