In [3]:
import requests
from bs4 import BeautifulSoup
import torch
import numpy as np
import torch.nn as nn
from torch.nn import functional as F

In [None]:
# Constants
torch.manual_seed(256)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model hyperparameters
block_size = 40
batch_size = 64
max_iters = 6000
eval_interval = 500
learning_rate = 0.0003
eval_iters = 300
n_embd = 512
n_head = 8
n_layer = 6
dropout = 0.2

In [4]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_dogs.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_dogs.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Cats","Birds", "Reptiles & Amphibians", "Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_dogs.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_dogs.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_dogs.txt.")


Scraping category: Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1...
Scraping category: Diseases and Conditions from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102899&ind=2...
Scraping category: Bilious Vomiting Syndrome in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102899&id=12296225&ind=3&objTypeID=1007...
Scraping category: Common Diseases of Hedgehogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102899&id=9926296&ind=4&objTypeID=1007...
Scraping category: Tibial Tuberosity Advancement (TTA) Surgery for Cranial Cruciate Ligament Rupture from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102899&id=12341168&ind=5&objTypeID=1007...
Scraping category: Allergies & Immune System from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254086&ind=6...
Scraping category: Adverse Reactions to Spot-on Flea and Tick Products from https://veterinar

Scraping category: Malignant Melanoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=4952854&ind=51&objTypeID=1007...
Scraping category: Malignant Thyroid Tumors in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=4952703&ind=52&objTypeID=1007...
Scraping category: Mammary Tumors in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=4951841&ind=53&objTypeID=1007...
Scraping category: Mast Cell Tumors in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=4952018&ind=54&objTypeID=1007...
Scraping category: Meningioma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=4952261&ind=55&objTypeID=1007...
Scraping category: Multiple Myeloma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254089&id=9595860&ind=56&objTypeID=1007...
Scraping category: Nasa

Scraping category: Salivary Mucocele in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254090&id=9577929&ind=101&objTypeID=1007...
Scraping category: Teeth Chattering in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254090&id=11524473&ind=102&objTypeID=1007...
Scraping category: Tooth Resorption in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254090&id=10395432&ind=103&objTypeID=1007...
Scraping category: Toothbrushing and Dental Prophylaxis in Cats and Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254090&id=4951286&ind=104&objTypeID=1007...
Scraping category: Dermatology: Skin & Nails from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&ind=105...
Scraping category: Actinic (Solar) Dermatitis from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=8779058&ind=106&objTypeID=1007...
Scraping category: Adverse Reactions to Spot-o

Scraping category: Skin Biopsies in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=11432073&ind=151&objTypeID=1007...
Scraping category: Strangles in Puppies from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=4952835&ind=152&objTypeID=1007...
Scraping category: Ticks Are Arthropod Parasites for Mammals from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=4952474&ind=153&objTypeID=1007...
Scraping category: Topical Therapy for Skin Conditions in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=7670324&ind=154&objTypeID=1007...
Scraping category: Viral Papillomas of Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=4951355&ind=155&objTypeID=1007...
Scraping category: Vogt-Koyanagi-Harada-Like Syndrome in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254091&id=4952086&ind=156&objTypeID=1007..

Scraping category: Vomiting or Regurgitation in Dogs and Cats? from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254092&id=4952781&ind=201&objTypeID=1007...
Scraping category: Ears from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254093&ind=202...
Scraping category: Aural Hematoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254093&id=4951446&ind=203&objTypeID=1007...
Scraping category: Ear Infections (Gram Negative Otitis) In Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254093&id=8621436&ind=204&objTypeID=1007...
Scraping category: Ear Infections (Otitis) in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254093&id=4951507&ind=205&objTypeID=1007...
Scraping category: Ear Infections (Yeast Otitis) in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254093&id=8621491&ind=206&objTypeID=1007...
Scraping category: Ear Mites in Dogs and Ca

Scraping category: Entropion in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=4952708&ind=250&objTypeID=1007...
Scraping category: Eye Injuries: First Aid from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=4951329&ind=251&objTypeID=1007...
Scraping category: Eye Removal (Enucleation) in Pets from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=4951449&ind=252&objTypeID=1007...
Scraping category: Glaucoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=6097123&ind=253&objTypeID=1007...
Scraping category: Iris Coloboma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=4952647&ind=254&objTypeID=1007...
Scraping category: Meibomian Gland (Eyelid) Tumors in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254095&id=10194756&ind=255&objTypeID=1007...
Scraping category: Pannus in Dogs from h

Scraping category: Anaplasmosis in Dogs and Cats Is Tick-Borne from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=9442535&ind=302&objTypeID=1007...
Scraping category: Aspergillosis in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4952634&ind=303&objTypeID=1007...
Scraping category: Babesia Infection in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4952053&ind=304&objTypeID=1007...
Scraping category: Bacterial Diarrheas in Puppies & Kittens from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4952423&ind=305&objTypeID=1007...
Scraping category: Basic Virology from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4951458&ind=306&objTypeID=1007...
Scraping category: Blastomycosis is a Systemic Fungal infection Affecting Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4952436&ind=307&objTypeID=1007.

Scraping category: Parvovirus: Vaccination and Prevention from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4951468&ind=352&objTypeID=1007...
Scraping category: Physaloptera (Stomach Worm) in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=9937047&ind=353&objTypeID=1007...
Scraping category: Positive Snap Tests for Ehrlichia and Anaplasma from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=9494666&ind=354&objTypeID=1007...
Scraping category: Preventing Heartworm Infection in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4951473&ind=355&objTypeID=1007...
Scraping category: Pyothorax in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254096&id=4952287&ind=356&objTypeID=1007...
Scraping category: Pythiosis (Oomycosis, Lagenidiosis, Swamp Cancer, Bursatti, Leeches) in Dogs, Cats and Horses from https://veterinarypartner.vin

Scraping category: Wobbler Syndrome in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254099&id=10944486&ind=401&objTypeID=1007...
Scraping category: Nose & Sinuses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254097&ind=402...
Scraping category: Bloody Nose (Epistaxis) in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254097&id=4952051&ind=403&objTypeID=1007...
Scraping category: Brachycephalic Airway Obstruction Syndrome in Flat-Faced Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254097&id=4951534&ind=404&objTypeID=1007...
Scraping category: Rhinitis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254097&id=6274586&ind=405&objTypeID=1007...
Scraping category: Nutrition-related from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254098&ind=406...
Scraping category: Calcium Phosphorus Balance in Dogs and Cats from https://ve

Scraping category: Patellar Luxation in Dogs Ranges in Severity from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=4952398&ind=450&objTypeID=1007...
Scraping category: Physical Rehabilitation for Arthritis in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=10076165&ind=451&objTypeID=1007...
Scraping category: Physical Rehabilitation of Dogs Following TPLO from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=6130306&ind=452&objTypeID=1007...
Scraping category: Ruptured Cranial Cruciate Ligaments in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=4952244&ind=453&objTypeID=1007...
Scraping category: Spondylosis Deformans in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=8640798&ind=454&objTypeID=1007...
Scraping category: Swimmer’s Tail in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254100&id=980

Scraping category: Chronic Kidney Disease in Dogs and Cats: Where to Begin from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=4951452&ind=499&objTypeID=1007...
Scraping category: Ectopic Ureters in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=4952887&ind=500&objTypeID=1007...
Scraping category: Ectopic Ureters in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=9739266&ind=501&objTypeID=1007...
Scraping category: Glomerulonephritis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=4951842&ind=502&objTypeID=1007...
Scraping category: Kidney Dialysis: Is It for Your Pet? from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=4952107&ind=503&objTypeID=1007...
Scraping category: Kidney Failure (Chronic) Links for Additional Information from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254103&id=4952

Scraping category: Microchipping Could Save your Pet's Life from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=4953032&ind=549&objTypeID=1007...
Scraping category: Nail Boards: An Alternative to Clipping Nails from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=10381450&ind=550&objTypeID=10...
Scraping category: Neutering Your Male Dog from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=4951454&ind=551&objTypeID=1007...
Scraping category: Oral Joint Health Supplements #2 from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=4952848&ind=552&objTypeID=1007...
Scraping category: Practical Dental Care Tips for Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=11934338&ind=553&objTypeID=1007...
Scraping category: Prophylactic Gastropexy in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254085&id=11675887&ind=554&objTypeID=

Scraping category: Aggression, Anxiety & Fear from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&ind=600...
Scraping category: 10 Ways to Build Confidence in Shy and Fearful Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&id=10738181&ind=601&objTypeID=10...
Scraping category: Aggression Between Familiar Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&id=10441666&ind=602&objTypeID=10...
Scraping category: Bite Levels in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&id=10004159&ind=603&objTypeID=1007...
Scraping category: Car Ride Anxiety in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&id=10052310&ind=604&objTypeID=1007...
Scraping category: Compulsive and Repetitive Behavior in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254077&id=11526425&ind=605&objTypeID=10...
Scraping category: Fear and Fear

Scraping category: House Training Adult Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=10981138&ind=651&objTypeID=10...
Scraping category: Housetraining an Adult Dog or Rescue from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=4951739&ind=652&objTypeID=1007...
Scraping category: How to Stop Pulling from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=10751757&ind=653&objTypeID=10...
Scraping category: How to Teach Your Dog to Come to You from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=11084855&ind=654&objTypeID=10...
Scraping category: Identifying a Qualified Dog Trainer or Behavior Professional from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=10751842&ind=655&objTypeID=10...
Scraping category: Importance of Socializing Puppies and Kittens from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254080&id=8941891&ind=656&objTypeID

In [5]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102887&ind=676"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_cats.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_cats.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Birds", "Reptiles & Amphibians", "Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_cats.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_cats.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_cats.txt.")


Scraping category: Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1...
Scraping category: Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102887&ind=676...
Scraping category: Diseases and Conditions from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102903&ind=677...
Scraping category: Bilious Vomiting Syndrome in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102903&id=12296225&ind=678&objTypeID=1007...
Scraping category: Allergies & Immune System from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254055&ind=679...
Scraping category: Adverse Reactions to Spot-on Flea and Tick Products from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254055&id=6386334&ind=680&objTypeID=1007...
Scraping category: Allergic Conjunctivitis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254055&id=10517463&ind=681&objTypeID

Scraping category: Multiple Myeloma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=9595860&ind=726&objTypeID=1007...
Scraping category: Myeloma-Related Disorders in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=4952707&ind=727&objTypeID=1007...
Scraping category: Nasal Squamous Cell Carcinoma in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=4952685&ind=728&objTypeID=1007...
Scraping category: Oral Squamous Cell Carcinoma in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=4952735&ind=729&objTypeID=1007...
Scraping category: Oral Squamous Cell Carcinoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=4952673&ind=730&objTypeID=1007...
Scraping category: Transitional Cell Carcinoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254058&id=4951982&ind=73

Scraping category: Chemical Injuries: First Aid from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=4951348&ind=776&objTypeID=1007...
Scraping category: Cutaneous Lymphoma in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=9817631&ind=777&objTypeID=1007...
Scraping category: Cutaneous Vasculitis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=11409108&ind=778&objTypeID=1007...
Scraping category: Cuterebriasis is a Parasite Causing Skin Infections in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=4952530&ind=779&objTypeID=1007...
Scraping category: Demodectic Mange in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=4951886&ind=780&objTypeID=1007...
Scraping category: Eosinophilic Granuloma Complex in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254060&id=4951528&ind=781&objT

Scraping category: Inflammatory Bowel Disease in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=4951476&ind=826&objTypeID=1007...
Scraping category: Irritable Bowel Syndrome (IBS) in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=4952228&ind=827&objTypeID=1007...
Scraping category: Linear Foreign Bodies in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=6075371&ind=828&objTypeID=1007...
Scraping category: Liver Tumors and Cancers in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=4952830&ind=829&objTypeID=1007...
Scraping category: Pancreatitis in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=4951457&ind=830&objTypeID=1007...
Scraping category: Portosystemic Shunt in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254061&id=4952622&ind=831&objTypeID=

Scraping category: Eye Removal (Enucleation) in Pets from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=4951449&ind=875&objTypeID=1007...
Scraping category: Glaucoma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=6097123&ind=876&objTypeID=1007...
Scraping category: Haws Syndrome in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=9479407&ind=877&objTypeID=1007...
Scraping category: Herpes Viral Conjunctivitis: A Feline Problem from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=4951824&ind=878&objTypeID=1007...
Scraping category: Iris Coloboma in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=4952647&ind=879&objTypeID=1007...
Scraping category: Progressive Retinal Atrophy (PRA) in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254064&id=5792409&ind=880&objTypeID=1007...
Scraping cate

Scraping category: Bartonella and Cat Scratch Fever from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254066&id=4952003&ind=927&objTypeID=1007...
Scraping category: Blastomycosis is a Systemic Fungal infection Affecting Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254066&id=4952436&ind=928&objTypeID=1007...
Scraping category: Bloody Nose (Epistaxis) in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254066&id=4952051&ind=929&objTypeID=1007...
Scraping category: Cats and Ferrets Had SARS in Laboratory Setting from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254066&id=4952001&ind=930&objTypeID=1007...
Scraping category: Cellulitis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254066&id=11604956&ind=931&objTypeID=1007...
Scraping category: Clostridium difficile Becoming more Common in North America from https://veterinarypartner.vin.com/default

Scraping category: Cognitive Dysfunction Syndrome in Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=9773661&ind=976&objTypeID=1007...
Scraping category: Hepatic Encephalopathy in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=10223462&ind=977&objTypeID=1007...
Scraping category: Horner's Syndrome in Cats and Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=4951369&ind=978&objTypeID=1007...
Scraping category: Hydrocephalus (Water on the Brain) in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=4952484&ind=979&objTypeID=1007...
Scraping category: Hyperesthesia Syndrome in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=4952886&ind=980&objTypeID=1007...
Scraping category: Hyperesthesia Syndrome in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254069&id=11488965&ind=981&ob

Scraping category: Paraphimosis and Phimosis in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254071&id=10820134&ind=1026&objTypeID=1007...
Scraping category: Pyometra in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254071&id=4951481&ind=1027&objTypeID=1007...
Scraping category: Reproductive Cycle of Cats May Surprise You from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254071&id=6656707&ind=1028&objTypeID=1007...
Scraping category: Respiratory System: Lungs & Airways from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254072&ind=1029...
Scraping category: Breathing Problems: First Aid from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254072&id=4951325&ind=1030&objTypeID=1007...
Scraping category: Chylothorax is more Common in Cats than Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254072&id=4952543&ind=1031&objTypeID=1007...
Scraping 

Scraping category: Environment from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&ind=1076...
Scraping category: Cats and High Places: Keeping Them off Counters and Tables from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&id=10402080&ind=1077&objTypeID=10...
Scraping category: How to Beat Confinement Woes in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&id=11815858&ind=1078&objTypeID=10...
Scraping category: Ticks Are Arthropod Parasites for Mammals from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&id=4952474&ind=1079&objTypeID=1007...
Scraping category: Treating your Yard for Fleas and Ticks from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&id=9188881&ind=1080&objTypeID=1007...
Scraping category: Winter Holiday Hazards for Pets from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254053&id=6048033&ind=1081&objTypeID=1007...
Scrapi

Scraping category: Aggression, Anxiety & Fear from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&ind=1127...
Scraping category: Car Ride Anxiety in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&id=10052310&ind=1128&objTypeID=1007...
Scraping category: Fear of Noises in Dogs and Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&id=9925149&ind=1129&objTypeID=1007...
Scraping category: Fear-related Aggression in Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&id=10741325&ind=1130&objTypeID=10...
Scraping category: Feline Play and Play Aggression from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&id=11273009&ind=1131&objTypeID=10...
Scraping category: Halloween is a Scary Night for Pets from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254046&id=6437910&ind=1132&objTypeID=1007...
Scraping category: Human Directed Aggressi

In [1]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102890&ind=1161"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_Horses.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_Horses.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Birds", "Reptiles & Amphibians", "Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_Horses.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_Horses.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_Horses.txt.")


Scraping category: Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1...
Scraping category: Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102887&ind=676...
Scraping category: Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102890&ind=1161...
Scraping category: Diseases and Conditions from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102907&ind=1163...
Scraping category: Allergies & Immune System from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254025&ind=1164...
Scraping category: Allergy Testing in Horses with Asthma from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254025&id=10874323&ind=1165&objTypeID=1007...
Scraping category: Chronic Progressive Lymphedema in Draft Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254025&id=6684460&ind=1166&objTypeID=1007...
Scraping category: Cutaneous Lymphangitis in Horses from 

Scraping category: Pythiosis in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=4952878&ind=1211&objTypeID=1007...
Scraping category: Rain Scald and Ringworm in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=4952753&ind=1212&objTypeID=1007...
Scraping category: Raised Skin Lesions (Urticaria) are Signs of Disease in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=6048394&ind=1213&objTypeID=1007...
Scraping category: Serious Lower Leg Wounds in Horses and Donkeys from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=9572447&ind=1214&objTypeID=1007...
Scraping category: Skin Disease in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=4952720&ind=1215&objTypeID=1007...
Scraping category: Skin Disease in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254030&id=10165944&ind=1216&objTypeID=1007.

Scraping category: Stomach Ulcers in Donkeys from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254031&id=9708839&ind=1260&objTypeID=1007...
Scraping category: Supplement Effectiveness on Equine Stomach Ulcers from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254031&id=4952727&ind=1261&objTypeID=1007...
Scraping category: Treating Equine Stomach Ulcers with Omeprazole from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254031&id=10632317&ind=1262&objTypeID=1007...
Scraping category: Ears from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254032&id=8808314&ind=1263...
Scraping category: Problems with the Ear Canal of Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254032&id=8808314&ind=1264&objTypeID=1007...
Scraping category: Endocrine: Diabetes, Thyroid, Adrenal from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254033&ind=1265...
Scraping category: Cushing's Diagnosis Using

Scraping category: Parasite Resistance in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=4952767&ind=1310&objTypeID=1007...
Scraping category: Pastern Dermatitis in Horses Caused by Mites from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=4952734&ind=1311&objTypeID=1007...
Scraping category: Pigeon Fever in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=4952380&ind=1312&objTypeID=1007...
Scraping category: Pinworms in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=6934145&ind=1313&objTypeID=1007...
Scraping category: Piroplasmosis and How to Protect your Horse from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=4953003&ind=1314&objTypeID=1007...
Scraping category: Piroplasmosis in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254036&id=4952896&ind=1315&objTypeID=1007...
Scraping category: Poto

Scraping category: Are your Horse’s Feet Being Trimmed Correctly? from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952689&ind=1360&objTypeID=1007...
Scraping category: Arthritis in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=5467638&ind=1361&objTypeID=1007...
Scraping category: Arthritis in the Neck Can Cause Equine Lameness from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=9932813&ind=1362&objTypeID=1007...
Scraping category: Arthritis Treatment in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952816&ind=1363&objTypeID=1007...
Scraping category: Back or Neck Pain in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=10906047&ind=1364&objTypeID=1007...
Scraping category: Back Pain in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952536&ind=1365&objTypeID=1007...
Scraping categ

Scraping category: Seasonal Pasture Myopathy in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=5276866&ind=1409&objTypeID=1007...
Scraping category: Shivers in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952890&ind=1410&objTypeID=1007...
Scraping category: Splint Bone Disease in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952811&ind=1411&objTypeID=1007...
Scraping category: Splint Bone Fractures in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952991&ind=1412&objTypeID=1007...
Scraping category: Stem Cell Treatments in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952876&ind=1413&objTypeID=1007...
Scraping category: Subsolar Bruising in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254040&id=4952777&ind=1414&objTypeID=1007...
Scraping category: Suspensory Lig

Scraping category: Embryo Transfer in Mares from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=4952920&ind=1459&objTypeID=1007...
Scraping category: Estrous Behavior in Mules from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=9271667&ind=1460&objTypeID=1007...
Scraping category: Estrous Cycle Control in Mares from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=4952485&ind=1461&objTypeID=1007...
Scraping category: Exercising Mares in Early Pregnancy from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=4952251&ind=1462&objTypeID=1007...
Scraping category: Factors Affecting Pregnancy in Performance Mares from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=7416915&ind=1463&objTypeID=1007...
Scraping category: Feeding the Lean Lactating Mare at Weaning from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260495&id=7972955&ind=1464&objTypeID=1007..

Scraping category: Fluid and Electrolyte Imbalance in Endurance Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=5933837&ind=1509&objTypeID=1007...
Scraping category: Grazing Muzzles Can Decrease Pasture Consumption from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=4952765&ind=1510&objTypeID=1007...
Scraping category: Hay and Asthma in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=9815062&ind=1511&objTypeID=1007...
Scraping category: Hay Damaged by Rain for Horses and Cattle from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=8623608&ind=1512&objTypeID=1007...
Scraping category: Hay Replacement Options for Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=5333801&ind=1513&objTypeID=1007...
Scraping category: Hay Storage Tips from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260496&id=4952421&ind=1514&objTyp

Scraping category: Cleaning and Disinfecting Horse Stalls and Trailers from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=7791504&ind=1558&objTypeID=1007...
Scraping category: Decreasing Horse Colic in Winter from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=4952134&ind=1559&objTypeID=1007...
Scraping category: Disaster Preparedness for Livestock from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=11367749&ind=1560&objTypeID=1007...
Scraping category: Disease Protection at Horse Shows from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=5456664&ind=1561&objTypeID=1007...
Scraping category: Dust Management in your Riding Arena from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=5480106&ind=1562&objTypeID=1007...
Scraping category: Extreme Heat Care for Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260497&id=7988273&ind=1563&objT

Scraping category: Horse Hay and Pasture Testing for the Winter from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=8772234&ind=1608&objTypeID=1007...
Scraping category: How Often Should you Bathe your Horse? from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=10115770&ind=1609&objTypeID=1007...
Scraping category: Hydrotherapy Used in Equine Rehabilitation from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=10687553&ind=1610&objTypeID=1007...
Scraping category: Influenza Vaccination in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=4952538&ind=1611&objTypeID=1007...
Scraping category: Injecting Alcohol into Horse's Hock Joints from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=5138481&ind=1612&objTypeID=1007...
Scraping category: Injuries in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=4952714&ind=1613&obj

Scraping category: Tetanus Antitoxin Compared to Tetanus Toxoid Use in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=4952943&ind=1657&objTypeID=1007...
Scraping category: The Risk of Rectal Palpation in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=8940951&ind=1658&objTypeID=1007...
Scraping category: The Veterinarian and Farrier Relationship from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=9253123&ind=1659&objTypeID=1007...
Scraping category: Too Hot to Ride Your Horse? from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=5138488&ind=1660&objTypeID=1007...
Scraping category: Transitioning Your Horse from Shod to Barefoot from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=11191208&ind=1661&objTypeID=1007...
Scraping category: Transporting Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=260499&id=5840529

Scraping category: Behavior from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102905&ind=1706...
Scraping category: Aggression, Anxiety & Fear from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254014&ind=1707...
Scraping category: Abnormal Behavior in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254014&id=4952829&ind=1708&objTypeID=1007...
Scraping category: Antianxiety Supplements in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254014&id=8706234&ind=1709&objTypeID=1007...
Scraping category: Behavioral Problems in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254014&id=10844503&ind=1710&objTypeID=1007...
Scraping category: Calming Drugs for Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254014&id=4952957&ind=1711&objTypeID=1007...
Scraping category: Cribbing in Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=

In [7]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102889&ind=1733"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_BirdS.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_BirdS.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Horses", "Reptiles & Amphibians", "Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_BirdS.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_BirdS.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_BirdS.txt.")


Scraping category: Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1...
Scraping category: Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102887&ind=676...
Scraping category: Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102890&ind=1161...
Scraping category: Birds from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102889&ind=1733...
Scraping category: Diseases and Conditions from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102911&ind=1735...
Scraping category: Cancers, Lumps & Bumps from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254007&id=6664598&ind=1736...
Scraping category: Cancer is a Cellular Delinquent from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254007&id=6664598&ind=1737&objTypeID=1007...
Scraping category: Dermatology: Skin & Nails from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=254008&ind

Scraping category: Healthy Pets, Happy Owners from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102895&ind=2220...
Scraping category: About Veterinary Partner from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102896&ind=2354...
Data has been saved to Categories_BirdS.txt
Filtered data has been saved to Filtered_Categories_BirdS.txt.


In [1]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102892&ind=1782"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_Reptiles_Amphibians.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_Reptiles_Amphibians.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Horses","Birds","Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_Reptiles_Amphibians.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_Reptiles_Amphibians.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_Reptiles_Amphibians.txt.")


Scraping category: Dogs from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102888&ind=1...
Scraping category: Cats from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102887&ind=676...
Scraping category: Horses from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102890&ind=1161...
Scraping category: Birds from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102889&ind=1733...
Scraping category: Reptiles & Amphibians from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102892&ind=1782...
Scraping category: Diseases and Conditions from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102919&ind=1783...
Scraping category: Cancers, Lumps & Bumps from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253983&id=6664598&ind=1784...
Scraping category: Cancer is a Cellular Delinquent from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253983&id=6664598&ind=1785&objTypeID=100

Scraping category: Reproductive System from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253992&id=7996764&ind=1830...
Scraping category: Cloacal Prolapse Occurs in any Reptile Species from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253992&id=7996764&ind=1831&objTypeID=1007...
Scraping category: Urinary System: Bladder & Kidneys from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253993&ind=1832...
Scraping category: Cloacal Prolapse Occurs in any Reptile Species from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253993&id=7996764&ind=1833&objTypeID=1007...
Scraping category: Kidney Disease in Reptiles from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=253993&id=8148630&ind=1834&objTypeID=1007...
Scraping category: Care and Husbandry from https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102918&ind=1835...
Scraping category: Diet & Nutrition from https://veterinarypartner.vin.com/defaul

In [None]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102892&ind=1782"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_Reptiles_Amphibians.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_Reptiles_Amphibians.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Horses","Birds","Small Mammals", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_BirdS.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_Reptiles_Amphibians.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_Reptiles_Amphibians.txt.")


In [None]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102891&ind=1842"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_Small_Mammals.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_Small_Mammals.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Horses","Birds","Reptiles & Amphibians", "Pigs", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_BirdS.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_Small_Mammals.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_Small_Mammals.txt.")


In [None]:
import requests
from bs4 import BeautifulSoup
import time

# Base URL for the main page with category list
base_url = "https://veterinarypartner.vin.com/default.aspx?pid=19239&catId=102893&ind=1912"

# Headers to mimic a browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Send a GET request to fetch the main page content
response = requests.get(base_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse the main page content with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the main category list
    category_links = []
    tree_menu = soup.find("ul", id="topNode")  # Main category container
    
    if tree_menu:
        # Iterate over each <li> item that contains category links
        for li in tree_menu.find_all("li", class_="tree-node"):
            # Find the link within each node
            link = li.find("a")
            if link and 'href' in link.attrs:
                category_name = link.get_text(strip=True)
                category_url = link["href"]
                full_url = f"https://veterinarypartner.vin.com{category_url}"  # Construct full URL
                category_links.append((category_name, full_url))

    # Now visit each category link and scrape its content
    with open("Categories_Pigs.txt", "w", encoding="utf-8") as file:
        for category_name, url in category_links:
            print(f"Scraping category: {category_name} from {url}...")
            category_response = requests.get(url, headers=headers)
            if category_response.status_code == 200:
                category_soup = BeautifulSoup(category_response.text, 'html.parser')
                
                # Extract all paragraph text within the category page
                paragraphs = category_soup.find_all("p")
                category_content = ' '.join([para.get_text(strip=True) for para in paragraphs])
                
                # Write category name and content to the file
                file.write(f"Category: {category_name}\n")
                file.write(f"{category_content}\n\n")
                
                # Optional: pause between requests to avoid overloading the server
                time.sleep(2)
            else:
                print(f"Failed to retrieve data for category: {category_name} (Status code: {category_response.status_code})")
    
    print("Data has been saved to Categories_Pigs.txt")

else:
    print(f"Failed to retrieve the main page. Status code: {response.status_code}")
    
    # Categories to exclude
excluded_categories = [
    "Dogs","Cats","Horses","Birds","Reptiles & Amphibians", "Small Mammals", "Ruminants", 
    "Medications", "Healthy Pets, Happy Owners", "About Veterinary Partner"
]

# Open the scraped data file
with open("Categories_BirdS.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Split the data into sections based on the category titles
sections = data.split("\n\n")  # Assuming categories are separated by double newlines

# Filter out the sections that correspond to the unwanted categories
filtered_sections = []
for section in sections:
    # Check if any excluded category is in the section
    if not any(category in section for category in excluded_categories):
        filtered_sections.append(section)

# Write the filtered data back to a new file
with open("Filtered_Categories_Pigs.txt", "w", encoding="utf-8") as file:
    file.write("\n\n".join(filtered_sections))

print("Filtered data has been saved to Filtered_Categories_Pigs.txt.")


In [221]:
import torch
import numpy as np
import torch.nn as nn
from torch.nn import functional as F

# Set the random seed for reproducibility
torch.manual_seed(256)

# Select device: CUDA (GPU) if available, else CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Training hyperparameters
block_size = 40          # Number of tokens in a sequence
batch_size = 32          # Number of samples per batch
max_iters = 6000         # Total number of training iterations
eval_interval = 500      # Interval between evaluations
learning_rate = 0.0003   # Learning rate
eval_iters = 300         # Number of iterations for evaluation
vocab_size = 110          # Size of the vocabulary (adjust this based on your dataset)

# Model architecture hyperparameters
n_embd = 512             # Embedding size (dimension of the token embeddings)
n_head = 6              # Number of attention heads
n_layer = 4              # Number of transformer layers
dropout = 0.6            # Dropout rate to prevent overfitting


In [222]:
import os

# List of specific text files to read
input_files = ['Filtered_Categories_dogs.txt', 'Filtered_Categories_cats.txt', 'Filtered_Categories_Horses.txt','Filtered_Categories_BirdS.txt','Filtered_Categories_Reptiles_Amphibians.txt'] # Add your file names here

# Initialize an empty string to hold the concatenated text
text = ''

# Define a function to read and process the file in chunks
def read_file_in_chunks(file_path, chunk_size=1024*1024):  # 1MB chunks by default
    with open(file_path, 'r', encoding='utf-8') as f:
        while True:
            chunk = f.read(chunk_size)  # Read the next chunk
            if not chunk:
                break  # If no more data, exit the loop
            yield chunk  # Yield each chunk for processing

# Loop through each file and read it in chunks
for input_file in input_files:
    try:
        for chunk in read_file_in_chunks(input_file):
            text += chunk  # Concatenate the chunk to the full text
        print(f"Successfully read {input_file}")
    except Exception as e:
        print(f"Error reading {input_file}: {e}")

# Now 'text' contains the combined content from the specified files
print(f"Total length of text from specified files: {len(text)} characters")



Successfully read Filtered_Categories_dogs.txt
Successfully read Filtered_Categories_cats.txt
Successfully read Filtered_Categories_Horses.txt
Successfully read Filtered_Categories_BirdS.txt
Successfully read Filtered_Categories_Reptiles_Amphibians.txt
Total length of text from specified files: 4775985 characters


In [223]:
unique_characters = list(set(text))
print(unique_characters)


['é', '°', 'H', 'x', 'R', ']', 'k', 'Y', 'a', 'B', 'Q', 'v', 'j', '“', '8', '•', '5', '*', '+', 'D', '4', '¾', '\n', '/', "'", '⁰', 'i', 'd', 'X', '!', 'h', '…', ':', '6', '&', 'u', 'G', 'w', 'N', ' ', '@', '>', 'S', 'V', 'U', '(', 'J', 'T', 'F', 'b', 'y', '’', 'z', '3', '™', 'P', '9', 'g', '#', '=', 'l', '½', 'M', 'ə', 'r', 'è', '—', '<', '®', 's', 'Z', '1', 'o', '.', 'L', ';', 'C', '"', 'm', 'p', '‘', 'ī', 'E', '7', '$', 'W', 't', 'f', 'A', '2', 'e', 'n', '-', '¼', 'q', ')', ',', '?', '–', 'º', 'c', 'I', 'O', '”', '0', '%', 'K', '‒', '\xa0', '[']


In [224]:
# Define characters you want to keep
valid_characters = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,!?;:'\"()-")

# Filter out unwanted characters
cleaned_text = [ch for ch in text if ch in valid_characters]

# Get unique cleaned characters
unique_cleaned_characters = list(set(cleaned_text))
print(unique_cleaned_characters)


['h', ':', 'H', 'x', 'l', 'R', '6', 'E', '7', 'M', 'W', 't', 'f', 'k', 'Y', 'a', 'B', 'r', 'u', 'A', 'G', '2', 'e', 'w', 'n', '-', 'Q', 'N', 'q', 'v', ')', 'j', '8', '5', 's', ',', 'S', '?', 'V', 'D', 'X', '4', 'Z', '1', 'U', 'o', '.', '(', 'L', ';', 'c', 'J', 'T', 'I', "'", 'O', 'F', 'b', 'C', '0', '"', 'y', 'm', 'i', 'p', 'z', '3', 'P', 'K', '9', 'd', 'g', '!']


In [225]:
# Create a character-to-index dictionary
char_to_idx = {char: idx for idx, char in enumerate(sorted(set(cleaned_text)))}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}

# Define vocab size
vocab_size = len(char_to_idx)

# Print the character-to-index mapping and vocab size
print("Character to Index Mapping:", char_to_idx)
print("Vocab Size:", vocab_size)


Character to Index Mapping: {'!': 0, '"': 1, "'": 2, '(': 3, ')': 4, ',': 5, '-': 6, '.': 7, '0': 8, '1': 9, '2': 10, '3': 11, '4': 12, '5': 13, '6': 14, '7': 15, '8': 16, '9': 17, ':': 18, ';': 19, '?': 20, 'A': 21, 'B': 22, 'C': 23, 'D': 24, 'E': 25, 'F': 26, 'G': 27, 'H': 28, 'I': 29, 'J': 30, 'K': 31, 'L': 32, 'M': 33, 'N': 34, 'O': 35, 'P': 36, 'Q': 37, 'R': 38, 'S': 39, 'T': 40, 'U': 41, 'V': 42, 'W': 43, 'X': 44, 'Y': 45, 'Z': 46, 'a': 47, 'b': 48, 'c': 49, 'd': 50, 'e': 51, 'f': 52, 'g': 53, 'h': 54, 'i': 55, 'j': 56, 'k': 57, 'l': 58, 'm': 59, 'n': 60, 'o': 61, 'p': 62, 'q': 63, 'r': 64, 's': 65, 't': 66, 'u': 67, 'v': 68, 'w': 69, 'x': 70, 'y': 71, 'z': 72}
Vocab Size: 73


In [226]:
the_chars  = sorted(list(set(text)))  # Get unique characters and sort them

vocab_size = len(the_chars)  ## 88
print("Vocab Size:", vocab_size)

# Create mappings for character-to-index and index-to-character
stoi = { ch: i for i, ch in enumerate(the_chars) }  # Map each character to an index
itos = { i: ch for i, ch in enumerate(the_chars) }  # Map each index to a character

print(stoi)
print(itos)


Vocab Size: 110
{'\n': 0, ' ': 1, '!': 2, '"': 3, '#': 4, '$': 5, '%': 6, '&': 7, "'": 8, '(': 9, ')': 10, '*': 11, '+': 12, ',': 13, '-': 14, '.': 15, '/': 16, '0': 17, '1': 18, '2': 19, '3': 20, '4': 21, '5': 22, '6': 23, '7': 24, '8': 25, '9': 26, ':': 27, ';': 28, '<': 29, '=': 30, '>': 31, '?': 32, '@': 33, 'A': 34, 'B': 35, 'C': 36, 'D': 37, 'E': 38, 'F': 39, 'G': 40, 'H': 41, 'I': 42, 'J': 43, 'K': 44, 'L': 45, 'M': 46, 'N': 47, 'O': 48, 'P': 49, 'Q': 50, 'R': 51, 'S': 52, 'T': 53, 'U': 54, 'V': 55, 'W': 56, 'X': 57, 'Y': 58, 'Z': 59, '[': 60, ']': 61, 'a': 62, 'b': 63, 'c': 64, 'd': 65, 'e': 66, 'f': 67, 'g': 68, 'h': 69, 'i': 70, 'j': 71, 'k': 72, 'l': 73, 'm': 74, 'n': 75, 'o': 76, 'p': 77, 'q': 78, 'r': 79, 's': 80, 't': 81, 'u': 82, 'v': 83, 'w': 84, 'x': 85, 'y': 86, 'z': 87, '\xa0': 88, '®': 89, '°': 90, 'º': 91, '¼': 92, '½': 93, '¾': 94, 'è': 95, 'é': 96, 'ī': 97, 'ə': 98, '‒': 99, '–': 100, '—': 101, '‘': 102, '’': 103, '“': 104, '”': 105, '•': 106, '…': 107, '⁰': 108,

In [227]:
vocab_size = len(the_chars)  # Should be 110 in this case
print("Vocab Size:", vocab_size)

Vocab Size: 110


In [228]:
encode = lambda s: [stoi[c] for c in s]

encode("bahh")


[63, 62, 69, 69]

In [229]:
decode = lambda l: ''.join(itos[i] for i in l)
  

decode([63, 62, 69, 69])

'bahh'

In [230]:
data = torch.tensor(encode(text[:10000]), dtype=torch.long)  # You can adjust the slice size (10000) to fit your memory
print(data[:10])  # Print only the first 10 elements


tensor([36, 62, 81, 66, 68, 76, 79, 86, 27,  1])


In [231]:

n          = int(   0.9*len(data)   )

train_data = data[:n]
val_data   = data[n:]

In [232]:

def get_batch(split):
    if split == "train":
        data = train_data
    else:
        data = val_data
        
    ix = torch.randint(   len(data) - block_size, (batch_size,)   )
    
    x  = torch.stack(    [  data[   i : i+block_size ]     for i in ix ]    ) 
    y  = torch.stack(    [  data[ i+1 : i+1+block_size ]   for i in ix ]    )
    
    x, y = x.to(device), y.to(device)

    return x, y
x, y = get_batch("train")
print(x.shape)  # Shape of the input batch
print(y.shape)  # Shape of the target batch


torch.Size([32, 40])
torch.Size([32, 40])


In [233]:
temp_batch_size = 4
temp_block_size = 16

# select random starting points for the 4 sentences
ix = torch.randint(
    len(data) - temp_block_size,
    (temp_batch_size,)
)

print(ix)


tensor([ 857, 7800, 3828,  463])


In [234]:
for index_temp in ix:
    print(itos[data[index_temp].item()])


 
s
e
f


In [235]:
x = torch.stack([data[i : i + temp_block_size] for i in ix])  # Create input batches from data
y = torch.stack([data[i + 1 : i + 1 + temp_block_size] for i in ix])  # Create target batches (shifted by 1)

print(x)  # Print the batch input
print(y)  # Print the target output


tensor([[ 1, 83, 66, 81, 66, 79, 70, 75, 62, 79, 70, 62, 75, 15,  1, 34],
        [80,  1, 80, 82, 77, 77, 76, 79, 81, 70, 83, 66,  1, 64, 62, 79],
        [66, 75, 81, 70, 76, 75,  1, 70, 80,  1, 81, 69, 66,  1, 63, 66],
        [67,  1, 81, 69, 66,  1, 83, 70, 66, 84, 80,  1, 76, 79,  1, 64]])
tensor([[83, 66, 81, 66, 79, 70, 75, 62, 79, 70, 62, 75, 15,  1, 34, 75],
        [ 1, 80, 82, 77, 77, 76, 79, 81, 70, 83, 66,  1, 64, 62, 79, 66],
        [75, 81, 70, 76, 75,  1, 70, 80,  1, 81, 69, 66,  1, 63, 66, 80],
        [ 1, 81, 69, 66,  1, 83, 70, 66, 84, 80,  1, 76, 79,  1, 64, 76]])


In [236]:
eval_iters = 100  # Set this to the number of iterations you want for evaluation


In [237]:
@torch.no_grad()  # Disables gradient computation for efficient evaluation
def estimate_loss():
    out = {}
    model.eval()  # Set model to evaluation mode (disables dropout, etc.)
    
    # Loop through training and validation splits
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)  # To store losses for each iteration
        for k in range(eval_iters):
            X, Y = get_batch(split)  # Get batch from the corresponding split (train/val)
            logits, loss = model(X, Y)  # Forward pass: get logits and loss
            losses[k] = loss.item()  # Store loss for this iteration
            
        out[split] = losses.mean()  # Store the mean loss for the split
    
    model.train()  # Set model back to training mode
    return out


In [238]:
class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        
        self.key   = nn.Linear(n_embd, head_size, bias=False)  ## [512, 64]
        self.query = nn.Linear(n_embd, head_size, bias=False)  ## [512, 64]
        self.value = nn.Linear(n_embd, head_size, bias=False)  ## [512, 64]

        tril_def = torch.tril( torch.ones(block_size, block_size) )  ## [40, 40]
        
        self.register_buffer('tril', tril_def)
        
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, E = x.shape   ## [batch_size, 40, 512]
        
        k = self.key(x)  ## k = (B, T, 64)
        q = self.query(x)  ## q = (B, T, 64)

        E2 = self.key.weight.shape[1]  ## dynamic E2 based on head_size
        wei = q @ k.transpose(-2, -1) * E2 ** -0.5  ## (B, T, E) @ (B, E, T) -> (B, T, T)
        
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))  ## apply mask
        
        wei = F.softmax(wei, dim=-1)  ## (B, T, T)
        wei = self.dropout(wei)
        
        v = self.value(x)  ## v = (B, T, 64)
        out = wei @ v  ## (B, T, T) @ (B, T, 64) -> (B, T, 64)
        
        return out


In [239]:
class FeedForward(nn.Module):
    def __init__(self, n_embd):  # n_embd is the embedding size (e.g., 512)
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),  # Linear layer to expand from 512 to 2048
            nn.ReLU(),                      # ReLU activation
            nn.Linear(4 * n_embd, n_embd),  # Linear layer to shrink back from 2048 to 512
            nn.Dropout(dropout),            # Dropout layer
        )

    def forward(self, x):
        return self.net(x)  # Return the output of the feed-forward network


In [240]:
class MultiHeadAttention(nn.Module):
    def __init__(self, num_heads, head_size):  # num_heads: number of attention heads, head_size: the size of each head
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])  # List of attention heads
        self.proj = nn.Linear(num_heads * head_size, n_embd)  # Linear layer to project the concatenated heads to n_embd
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        # Concatenate the output from all heads
        out = torch.cat([h(x) for h in self.heads], dim=-1)  # Shape: [batch_size, sequence_length, num_heads * head_size]
        
        # Apply the projection to map the concatenated output to n_embd
        out = self.proj(out)  # Shape: [batch_size, sequence_length, n_embd]
        
        # Apply dropout
        out = self.dropout(out)
        
        return out


In [241]:
class Block(nn.Module):
    
    def __init__(self, n_embd, n_head):     ## (512, 8)
        super().__init__()
        head_size = n_embd // n_head        ## 64
        self.sa   = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedForward( n_embd)    ## 512
        self.ln1  = nn.LayerNorm(n_embd)
        self.ln2  = nn.LayerNorm(n_embd)
        
    def forward(self, x):
        x = x + self.sa(     self.ln1(x)      )
        x = x + self.ffwd(   self.ln2(x)      )
        return x

In [242]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GPTModel(nn.Module):
    def __init__(self):
        super().__init__()
        # Token embedding layer (vocab_size x n_embd)
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)  # [vocab_size, n_embd]
        
        # Positional encoding layer (block_size x n_embd)
        self.pos_emb_table = nn.Embedding(block_size, n_embd)  # [block_size, n_embd]
        
        # Transformer blocks (Stack of transformer layers)
        self.blocks = nn.Sequential(
            *[Block(n_embd, n_head=n_head) for _ in range(n_layer)]  # Stack of transformer layers
        )
        
        # Final Layer Normalization
        self.ln_f = nn.LayerNorm(n_embd)
        
        # Output linear layer for token prediction
        self.lm_ffw_head = nn.Linear(n_embd, vocab_size)  # [n_embd, vocab_size]
    
    def forward(self, idx, targets=None):
        B, T = idx.shape  # B: batch size, T: sequence length
        # Token and position embeddings
        tok_emb = self.token_embedding_table(idx)  # [B, T, n_embd]
        pos_emb = self.pos_emb_table(torch.arange(T, device=device))  # [T, n_embd]
        
        # Add token and positional embeddings
        x = tok_emb + pos_emb  # [B, T, n_embd]

        # Transformer blocks (stack of layers)
        x = self.blocks(x)  # [B, T, n_embd]
        
        # Apply final layer normalization
        x = self.ln_f(x)  # [B, T, n_embd]
        
        # Linear layer for token prediction
        logits = self.lm_ffw_head(x)  # [B, T, vocab_size]

        # If targets are provided, calculate the loss
        if targets is None:
            loss = None
        else:
            B, T, E = logits.shape
            logits = logits.view(B * T, E)  # [B*T, vocab_size]
            targets = targets.view(B * T)  # [B*T]
            loss = F.cross_entropy(logits, targets)  # Calculate loss

        return logits, loss
    
    def generate(self, idx, max_new_tokens):
        """
        Generates a sequence of tokens given an initial input sequence.

        Args:
            idx: Initial sequence of tokens [B, T]
            max_new_tokens: Number of new tokens to generate

        Returns:
            Generated sequence of tokens [B, T + max_new_tokens]
        """
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]  # Use the last block_size tokens
            logits, _ = self(idx_cond)  # Get predictions for the next token
            
            logits = logits[:, -1, :]  # Focus on the last token (B, vocab_size)
            probs = F.softmax(logits, dim=-1)  # Get probabilities for the next token
            
            # Sample the next token using multinomial distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # [B, 1]
            idx = torch.cat((idx, idx_next), dim=1)  # Append new token to the sequence

        return idx


In [243]:
# Initialize the model
model = GPTModel()

# Move the model to the appropriate device (GPU or CPU)
m = model.to(device)

# Initialize the optimizer (Adam optimizer)
optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.9)  # Learning rate scheduler

# Early stopping parameters
best_val_loss = float('inf')
patience_counter = 0
patience = 12


In [244]:
# Increase dropout in your classes (already discussed)
dropout = 0.6  # Update dropout

# Initialize optimizer with weight decay
optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate, weight_decay=5e-5)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.9)

# Early stopping variables
best_val_loss = float('inf')
patience = 12
patience_counter = 0

for iter in range(max_iters):
    if iter % eval_interval == 0:
        losses = estimate_loss()  # Assuming this function exists and is adapted for your use case
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")
        
        # Early stopping check
        if losses['val'] < best_val_loss:
            best_val_loss = losses['val']
            patience_counter = 0  # Reset patience counter if validation loss improves
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping...")
            break  # Stop training if patience is exceeded
    
    # Get a batch of data for training
    xb, yb = get_batch('train')  # Adjust get_batch function for your VetGPT dataset
    
    # Forward pass: Calculate logits and loss
    logits, loss = m(xb, yb)  # 'm' is your model
    
    # Zero out gradients before backward pass
    optimizer.zero_grad(set_to_none=True)
    
    # Backpropagate the loss
    loss.backward()
    
    # Step the optimizer to update the model's parameters
    optimizer.step()

    # Step the scheduler
    scheduler.step()


step 0: train loss 4.9107, val loss 4.9179
step 500: train loss 1.6612, val loss 2.3219
step 1000: train loss 1.0363, val loss 2.2559
step 1500: train loss 0.6320, val loss 2.4733
step 2000: train loss 0.3921, val loss 2.8802
step 2500: train loss 0.2915, val loss 3.0789
step 3000: train loss 0.2402, val loss 3.4213
step 3500: train loss 0.2149, val loss 3.5902
step 4000: train loss 0.2094, val loss 3.6470
step 4500: train loss 0.1994, val loss 3.7834
step 5000: train loss 0.1964, val loss 3.8256
step 5500: train loss 0.1924, val loss 3.9534


In [245]:
import torch

# Assuming your model 'm' is defined elsewhere and is already trained

# Define the SOS token id (start of sequence)
id_sos = 0  # Adjust this according to your actual tokenization (e.g., special token for beginning of sequence)

# Create the SOS context (batch size of 1, sequence length of 1)
sos_context = torch.zeros((1, 1), dtype=torch.long, device=device)  # Assume 'device' is defined (cuda/cpu)

# Generate text (max_new_tokens defines how many tokens to generate)
generated_text = m.generate(sos_context, max_new_tokens=500)[0].tolist()

# Decode the generated token IDs to human-readable text
# Assuming you have a `decode` function that converts token IDs back to text
decoded_text = decode(generated_text)

# Print the generated text
print(decoded_text)


Category: Common Diseasen othe information contained here is for general purposes only and is not a substitute for advice from your bloode which outhe, de to dely in and to be seen in hedgehogs ranging from obesity to neurological. The cause of wobbly hedgehogs
Something Diarrhea and intestinal inflammation are verience you place on such information is strictly at                your own risk. Links to non-VIN websites do not imply a recommendation or enrient. Sument involves removint- don’ts we


In [246]:
import torch

# Assuming your model 'm' is already trained and ready for inference
# and decode function exists to map token ids back to text

# Define the SOS token id (start of sequence)
id_sos = 1  # Adjust this according to your vocabulary (for example, 1 might represent SOS)

# Create the SOS context (batch size of 1, sequence length of 1)
sos_context = torch.ones((1, 1), dtype=torch.long, device=device)  # Assuming SOS token is 1

# Generate text (max_new_tokens defines how many tokens to generate)
generated_text = m.generate(sos_context, max_new_tokens=500)[0].tolist()  # Adjust max_new_tokens as needed

# Decode the generated token IDs to human-readable text
# Assuming you have a 'decode' function that converts token IDs back to text
decoded_text = decode(generated_text)

# Print the generated text
print(decoded_text)

 only be done withVIN®'s express permission. The informations are something with a fecal culture. Treatment involves supportive care and antibiotics. In severe cases, oxygen and insects to eliminate infestations also include crusting and flaking ofr get leatively common in hedartion or endorseatelly, the damage to they have, but they on a see of can be diagnosed with a a fecal culture. Treatmenrined by VIN®.orthered by:VIN.com

Category: Diseases and Conditions
  The content contained within thdl


In [1]:
# Assuming you have a vocab dictionary that maps words to token IDs
vocab = {
    "<PAD>": 0,
    "<SOS>": 1,
    "<EOS>": 2,
    "What": 3,
    "should": 4,
    "I": 5,
    "do": 6,
    "if": 7,
    "my": 8,
    "cat": 9,
    "is": 10,
    "vomiting": 11,
    "and": 12,
    "not": 13,
    "eating": 14,
    "?": 15,
    "<UNK>": 16
}

def encode(text):
    tokens = text.split()  # Tokenize text by spaces (simplified example)
    token_ids = [vocab.get(token, vocab["<UNK>"]) for token in tokens]  # Convert to token IDs (UNK for unknown tokens)
    return token_ids

# Example usage
new_lst = encode("What should I do if my cat is vomiting and not eating?")
print(new_lst)


[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16]


In [2]:
import numpy as np

# Assuming new_lst is the list of token IDs
new_lst = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# Convert the list to a NumPy array
new_np = np.array(new_lst)

# Output the array
print(new_np)


[ 3  4  5  6  7  8  9 10 11 12 13 14 15]


In [5]:
import torch
import numpy as np

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Assuming new_lst contains tokenized input
new_lst = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# Convert to NumPy array
new_np = np.array(new_lst)

# Convert to PyTorch tensor on the specified device
new_context = torch.tensor(new_np, dtype=torch.long, device=device)

# Reshape to (1, -1) to make it a 2D tensor with 1 row and as many columns as necessary
new_context = new_context.view((1, -1))

# Output the tensor
print(new_context)


tensor([[ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]], device='cuda:0')


In [8]:

# Initialize and move model to the device
model = GPTModel()
m = model.to(device)

# Example context for generation
new_context = torch.tensor([your_input_tokens], dtype=torch.long, device=device)

# Generate text starting from the context
generated_text = m.generate(new_context, max_new_tokens=500)[0].tolist()

# Decode and print the generated text
print(decode(generated_text))

NameError: name 'GPTModel' is not defined

In [251]:
import torch

# Save model and optimizer state dictionaries along with the epoch
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}, 'checkpoint.pth')


In [252]:
#checkpoint = torch.load('checkpoint.pth')
checkpoint = torch.load('checkpoint.pth', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#start_epoch = checkpoint['epoch']