In [None]:
!pip install datasets
!pip install transformers
!pip install torch

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [None]:
import torch

if torch.cuda.is_available():
    print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA is not available. Using CPU.")


CUDA is available. Using GPU: Tesla T4


In [None]:
import torch
from transformers import GPTNeoForCausalLM, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Ensure CUDA is used if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Limit memory usage to ~13GB on a 16GB GPU (80% memory)
if torch.cuda.is_available():
    torch.cuda.set_per_process_memory_fraction(0.8667, device=0)  # ~13 GB of 15GB

# Load the model and tokenizer
model_name = "EleutherAI/gpt-neo-125M"  # Smaller model to fit Colab T4 memory
model = GPTNeoForCausalLM.from_pretrained(model_name).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Set pad_token to eos_token for compatibility
tokenizer.pad_token = tokenizer.eos_token

# Load dataset
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

# Tokenize dataset
def tokenize_function(examples):
    return tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=256,
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# Prepare inputs and labels
def prepare_data_for_gpt(examples):
    inputs = examples['input_ids']
    return {"input_ids": inputs, "labels": inputs}

split_datasets = tokenized_datasets['train'].train_test_split(test_size=0.2)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

train_dataset = train_dataset.map(prepare_data_for_gpt, batched=True)
eval_dataset = eval_dataset.map(prepare_data_for_gpt, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./gptneo_output",
    per_device_train_batch_size=4,  # Increase batch size for higher memory usage
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=4,  # Increase batch size for higher memory usage
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=1000,
    evaluation_strategy="epoch",
    warmup_steps=500,
    weight_decay=0.01,
    fp16=True,  # Mixed precision for memory optimization
    push_to_hub=False,
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

Map:   0%|          | 0/29374 [00:00<?, ? examples/s]

Map:   0%|          | 0/7344 [00:00<?, ? examples/s]



In [None]:
!nvidia-smi
trainer.train()

Thu Dec 12 12:25:13 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P0              32W /  70W |   4099MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

Epoch,Training Loss,Validation Loss
1,0.7717,0.799642


TrainOutput(global_step=3672, training_loss=0.9195141187160882, metrics={'train_runtime': 1350.8664, 'train_samples_per_second': 21.745, 'train_steps_per_second': 2.718, 'total_flos': 3836348638691328.0, 'train_loss': 0.9195141187160882, 'epoch': 1.0})

In [None]:
trainer.save_model("./gptneo_wikitext_model")
tokenizer.save_pretrained("./gptneo_wikitext_model")


('./gptneo_wikitext_model/tokenizer_config.json',
 './gptneo_wikitext_model/special_tokens_map.json',
 './gptneo_wikitext_model/vocab.json',
 './gptneo_wikitext_model/merges.txt',
 './gptneo_wikitext_model/added_tokens.json')

In [None]:
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
import torch

# Check device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the pretrained model and tokenizer
model_name = "EleutherAI/gpt-neo-125M"
model = GPTNeoForCausalLM.from_pretrained(model_name).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Ensure tokenizer compatibility
tokenizer.pad_token = tokenizer.eos_token

# Function for inference
def infer_with_pretrained_gpt_neo(prompt, model, tokenizer, max_length=100):
    """
    Generate a response from the GPT-Neo model given a prompt.
    """
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate text
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        num_return_sequences=1,
        temperature=0.7,  # Controls randomness
        top_p=0.9,  # Top-p sampling for diverse outputs
        do_sample=True
    )

    # Decode the output to a human-readable string
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Example inference
prompt = "Can you explain the significance of neural networks in AI?"
response = infer_with_pretrained_gpt_neo(prompt, model, tokenizer)
print("Pretrained GPT-Neo Response:")
print(response)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Pretrained GPT-Neo Response:
Can you explain the significance of neural networks in AI?

AI is a field of study that aims to develop, test and validate a range of AI techniques. It has the potential to improve the quality of human and machine learning algorithms, and to provide new ways of improving machine learning.

The neural networks used in AI are thought to be the first class of technology. The most obvious example is neural network based on the classification of images, which is one of the most challenging tasks in AI


In [None]:
HUGGINGFACE_TOKEN = "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"

In [None]:
from huggingface_hub import login

In [None]:
login(token=HUGGINGFACE_TOKEN)

In [None]:
model.push_to_hub("AmmarA22/gptneo-wikitext-quantized")
tokenizer.push_to_hub("AmmarA22/gptneo-wikitext-quantized")

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AmmarA22/gptneo-wikitext-quantized/commit/521424fc437a0005abe609adc3cab301653c91f8', commit_message='Upload tokenizer', commit_description='', oid='521424fc437a0005abe609adc3cab301653c91f8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AmmarA22/gptneo-wikitext-quantized', endpoint='https://huggingface.co', repo_type='model', repo_id='AmmarA22/gptneo-wikitext-quantized'), pr_revision=None, pr_num=None)