In [1]:
# Joke Generation using GPT-2 in Google Colab

# First install required packages
!pip install transformers torch

from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
from google.colab import files
import os

# Function to load jokes from file
def load_jokes(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        jokes = [line.strip() for line in f if line.strip()]
    return jokes

# Function to generate jokes
def generate_joke(model, tokenizer, prompt):
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    output = model.generate(
        input_ids,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        temperature=0.9,
        top_k=50,
        top_p=0.95,
        do_sample=True
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Main execution
print("=== Joke Generator using GPT-2 ===")

# File upload in Colab
print("\nPlease upload your jokes file (txt or csv):")
uploaded = files.upload()
file_name = next(iter(uploaded))
print(f"\nUploaded file: {file_name}")

jokes = load_jokes(file_name)
print(f"Loaded {len(jokes)} jokes from the file")

# Load model
print("\nLoading GPT-2 model...")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

# Generate joke
print("\nGenerating a new joke...")
generated_joke = generate_joke(model, tokenizer, "Tell me a funny joke:")
print("\nGenerated Joke:")
print(generated_joke)

# Save option
print("\nWould you like to download this joke?")
with open('generated_joke.txt', 'w') as f:
    f.write(generated_joke)
files.download('generated_joke.txt')

print("\nDone!")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Saving jokes_dataset.txt to jokes_dataset.txt

Uploaded file: jokes_dataset.txt
Loaded 10 jokes from the file

Loading GPT-2 model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Generating a new joke...

Generated Joke:
Tell me a funny joke:

I like to have fun in the evening.
, "Fairytale Day"
.


(I love the fairy tale ending, the song, and the music. I love how they both look so perfect together. They both have a very different sound. It's more of a "go with the flow", not a fun to dance to. The two of them all look like the same person, but their looks don't seem to be the

Would you like to download this joke?


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Done!


In [6]:
!ls -l

total 12
-rw-r--r-- 1 root root  363 May 20 21:31 generated_joke.txt
-rw-r--r-- 1 root root  641 May 20 21:30 jokes_dataset.txt
drwxr-xr-x 1 root root 4096 May 14 13:38 sample_data


In [7]:
!ls -l *.ipynb

ls: cannot access '*.ipynb': No such file or directory


In [None]:
from google.colab import files
import json

# Upload the notebook file
uploaded = files.upload()
filename = list(uploaded.keys())[0]  # Get the uploaded filename

# Load and clean the notebook
with open(filename, "r", encoding="utf-8") as f:
    notebook = json.load(f)

# Remove problematic metadata
if "metadata" in notebook:
    notebook["metadata"].pop("widgets", None)
    notebook["metadata"].pop("kernelspec", None)

# Save cleaned version
clean_name = filename.replace(".ipynb", "_CLEAN.ipynb")
with open(clean_name, "w", encoding="utf-8") as f:
    json.dump(notebook, f, indent=2)

print(f"Created clean version: {clean_name}")
files.download(clean_name)  # Download the cleaned file