In [8]:
import torch
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset, DatasetDict
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# CPU device handle. Creating it does not, by itself, move any model or tensor.
device = torch.device("cpu")

# Report the CUDA support visible to this PyTorch installation.
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"GPU Name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No GPU found'}")
print(f"Number of GPUs: {torch.cuda.device_count()}")


CUDA Available: False
CUDA Version: None
GPU Name: No GPU found
Number of GPUs: 0


In [9]:

def load_data(file_path):
    """Read the dream-symbol CSV and build the single-column training dataset.

    Every usable row is rendered as ``Dream: <symbol>``, a newline, then
    ``Interpretation: <interpretation>`` and stored in a ``text`` column.
    Rows missing either field are dropped, because a missing value would
    otherwise become a non-string ``text`` entry that cannot be tokenized.

    Args:
        file_path: Path to a CSV file containing "Dream Symbol" and
            "Interpretation" columns.

    Returns:
        The result of ``Dataset.from_pandas`` on a frame holding only ``text``.

    Raises:
        KeyError: If a required column is absent from the CSV.
    """
    required_columns = ["Dream Symbol", "Interpretation"]
    df = pd.read_csv(file_path)

    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        raise KeyError(f"CSV is missing required column(s): {missing_columns}")

    # Drop incomplete rows, then restore a plain 0..n-1 index so that
    # Dataset.from_pandas does not carry the old row labels along as a column.
    df = df.dropna(subset=required_columns).reset_index(drop=True)

    # astype(str) keeps the concatenation valid even if pandas parsed a cell
    # as a number rather than text.
    df["text"] = (
        "Dream: " + df["Dream Symbol"].astype(str)
        + "\nInterpretation: " + df["Interpretation"].astype(str)
    )

    # Print the first 5 rows as a quick sanity check of the formatted text
    print(df.head())

    return Dataset.from_pandas(df[["text"]])

# Build the formatted text dataset from the CSV in the working directory.
dataset = load_data("dreams_interpretations.csv")

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
split_dataset = dataset.train_test_split(
    seed=42,
    test_size=0.2,
)


  Dream Symbol                                     Interpretation  \
0     Aardvark  To see an aardvark in your dream indicates tha...   
1  Abandonment  To dream that you are abandoned suggests that ...   
2    Abduction  To dream of being abducted indicates that you ...   
3    Aborigine  To see an Aborigine in your dream represents b...   
4     Abortion  To dream that you have an abortion suggests th...   

                                                text  
0  Dream: Aardvark\nInterpretation: To see an aar...  
1  Dream: Abandonment\nInterpretation: To dream t...  
2  Dream: Abduction\nInterpretation: To dream of ...  
3  Dream: Aborigine\nInterpretation: To see an Ab...  
4  Dream: Abortion\nInterpretation: To dream that...  


In [11]:

# Fetch the pretrained GPT-2 tokenizer and language-model weights.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# GPT-2 ships without a padding token, so reuse end-of-sequence for padding.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize the "text" entries of a batch for causal-LM training.

    Args:
        examples: Mapping with a "text" entry holding the strings to encode.

    Returns:
        The module-level ``tokenizer``'s output for those texts, requested with
        truncation and padding to a fixed length of 128 tokens.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    texts = examples["text"]
    return tokenizer(texts, **encode_options)

# Tokenize both splits. Mapping over `split_dataset` keeps the "train"/"test"
# keys that the Trainer below indexes; the unsplit `dataset` has no such keys.
tokenized_datasets = split_dataset.map(tokenize_function, batched=True)

# mlm=False selects causal language modelling (labels derived from input_ids)
# rather than masked-token prediction.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

## Training arguments
training_args = TrainingArguments(
    output_dir="./dream_model",
    # NOTE(review): recent transformers releases rename this argument to
    # `eval_strategy`; confirm against the installed version.
    evaluation_strategy="epoch",
    # Must match the evaluation strategy for load_best_model_at_end to work.
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    # The run is only a few hundred optimizer steps in total (about 900 rows,
    # batch size 8, 3 epochs), so the previous value of 500 never logged the
    # training loss.
    logging_steps=50,
    save_total_limit=2,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer.train()

# Save model and tokenizer side by side so the inference cells can reload both
# from the same directory.
model.save_pretrained("./dream_model_gpt2")
tokenizer.save_pretrained("./dream_model_gpt2")

print("Training complete! Model saved to ./dream_model_gpt2")


Map:   0%|          | 0/902 [00:00<?, ? examples/s]

ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=0.26.0'`

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Directory written by the training cell.
model_path = "./dream_model_gpt2"

# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Restore the fine-tuned tokenizer and weights.
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Inference only: switch off training-time behaviour such as dropout.
model.eval()

# Place the weights on the chosen device.
model.to(device)


In [None]:
def generate_interpretation(dream_symbol, max_length=50):
    """Sample one interpretation for a dream symbol from the fine-tuned model.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        dream_symbol: Text inserted after "Dream: " in the prompt.
        max_length: Forwarded to ``model.generate`` as ``max_length``. Per the
            transformers documentation this limit counts the prompt tokens too.

    Returns:
        The decoded first sequence returned by ``model.generate``, with
        special tokens removed.
    """
    # Format input as it was trained
    input_text = f"Dream: {dream_symbol}\nInterpretation:"

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() needs to tell real tokens from padding.
    encoded = tokenizer(input_text, return_tensors="pt").to(device)

    # Generate interpretation using the model
    output = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,  # Generate one interpretation
        temperature=0.7,  # Controls randomness (lower = more deterministic)
        top_k=50,  # Limits to top 50 tokens to reduce randomness
        top_p=0.95,  # Nucleus sampling (higher = more random)
        do_sample=True,  # Enable sampling for diverse outputs
        # GPT-2 defines no pad token; name EOS explicitly instead of relying
        # on generate()'s implicit fallback.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode and return the generated text
    interpretation = tokenizer.decode(output[0], skip_special_tokens=True)

    return interpretation


In [None]:
# Sample prompts used to spot-check the fine-tuned model.
dream_examples = [
    "Flying",
    "Snake",
    "Lost in a city",
    "Being chased",
    "Seeing a black cat"
]

for dream in dream_examples:
    interpretation = generate_interpretation(dream)
    # The returned text is the decoded prompt plus its continuation, so it
    # already starts with the "Dream: ..." line; adding another header here
    # printed that line twice.
    print(f"{interpretation}\n{'-'*50}")
