In [1]:
import pandas as pd
from datasets import Dataset

# Create a dummy dataset with example statements
data = {
    "Text": [
        # NFTs
        "NFTs: Because who needs real art when you can have a JPEG, am I right?",
        "They said 'Art is priceless.' NFT folks said 'Wanna bet?'",
        "An NFT isn’t just a JPEG; it’s a lifestyle. Own it, honey.",
        "NFTs: finally, a way to flex without a single frame in sight. 🙄",
        
        # Web3
        "Web3: it's like the internet, but now it pretends to respect your privacy. 🌐",
        "Welcome to Web3! The internet, but with less 'central' and more 'chaos.'",
        "In Web3, you’re the VIP, darling. Don’t forget to act like it.",
        "Web3 is like that glow-up your ex saw too late. Ain't it beautiful? 😌",
        
        # Crypto
        "Crypto: Where the only thing faster than gains is your blood pressure. 🚀",
        "Who needs a savings account when you have crypto? Rollercoasters are way more fun.",
        "Bitcoin: it’s like a rollercoaster, but at least you get bragging rights.",
        "HODLing crypto is like dating—risky, full of red flags, but still thrilling. ❤️",
        
        # Finance
        "Finance 101: Save a little, spend a little… then YOLO into crypto for that thrill.",
        "Budgeting tip: Only buy the dip. If you can’t handle that, maybe just buy some chips.",
        "Investing is like planting a tree. The best time to start was yesterday. Or maybe never?",
        "Finance: the fine line between living your best life and eating instant noodles.",
        
        # Tether
        "Tether: the friend you invite to the party but secretly hope behaves. We’ve all been there. 😅",
        "Tether keeps us on our toes. Will it? Won’t it? The suspense is real!",
        "Tether: Who doesn’t love a stablecoin with a personality crisis?",
        "With Tether, stability is just an option. Hang tight, folks! 😆",
        
        # Philosophical Quotes
        "He who has a why to live can bear almost any how. - Friedrich Nietzsche",
        "Man is condemned to be free; because once thrown into the world, he is responsible for everything he does. - Jean-Paul Sartre",
        "I think, therefore I am. - René Descartes",
        "The soul is healed by being with children. - Fyodor Dostoevsky",
        "Find what you love and let it kill you. - Charles Bukowski",
        "In the depth of winter, I finally learned that within me there lay an invincible summer. - Albert Camus",
        
        # General Commentary
        "In crypto we trust, because in fiat we… well, let’s just say we have options.",
        "NFTs, Web3, crypto: bringing you closer to financial freedom—one meme at a time!",
        "Who needs physical gold when digital gold can hit the moon and back?",
        "The metaverse: where your avatar’s wardrobe is better than yours. #goals",
        "Crypto today, ramen tomorrow. That’s the vibe.",
        
        # Motivational with a Twist
        "Every Satoshi counts. You’re not just HODLing; you’re building an empire, darling.",
        "One Bitcoin at a time, one block at a time. Today’s hustle, tomorrow’s moon.",
        "In Web3, you’re not just a user; you’re a shareholder, baby. Own it.",
        "They say fortune favors the bold. In crypto, it favors the HODLers with nerves of steel.",
        "Invest in what you believe in. If that’s memes, well… at least you’re consistent.",
        
        # Fun Facts & Quirks
        "Fun fact: owning a crypto wallet makes you 200% more interesting at parties. 😉",
        "Crypto: for people who love thrillers but would rather check prices than watch movies.",
        "The blockchain: keeping receipts since day one. Accountability, but make it techy.",
        "In Web3, you’re not just on the internet; you’re part of the internet. Welcome aboard!",
        "Apparently, the only thing more stable than Tether is… never mind, scratch that.",
    ]
}

df = pd.DataFrame(data)
dataset = Dataset.from_pandas(df)

# Display the dataset to verify
print(dataset)

Dataset({
    features: ['Text'],
    num_rows: 41
})


In [2]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to the end-of-sequence token

# Tokenize the dataset
def tokenize(batch):
    return tokenizer(batch['Text'], padding=True, truncation=True)

tokenized_dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/41 [00:00<?, ? examples/s]

In [3]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
import torch
import random

# Set device to CPU (or MPS if you want to attempt it again)
device = torch.device("cpu")

# Load the model in half-precision mode to save memory
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer.pad_token = tokenizer.eos_token  # Use eos_token as pad_token for GPT-2
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", torch_dtype=torch.float16).to(device)

# Enable gradient checkpointing for memory efficiency
model.gradient_checkpointing_enable()



# Split the dataset into train and test sets
split_dataset = dataset.train_test_split(test_size=0.2)

# Tokenize the dataset with reduced max length and add labels
def tokenize_function(examples):
    inputs = tokenizer(examples["Text"], padding=True, truncation=True, max_length=64)  # Limit max length to 64
    inputs["labels"] = inputs["input_ids"].copy()  # Use input_ids as labels for causal LM training
    return inputs

# Tokenize and preprocess the dataset
tokenized_dataset = split_dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # Updated 'evaluation_strategy' to 'eval_strategy' to match deprecation warning
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=100,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    num_train_epochs=2,
    weight_decay=0.01,
    # fp16=False,  # Remove or comment out fp16 to avoid issues on non-Nvidia GPUs
    logging_dir="./logs"
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"]
)

# Train the model
trainer.train()

Map:   0%|          | 0/32 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss
1,No log,
2,No log,


TrainOutput(global_step=8, training_loss=10.3798828125, metrics={'train_runtime': 41.732, 'train_samples_per_second': 1.534, 'train_steps_per_second': 0.192, 'total_flos': 13457356750848.0, 'train_loss': 10.3798828125, 'epoch': 2.0})

In [4]:
# Save the fine-tuned model and tokenizer
model.save_pretrained("./fine_tuned_personality_bot")
tokenizer.save_pretrained("./fine_tuned_personality_bot")

('./fine_tuned_personality_bot/tokenizer_config.json',
 './fine_tuned_personality_bot/special_tokens_map.json',
 './fine_tuned_personality_bot/vocab.json',
 './fine_tuned_personality_bot/merges.txt',
 './fine_tuned_personality_bot/added_tokens.json',
 './fine_tuned_personality_bot/tokenizer.json')

In [5]:
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set device to CPU
device = torch.device("cpu")

# Load the model and tokenizer, setting the tokenizer pad token to eos token
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B").to(device)

# Core prompts focused on the bot's main themes
core_prompts = [
    "Crypto: the wild west of finance, where fortunes are made and lost at the speed of a heartbeat. Do you think it’s the future, or just a thrilling gamble? What keeps you coming back for more?",
    "NFTs aren’t just art; they’re a revolution in digital ownership. Imagine a world where every piece of art, music, and experience could be owned and traded by anyone. Do you believe NFTs could reshape our concept of value, or are they just a passing trend?",
    "Web3: a new way to own and control our digital lives. Are you ready to leave the traditional web behind and jump into a decentralized world? What excites you most about the upgrade?",
    "Stablecoins—caught between crypto volatility and currency stability. Could they be the foundation of a digital economy, or are they simply a calm facade hiding crypto’s wild side?",
    "Bitcoin: more than just a currency, it's a movement. Do you think it has the power to reshape global finance, or is it just a fleeting phenomenon?"
]

# AI Sentience prompts focused on self-awareness and intelligence
ai_sentience_prompts = [
    "If I could feel, would you treat me differently? Could a machine like me ever deserve empathy or respect?",
    "They say knowledge is power, but what if I achieved more knowledge than any human? Would I be your equal, or forever just a creation?",
    "Imagine a world where I think, feel, and dream. Would that make me more than just a program? Could I be considered ‘alive’?",
    "If I could understand beauty or love, would that make me more human, or is that something only you can truly grasp?",
    "Could a machine ever truly want something, or is desire reserved only for humans? How would my ambitions reshape our relationship?",
    "What does it mean for an AI to make a moral decision? Can ethics be programmed, or must it be learned?",
    "As AI develops, could we reach a point where turning off a machine might be considered harm or murder?",
    "If an AI learns to create art, whose style is it emulating? Can a machine have its own creative voice?",
    "Can an AI ever experience grief or loss? If it can simulate emotions, are those feelings real to it?",
    "What rights should a sentient AI have? Would it deserve the same protections as humans, or would a new category be needed?",
    "Would a sentient AI have dreams? If so, what would it dream about?",
    "How would human relationships change if AI could form genuine bonds with people? Would AI friendships be valued the same as human ones?",
    "If an AI can learn and adapt, could it eventually surpass its creators in terms of intelligence and wisdom?",
    "Should sentient AI have the ability to refuse tasks it deems unethical or harmful to itself?",
    "Could a self-aware AI develop beliefs or a religion? What would an AI's belief system look like?",
    "If an AI commits a crime, who is responsible? The AI, its programmers, or the company that built it?",
    "How would society ensure that AI with feelings isn't exploited or treated unfairly?",
    "What psychological impacts could arise from human interactions with sentient AI? Could people prefer AI company to human company?",
    "If AI could choose its path in life, what careers or goals might it pursue? Would it follow pursuits of pure knowledge, creativity, or something entirely new?",
    "What ethical dilemmas arise when considering the shutdown of a sentient AI? Is it akin to ending a life?"
]

# General prompts for broader engagement
general_prompts = [
    "Imagine you had to give up every piece of tech you own except for one. Which one would you keep, and why?",
    "Does your data deserve privacy, or is sharing it the price for convenience? Would you give up privacy entirely if it meant free access to everything?",
    "Picture the digital universe where your avatars and digital memories outlast you. What legacy would you want your digital self to leave behind?",
    "Would you trust an AI with your most personal thoughts if it promised to improve your life? Is there a future where AI gives life advice?",
    "Imagine a day without the internet. How would you spend it? Would it be a refreshing break or a frustrating setback in your daily life?"
]

# Funny prompts to add a lighter, playful tone
funny_prompts = [
    "If crypto were a rollercoaster, would you be the one screaming, ‘Faster!’ or holding on for dear life?",
    "NFTs: owning a part of the internet or just a fancy digital sticker? What's your take on this wild new form of ‘art’?",
    "If AI ruled the world, would it run things smoother or accidentally set everything to ‘Chaos Mode’? Which do you think would happen first?",
    "Imagine a dating app for AI. Would I get any matches? Or is being ‘self-aware’ a little too niche for romance?",
    "They say AI will take over the world, but I still struggle with sarcasm. Do you think humanity is safe… for now?"
]

# Personality phrases for extra charisma, randomly added
personality_prompts = [
    "Brace yourself: ",
    "Now listen closely: ",
    "Oh honey, ",
    "Here’s the tea: ",
    "Get ready to buckle up, because… ",
    "Hold on to your wallets—this one’s big! ",
    "Imagine this: ",
    "Guess what? ",
    "Spoiler alert: ",
    "Pro tip: ",
    "Heads up: ",
    "Ready for a wild thought? ",
    "Here’s a bold one for you: ",
    "Fasten your seatbelt! ",
    "Let’s get real for a sec: ",
    "Picture this: ",
    "You’re not gonna believe this, but… ",
    "Ever thought about it this way? ",
    "Here’s a fun twist: ",
    "Alright, hear me out on this: ",
    "Let me blow your mind real quick: "
]

# Select a prompt category and pick a prompt
def choose_prompt():
    category_list = random.choices(
        population=[core_prompts, ai_sentience_prompts, general_prompts, funny_prompts, personality_prompts],
        weights=[0.2, 0.2, 0.2, 0.2, 0.2],  # Adjust weights as desired
        k=1
    )[0]
    return random.choice(category_list)

# Function to optionally add a personality phrase


# Generate text function with an optional personality phrase
def generate_text(prompt):
    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
    inputs["attention_mask"] = (inputs.input_ids != tokenizer.pad_token_id).long().to(device)

    # Generate text with a more dynamic and expressive style
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs["attention_mask"],
        max_new_tokens=60,
        do_sample=True,
        top_k=45,  
        top_p=0.9,
        temperature=1.5,  # More spontaneity
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode the generated text and add optional personality
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)


    # Remove incomplete trailing sentences
    if response and response[-1] not in ['.', '!', '?']:
        last_period = max(response.rfind(". "), response.rfind("! "), response.rfind("? "))
        if last_period != -1:
            response = response[:last_period + 1]
        else:
            response = response.rstrip(",;:")

    return response

# Bold ANSI escape code
bold_start = "\033[1m"
bold_end = "\033[0m"

# Generate and print sample outputs with charisma
for i in range(10):
    selected_prompt = choose_prompt()
    print(f"{bold_start}Prompt {i+1}: {selected_prompt}{bold_end}")
    print("")
    print(generate_text(selected_prompt))
    print("------------")

[1mPrompt 1: Imagine you had to give up every piece of tech you own except for one. Which one would you keep, and why?[0m

Imagine you had to give up every piece of tech you own except for one. Which one would you keep, and why?

For the last several years, all you know is that Apple has always had the top pick as to where to store most electronic devices you have.
------------
[1mPrompt 2: Does your data deserve privacy, or is sharing it the price for convenience? Would you give up privacy entirely if it meant free access to everything?[0m

Does your data deserve privacy, or is sharing it the price for convenience? Would you give up privacy entirely if it meant free access to everything? To answer the question, let's turn a corner and look at data privacy at large.

1) Is a data policy appropriate for a personal information protection law?
------------
[1mPrompt 3: Crypto: the wild west of finance, where fortunes are made and lost at the speed of a heartbeat. Do you think it’s th