In [2]:
!pip install torch transformers datasets peft accelerate bitsandbytes

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from 

In [1]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.43.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.2-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import pandas as pd

# ✅ Load dataset from CSV file
csv_path = "emoji_math_dataset_utf8.csv"
df = pd.read_csv(csv_path)
print(f"✅ Loaded dataset from {csv_path}")

# ✅ Convert dataset to Hugging Face format
dataset = Dataset.from_pandas(df)

# ✅ Format data for training
def format_data(example):
    return {
        "text": f"Riddle: {example['Problem']}\nAnswer: {example['Solution']}"
    }

dataset = dataset.map(format_data, remove_columns=["Problem", "Solution"])

# ✅ Split dataset (90% train, 10% test)
split_dataset = dataset.train_test_split(test_size=0.1)

# ✅ Print a sample to verify
print(split_dataset["train"][0])


✅ Loaded dataset from emoji_math_dataset_utf8.csv


Map:   0%|          | 0/49 [00:00<?, ? examples/s]

{'text': 'Riddle: 🚀 + 🚀 = 10\nAnswer: 🚀 = 5'}


In [5]:

# ✅ Define model & tokenizer (Qwen1.5-4B-Chat)
model_name = "Qwen/Qwen1.5-4B-Chat"

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map="auto")

# ✅ Apply LoRA fine-tuning
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# ✅ Ensure tokenizer has a pad token
tokenizer.pad_token = tokenizer.eos_token

# ✅ Tokenization function
def tokenize_data(example):
    tokenized_output = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=128
    )
    labels = tokenized_output["input_ids"].copy()
    labels = [(label if label != tokenizer.pad_token_id else -100) for label in labels]
    tokenized_output["labels"] = labels
    return tokenized_output

# ✅ Apply tokenization
tokenized_datasets = split_dataset.map(tokenize_data, batched=True)

# ✅ Training arguments
training_args = TrainingArguments(
    output_dir="./emoji_math_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=9,
    logging_dir="./logs",
    logging_steps=50,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    fp16=torch.cuda.is_available(),
    save_total_limit=2,
)

# ✅ Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer
)

# ✅ Start fine-tuning
trainer.train()

trainer.save_model("./emoji_math_model")
tokenizer.save_pretrained("./emoji_math_model")



Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:  65%|######4   | 2.53G/3.91G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

trainable params: 6,553,600 || all params: 3,956,922,880 || trainable%: 0.1656


Map:   0%|          | 0/44 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

  trainer = Trainer(


  0%|          | 0/198 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.7117507457733154, 'eval_runtime': 3.4085, 'eval_samples_per_second': 1.467, 'eval_steps_per_second': 0.88, 'epoch': 1.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.40140795707702637, 'eval_runtime': 3.4001, 'eval_samples_per_second': 1.471, 'eval_steps_per_second': 0.882, 'epoch': 2.0}
{'loss': 3.3745, 'grad_norm': 0.4095359146595001, 'learning_rate': 3.787878787878788e-05, 'epoch': 2.27}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.24606044590473175, 'eval_runtime': 3.3976, 'eval_samples_per_second': 1.472, 'eval_steps_per_second': 0.883, 'epoch': 3.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.2008497714996338, 'eval_runtime': 3.3953, 'eval_samples_per_second': 1.473, 'eval_steps_per_second': 0.884, 'epoch': 4.0}
{'loss': 0.2366, 'grad_norm': 0.2752733528614044, 'learning_rate': 2.5252525252525256e-05, 'epoch': 4.55}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.18513737618923187, 'eval_runtime': 3.3993, 'eval_samples_per_second': 1.471, 'eval_steps_per_second': 0.883, 'epoch': 5.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.18017630279064178, 'eval_runtime': 3.3978, 'eval_samples_per_second': 1.472, 'eval_steps_per_second': 0.883, 'epoch': 6.0}
{'loss': 0.1878, 'grad_norm': 0.19492600858211517, 'learning_rate': 1.2626262626262628e-05, 'epoch': 6.82}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.1768428385257721, 'eval_runtime': 3.3983, 'eval_samples_per_second': 1.471, 'eval_steps_per_second': 0.883, 'epoch': 7.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.17514267563819885, 'eval_runtime': 3.4339, 'eval_samples_per_second': 1.456, 'eval_steps_per_second': 0.874, 'epoch': 8.0}


  0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.1743285059928894, 'eval_runtime': 3.396, 'eval_samples_per_second': 1.472, 'eval_steps_per_second': 0.883, 'epoch': 9.0}
{'train_runtime': 1398.9672, 'train_samples_per_second': 0.283, 'train_steps_per_second': 0.142, 'train_loss': 1.0024857087568804, 'epoch': 9.0}


('./emoji_math_model\\tokenizer_config.json',
 './emoji_math_model\\special_tokens_map.json',
 './emoji_math_model\\vocab.json',
 './emoji_math_model\\merges.txt',
 './emoji_math_model\\added_tokens.json',
 './emoji_math_model\\tokenizer.json')

In [6]:
# ✅ Test the model on 3 new emoji math problems
test_problems = [
    "🚀 + 🚀 = 18",
    "🐢 + 🐢 + 🐢 = 12",
    "🍕 + 🍕 + 🍕 + 🍕 = 16"
]

for prompt in test_problems:
    input_text = f"Riddle: {prompt}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    output = model.generate(
        **inputs,
        max_length=60,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    print(f"❓ {prompt} → 🤖 {tokenizer.decode(output[0], skip_special_tokens=True)}")

❓ 🚀 + 🚀 = 18 → 🤖 Riddle: 🚀 + 🚀 = 18
Answer: 🚀 = 9
❓ 🐢 + 🐢 + 🐢 = 12 → 🤖 Riddle: 🐢 + 🐢 + 🐢 = 12
Answer: 🐢 = 4
❓ 🍕 + 🍕 + 🍕 + 🍕 = 16 → 🤖 Riddle: 🍕 + 🍕 + 🍕 + 🍕 = 16
Answer: 🍕 = 4


In [7]:
# ✅ Test the model on 3 new emoji math problems
test_problems = [
  "🐘 + 🐘 + 🐘 = 27",
    "🦁 + 🦁 = 14",
    "🌟 + 🌟 + 🌟 + 🌟 + 🌟 = 50"
]

for prompt in test_problems:
    input_text = f"Riddle: {prompt}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    output = model.generate(
        **inputs,
        max_length=60,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    print(f"❓ {prompt} → 🤖 {tokenizer.decode(output[0], skip_special_tokens=True)}")

❓ 🐘 + 🐘 + 🐘 = 27 → 🤖 Riddle: 🐘 + 🐘 + 🐘 = 27
Answer: 🐘 = 9
❓ 🦁 + 🦁 = 14 → 🤖 Riddle: 🦁 + 🦁 = 14
Answer: 🦁 = 7
❓ 🌟 + 🌟 + 🌟 + 🌟 + 🌟 = 50 → 🤖 Riddle: 🌟 + 🌟 + 🌟 + 🌟 + 🌟 = 50
Answer: 🌟 = 10


In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_path = "./emoji_math_model"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token

# Set quantization to reduce memory usage
quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # Change to load_in_4bit=True if needed

# Load model with device auto-detection and offloading
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",  # Automatically selects best device (GPU if available)
        offload_folder="./offload_dir",  # Offloads large layers to disk if needed
        quantization_config=quantization_config  # Apply quantization
    )
except Exception as e:
    print(f"Failed to load on GPU, switching to CPU: {e}")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="cpu"  # Fallback to CPU if necessary
    )

print("Model loaded successfully!")




Failed to load on GPU, switching to CPU: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded successfully!


In [12]:
%%writefile your_script.py
import torch
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_path = "./emoji_math_model"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token

# Set quantization to reduce memory usage
quantization_config = BitsAndBytesConfig(load_in_8bit=True)  # Change to load_in_4bit=True if needed

# Load model with device auto-detection and offloading
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",  # Automatically selects best device (GPU if available)
        offload_folder="./offload_dir",  # Offloads large layers to disk if needed
        quantization_config=quantization_config  # Apply quantization
    )
except Exception as e:
    print(f"Failed to load on GPU, switching to CPU: {e}")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="cpu"  # Fallback to CPU if necessary
    )

print("Model loaded successfully!")


# ✅ Streamlit UI
st.title("🤖PI'CASSO")
st.write("Enter an emoji-based math problem, and the AI will solve it!")

prompt = st.text_input("Enter an emoji math riddle:", "")

if st.button("Solve"):
    if prompt:
        input_text = f"Riddle: {prompt}\nAnswer:"
        device = "cuda" if torch.cuda.is_available() else "cpu"
        inputs = tokenizer(input_text, return_tensors="pt").to(device)

        # ✅ Generate response
        output = model.generate(
            **inputs, max_length=60, num_return_sequences=1,
            do_sample=True, temperature=0.7, top_p=0.9
        )
        result = tokenizer.decode(output[0], skip_special_tokens=True)

        st.success(f"🤖 Answer: {result}")
    else:
        st.warning("Please enter a valid riddle.")


Writing your_script.py


In [None]:
import subprocess
from pyngrok import ngrok

# Start Streamlit app in a separate process
process = subprocess.Popen(["streamlit", "run", "your_script.py"])

# Expose port 8501
public_url = ngrok.connect(8501)
print(f"Public URL: {public_url}")


t=2025-03-14T21:59:20+0500 lvl=eror msg="unable to evaluate ngrok agent binary path for symlinks" obj=tunnels.session err="CreateFile C:\\Users\\Lenovo\\AppData\\Local\\ngrok\\ngrok.exe: The system cannot find the file specified."


Public URL: NgrokTunnel: "https://ae38-2400-adc7-162-2e00-e189-5be8-4819-861a.ngrok-free.app" -> "http://localhost:8501"


t=2025-03-14T22:01:57+0500 lvl=warn msg="failed to open private leg" id=fba31acaf478 privaddr=localhost:8501 err="dial tcp [::1]:8501: connectex: No connection could be made because the target machine actively refused it."
t=2025-03-14T22:01:59+0500 lvl=warn msg="failed to open private leg" id=67eed2816302 privaddr=localhost:8501 err="dial tcp [::1]:8501: connectex: No connection could be made because the target machine actively refused it."
t=2025-03-15T01:22:45+0500 lvl=eror msg="heartbeat timeout, terminating session" obj=tunnels.session obj=csess id=dd57504ff0e5 clientid=9910be879360f7704e26b8992c801626
t=2025-03-15T01:22:46+0500 lvl=eror msg="session closed, starting reconnect loop" obj=tunnels.session obj=csess id=6596b8398196 err="session closed"
t=2025-03-15T01:22:46+0500 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="failed to dial ngrok server with address \"connect.ngrok-agent.com:443\": dial tcp: lookup connect.ngrok-agent.com: no such host"
t=2025-03-1

In [11]:
!ngrok authtoken 2uHP3chnuezzbtUy8KCZpHnZowX_638q32K2y7FMtEDfcc6XH


Downloading ngrok ...
Downloading ngrok: 0%
Downloading ngrok: 1%
Downloading ngrok: 2%
Downloading ngrok: 3%
Downloading ngrok: 4%
Downloading ngrok: 5%
Downloading ngrok: 6%
Downloading ngrok: 7%
Downloading ngrok: 8%
Downloading ngrok: 9%
Downloading ngrok: 10%
Downloading ngrok: 11%
Downloading ngrok: 12%
Downloading ngrok: 13%
Downloading ngrok: 14%
Downloading ngrok: 15%
Downloading ngrok: 16%
Downloading ngrok: 17%
Downloading ngrok: 18%
Downloading ngrok: 19%
Downloading ngrok: 20%
Downloading ngrok: 21%
Downloading ngrok: 22%
Downloading ngrok: 23%
Downloading ngrok: 24%
Downloading ngrok: 25%
Downloading ngrok: 26%
Downloading ngrok: 27%
Downloading ngrok: 28%
Downloading ngrok: 29%
Downloading ngrok: 30%
Downloading ngrok: 31%
Downloading ngrok: 32%
Downloading ngrok: 33%
Downloading ngrok: 34%
Downloading ngrok: 35%
Downloading ngrok: 36%
Downloading ngrok: 37%
Downloading ngrok: 38%
Downloading ngrok: 39%
Downloading ngrok: 40%
Downloading ngrok: 41%
Downloading ngrok: 42%

In [10]:
!pip install pyngrok



Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: C:\Users\Lenovo\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
