In [None]:
# Install the notebook's third-party dependencies (run once per runtime).
!pip install openai
# httpcore is installed exactly once, pinned. A bare `pip install` with no
# package name makes pip exit with an error, and an extra unpinned
# `pip install httpcore` would only be overridden by this pin.
!pip install httpcore==0.15.0 httpx pymongo googletrans
!pip install bitsandbytes
!pip install peft

Collecting openai
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.2-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.7/383.7 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━

In [None]:
import os
import math
import pathlib
from typing import Optional, Dict
from dataclasses import dataclass, field
import json

import torch
from torch.utils.data import Dataset
import transformers
from transformers.training_args import TrainingArguments
from peft import LoraConfig, TaskType, get_peft_model  # Add peft imports for LoRA


@dataclass
class ModelArguments:
    """Selects the pretrained checkpoint to fine-tune."""

    # Hub id or local path; forwarded to `from_pretrained` for both the model
    # and the tokenizer.
    model_name_or_path: Optional[str] = "baichuan-inc/Baichuan2-7B-Base"


@dataclass
class DataArguments:
    """Location of the training data."""

    # Path to the JSON training file. The default is None (no path supplied yet),
    # so the annotation is Optional[str] rather than plain str.
    data_path: Optional[str] = field(default=None, metadata={"help": "Path to the training data."})


@dataclass
class CustomTrainingArguments(transformers.TrainingArguments):
    """`transformers.TrainingArguments` extended with the options this notebook adds."""

    # Passed as `cache_dir` when the model and tokenizer are loaded.
    cache_dir: Optional[str] = None
    # Optimizer name handed to the Trainer.
    optim: str = "adamw_torch"
    # Fixed token length every training example is padded/truncated to.
    model_max_length: int = field(
        metadata={"help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."},
        default=512,
    )
    # LoRA option: when True the model is wrapped with a LoRA adapter before training.
    use_lora: bool = False


class SupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning on chat trajectories.

    The JSON file at ``data_path`` is loaded eagerly in the constructor. Each
    record must contain a ``"trajectory"`` list whose messages have ``"role"``
    and ``"content"`` keys. Items are tokenized on access and padded/truncated
    to exactly ``model_max_length`` tokens.

    Args:
        data_path: Path to the JSON file with the training records.
        tokenizer: Object exposing ``encode``, ``eos_token_id`` and ``pad_token_id``.
        model_max_length: Length of every returned sequence.
        user_tokens: Token ids placed before each ``"user"`` message.
            Defaults to ``[195]``.
        assistant_tokens: Token ids placed before every non-user message.
            Defaults to ``[196]``.
    """

    def __init__(self, data_path, tokenizer, model_max_length, user_tokens=None, assistant_tokens=None):
        super().__init__()
        # Context manager so the file handle is closed deterministically.
        with open(data_path, encoding="utf-8") as data_file:
            self.data = json.load(data_file)
        self.tokenizer = tokenizer
        self.model_max_length = model_max_length
        # None sentinels plus a copy: instances never share (or alias) a mutable
        # default or the caller's list.
        # NOTE(review): 195/196 look like Baichuan2's reserved user/assistant ids;
        # confirm they are meaningful for the tokenizer actually in use.
        self.user_tokens = [195] if user_tokens is None else list(user_tokens)
        self.assistant_tokens = [196] if assistant_tokens is None else list(assistant_tokens)
        # Label value used for positions that should not be learned.
        self.ignore_index = -100

    def __len__(self):
        """Return the number of records loaded from the JSON file."""
        return len(self.data)

    def preprocessing(self, example):
        """Convert one record into fixed-length training tensors.

        Args:
            example: A record with a ``"trajectory"`` list of
                ``{"role": ..., "content": ...}`` messages.

        Returns:
            Dict with ``input_ids`` and ``labels`` (LongTensors of length
            ``model_max_length``) and a boolean ``attention_mask``.
        """
        input_ids = []
        labels = []

        for message in example["trajectory"]:
            from_ = message["role"]
            value = message["content"]
            value_ids = self.tokenizer.encode(value)

            if from_ == "user":
                input_ids += self.user_tokens + value_ids
                # The first label slot of a user turn is eos; the user text itself
                # is masked out with ignore_index.
                labels += [self.tokenizer.eos_token_id] + [self.ignore_index] * len(value_ids)
            else:
                input_ids += self.assistant_tokens + value_ids
                # The first label slot is masked; the assistant text is the learning target.
                labels += [self.ignore_index] + value_ids

        # Close the conversation with eos in both sequences.
        input_ids.append(self.tokenizer.eos_token_id)
        labels.append(self.tokenizer.eos_token_id)

        # Truncate first (this may drop the trailing eos), then right-pad.
        input_ids = input_ids[: self.model_max_length]
        labels = labels[: self.model_max_length]
        input_ids += [self.tokenizer.pad_token_id] * (self.model_max_length - len(input_ids))
        labels += [self.ignore_index] * (self.model_max_length - len(labels))

        input_ids = torch.LongTensor(input_ids)
        labels = torch.LongTensor(labels)
        # True wherever the token differs from the pad id. If the pad id coincides
        # with a real token id, those positions are masked as well.
        attention_mask = input_ids.ne(self.tokenizer.pad_token_id)
        return {"input_ids": input_ids, "labels": labels, "attention_mask": attention_mask}

    def __getitem__(self, idx) -> Dict[str, torch.Tensor]:
        """Tokenize and return the record at position ``idx``."""
        return self.preprocessing(self.data[idx])


def train(model_args, data_args, training_args, lora_target_modules=("W_pack",)):
    """Fine-tune a causal language model (optionally with LoRA) and save it.

    Args:
        model_args: Object with ``model_name_or_path`` (checkpoint to load).
        data_args: Object with ``data_path`` (JSON file read by ``SupervisedDataset``).
        training_args: ``CustomTrainingArguments``; ``cache_dir``,
            ``model_max_length``, ``use_lora`` and ``output_dir`` are read here,
            and the whole object is handed to the ``Trainer``.
        lora_target_modules: Module names (or a single string understood by
            peft) that receive LoRA adapters when ``training_args.use_lora`` is
            set. Defaults to ``("W_pack",)``.
            NOTE(review): "W_pack" is presumably the fused QKV projection of
            Baichuan-style models; other architectures need their own names.

    Side effects:
        Writes the trainer state, the model and the tokenizer to
        ``training_args.output_dir``.
    """
    # Load the base model.
    # `fp16` is not a `from_pretrained` argument, so it is not passed here.
    # Mixed precision is controlled by `training_args.fp16` and applied by the Trainer.
    # trust_remote_code=True executes Python shipped in the model repository;
    # only use it with repositories you trust (consider pinning a revision).
    model = transformers.AutoModelForCausalLM.from_pretrained(
        model_args.model_name_or_path,
        trust_remote_code=True,
        cache_dir=training_args.cache_dir,
    )

    # Load the tokenizer
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        model_args.model_name_or_path,
        use_fast=False,
        trust_remote_code=True,
        model_max_length=training_args.model_max_length,
        cache_dir=training_args.cache_dir,
    )

    # Optionally apply LoRA
    if training_args.use_lora:
        # A plain string is passed through untouched; any other iterable becomes a list.
        if isinstance(lora_target_modules, str):
            target_modules = lora_target_modules
        else:
            target_modules = list(lora_target_modules)

        peft_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,  # Task type for causal language modeling
            target_modules=target_modules,  # Layers that receive LoRA adapters
            inference_mode=False,
            r=8,  # Low-rank dimension (adjust based on memory constraints)
            lora_alpha=32,  # Scaling factor
            lora_dropout=0.1,  # Dropout for LoRA
        )
        # Make the input-embedding output require grad so gradients can reach the
        # adapter weights while the base model weights stay frozen.
        model.enable_input_require_grads()
        model = get_peft_model(model, peft_config)  # Wrap the model with LoRA adapters
        model.print_trainable_parameters()  # Print the number of trainable parameters to verify

    # Create the dataset
    dataset = SupervisedDataset(data_args.data_path, tokenizer, training_args.model_max_length)

    # Initialize the trainer (no eval_dataset is provided, so training_args must
    # not request evaluation).
    trainer = transformers.Trainer(
        model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer
    )

    # Start training
    trainer.train()

    # Save model and state
    trainer.save_state()
    trainer.save_model(output_dir=training_args.output_dir)
    tokenizer.save_pretrained(training_args.output_dir)  # Save tokenizer as well


# Define your arguments directly in the notebook.
# model_args selects the checkpoint that train() loads with from_pretrained;
# data_args points at the JSON file that SupervisedDataset reads.
# NOTE(review): train() attaches LoRA adapters to modules named "W_pack" by
# default; confirm the checkpoint chosen here actually contains such modules.
model_args = ModelArguments(model_name_or_path="openbmb/MiniCPM3-4B")
data_args = DataArguments(data_path="agent_trajectories.json")  # Update with the actual path to your JSON file




In [None]:


# Hyper-parameters for the fine-tuning run.
training_args = CustomTrainingArguments(
    output_dir="./results_finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=1,  # Use a smaller batch size to save memory
    gradient_accumulation_steps=8,  # Accumulate gradients over 8 steps to simulate a larger batch size
    learning_rate=2e-5,
    # No evaluation strategy is set: train() gives the Trainer no eval_dataset,
    # so requesting per-epoch evaluation would fail. The library default
    # (no evaluation) applies.
    cache_dir=None,
    model_max_length=512,
    fp16=True,  # Enable mixed precision to save memory
    use_lora=True,  # Enable LoRA for fine-tuning
)





In [None]:


# Run the fine-tuning job: train() loads the model and tokenizer, trains on the
# file at data_args.data_path, and saves the trainer state, model and tokenizer
# to training_args.output_dir.
train(model_args, data_args, training_args)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/738 [00:00<?, ?B/s]

configuration_baichuan.py:   0%|          | 0.00/2.38k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/baichuan-inc/Baichuan2-7B-Base:
- configuration_baichuan.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_baichuan.py:   0%|          | 0.00/33.1k [00:00<?, ?B/s]

quantizer.py:   0%|          | 0.00/9.07k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/baichuan-inc/Baichuan2-7B-Base:
- quantizer.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


generation_utils.py:   0%|          | 0.00/2.97k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/baichuan-inc/Baichuan2-7B-Base:
- generation_utils.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/baichuan-inc/Baichuan2-7B-Base:
- modeling_baichuan.py
- quantizer.py
- generation_utils.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
pip install xformers.


pytorch_model.bin.index.json:   0%|          | 0.00/18.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.93G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/5.08G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
!pip install datasets
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import re

# Load GSM8K dataset ("main" configuration). Only the "train" split is read by
# the sample loop further down.
dataset = load_dataset("openai/gsm8k", "main")

# Load MathCoder-L-7B model and tokenizer.
# `model` and `tokenizer` are module-level globals used directly by
# generate_thoughts() and detect_hallucination().
# NOTE(review): no dtype or device is specified, so the weights presumably load
# in the library's default precision on CPU; confirm this fits the runtime's memory.
model_name = "MathLLMs/MathCoder-L-7B"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def generate_thoughts(question):
    """Ask the model for a step-by-step solution and split it into sentences.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        question: Problem statement inserted into the prompt.

    Returns:
        List of non-empty sentences from the model's completion (the prompt is
        not included), or ``[]`` if generation raised an exception.
    """
    prompt = f"""Solve this problem step by step. Provide your reasoning as a series of statements:

Problem: {question}

Solution:"""

    try:
        # Keep the inputs on the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[1]
        # max_new_tokens bounds only the completion; max_length would also count
        # the prompt tokens and could leave little or no room to generate.
        outputs = model.generate(**inputs, max_new_tokens=300, num_return_sequences=1,
                                 temperature=0.7, do_sample=True, num_beams=4)
        # The returned sequence starts with the prompt tokens; drop them so the
        # prompt's own sentences are not reported as model "thoughts".
        generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        print("Raw generated text:", generated_text)  # Debugging line

        # Split the generated text into sentences (after ., ! or ? followed by whitespace)
        thoughts = [sent.strip() for sent in re.split(r'(?<=[.!?])\s+', generated_text) if sent.strip()]

        return thoughts
    except Exception as e:
        # Best effort: report the failure and let the caller continue with no thoughts.
        print(f"Error in generate_thoughts: {str(e)}")
        return []

def detect_hallucination(thought, question, correct_answer):
    """Ask the model whether one reasoning step agrees with the reference solution.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        thought: The reasoning sentence to check.
        question: The original problem statement.
        correct_answer: The reference solution shown to the model.

    Returns:
        ``True`` if the verdict starts with "incorrect" or cannot be
        interpreted (treated as a potential hallucination), ``False`` if it
        starts with "correct".
    """
    compare_prompt = f"""Question: {question}

Correct solution: {correct_answer}

Current thought: {thought}

Based on the correct solution, is the current thought correct and relevant?
Respond with 'Correct' if the thought is accurate and relevant to the solution, or 'Incorrect' if it contains any errors or is irrelevant.
Then provide a brief explanation for your decision.

Decision and Explanation:"""

    # Keep the inputs on the same device as the model.
    inputs = tokenizer(compare_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]
    # max_new_tokens bounds only the answer; the prompt (question + solution +
    # thought) alone can exceed a total max_length of 150 tokens.
    outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)
    # Decode only the completion. If the prompt were included, the text would
    # always begin with "Question:" and neither verdict prefix could match.
    result = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    print(f"MathCoder Evaluation: {result}")  # Print the raw evaluation result

    verdict = result.lower()
    if verdict.startswith('incorrect'):
        return True
    elif verdict.startswith('correct'):
        return False
    else:
        print("Unclear evaluation result. Treating as potential hallucination.")
        return True

def solve_problem(question, correct_answer):
    """Generate reasoning steps for one problem and report which look suspect.

    Everything is written to stdout: the question and reference answer, each
    generated thought, and the hallucination verdict for each thought.

    Args:
        question: Problem statement passed to ``generate_thoughts``.
        correct_answer: Reference solution passed to ``detect_hallucination``.
    """
    print(f"Question: {question}")
    print(f"Correct Answer (Raw Label): {correct_answer}\n")

    thoughts = generate_thoughts(question)
    print(f"Total number of thoughts: {len(thoughts)}\n")

    # Guard clause: nothing to list or check.
    if not thoughts:
        print("No thoughts were generated.")
        return

    # First pass: show every thought, each followed by a blank line.
    print("Generated thoughts:")
    for i, thought in enumerate(thoughts):
        print(f"Thought {i+1}: {thought}")
        print()

    # Second pass: check each thought against the reference solution.
    print("Hallucination detection:")
    for i, thought in enumerate(thoughts):
        print(f"Checking Thought {i+1}:")
        flagged = detect_hallucination(thought, question, correct_answer)
        if not flagged:
            print(f"Thought {i+1} seems valid.")
        else:
            print(f"Thought {i+1} may contain a hallucination.")
        print()

# Process samples from the dataset: run solve_problem on the first
# `num_samples` records of the "train" split, printing a banner before each.
num_samples = 5  # Number of samples to process
for i in range(num_samples):
    sample = dataset["train"][i]
    question = sample["question"]
    correct_answer = sample["answer"]  # This is the raw label from the dataset
    # Banner: a row of 50 '=' above and below the 1-based sample number.
    print(f"\n{'='*50}\nSample {i+1}:\n{'='*50}")
    solve_problem(question, correct_answer)
    print("\n")

Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.1-py3-none-any.whl (471 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
!pip install openai==0.28



In [None]:
import getpass
import os

import openai

# Set your OpenAI API key.
# The key is read from the OPENAI_API_KEY environment variable, falling back to
# an interactive prompt, so it is never stored in the notebook itself.
# NOTE: any key that was previously committed in this cell must be treated as
# compromised and revoked in the OpenAI dashboard.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

def prompt_gpt3(prompt, model="gpt-3.5-turbo"):
    """Send a single user message to the OpenAI chat API and return the reply.

    Args:
        prompt: Text sent as the user message.
        model: Chat model id. Defaults to ``"gpt-3.5-turbo"``; the bare id
            ``"gpt-3.5"`` is rejected by the API as a non-existent model.

    Returns:
        The assistant's reply text. On any failure the function does not raise;
        it returns a string of the form ``"An error occurred: <details>"``.
    """
    try:
        # Call the OpenAI API
        response = openai.ChatCompletion.create(
            model=model,  # Specify the model
            messages=[
                {"role": "user", "content": prompt}  # The user message
            ],
            max_tokens=100,  # Specify max tokens in response
            temperature=0.7,  # Control randomness of responses
        )

        # Get the response content
        return response.choices[0].message['content']
    except Exception as e:
        # Best effort: surface the error as text instead of raising.
        return f"An error occurred: {e}"

# Example usage: prompt_gpt3 returns either the reply or an "An error occurred: ..." string.
user_prompt = "What are the benefits of using Python for data analysis?"
response = prompt_gpt3(user_prompt)
print("Response from GPT-3.5:", response)

# NOTE(review): the recorded run failed with "The model `gpt-3.5` does not exist or
# you do not have access to it" - this looks like an invalid model id rather than
# an account-tier restriction; confirm against the OpenAI model list.

Response from GPT-3.5: An error occurred: The model `gpt-3.5` does not exist or you do not have access to it.
