<a href="https://colab.research.google.com/github/Nagavenkatasai7/642-3/blob/main/llm1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install transformers accelerate bitsandbytes torch datasets -q

# Check GPU availability
import torch
print(f"GPU Available: {torch.cuda.is_available()}")
print(f"GPU Name: {torch.cuda.get_device_name(0)}")
print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

GPU Available: True
GPU Name: NVIDIA A100-SXM4-40GB
GPU Memory: 39.6 GB


In [None]:
# First, install bitsandbytes and other requirements
!pip install -U bitsandbytes
!pip install -U transformers accelerate

# Import and check versions
import transformers
import bitsandbytes
print(f"Transformers version: {transformers.__version__}")
print(f"Bitsandbytes version: {bitsandbytes.__version__}")

Collecting transformers
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Downloading transformers-4.53.0-py3-none-any.whl (10.8 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.8/10.8 MB[0m [31m105.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.53.0


Transformers version: 4.52.4
Bitsandbytes version: 0.46.0


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Use an open model that doesn't require authentication
model_name = "HuggingFaceH4/zephyr-7b-beta"  # Excellent reasoning model, no auth needed!

print("Loading model... This will take about 1-2 minutes")
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,  # Efficient loading
)

print("‚úÖ Model loaded successfully!")

Loading model... This will take about 1-2 minutes


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

‚úÖ Model loaded successfully!


In [None]:
class ReasoningModel:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def generate_reasoning_chain(self, problem, max_steps=10):
        """Generate step-by-step reasoning for complex problems"""

        reasoning_prompt = f"""<s>[INST] You are an expert problem solver. Solve this step-by-step.

Problem: {problem}

Instructions:
1. Break down the problem into clear steps
2. Show your work for each step
3. Double-check your reasoning
4. Provide a clear final answer

Solution:
Let me solve this step by step.

Step 1: Understanding the problem
"""

        inputs = self.tokenizer(
            reasoning_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1500,
                temperature=0.7,
                do_sample=True,
                top_p=0.95,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return self._extract_reasoning(response)

    def _extract_reasoning(self, response):
        """Extract and format the reasoning steps"""
        if "[INST]" in response:
            response = response.split("[/INST]")[-1].strip()
        return response

    def solve_with_verification(self, problem):
        """Solve problem and verify the answer"""
        # First attempt
        solution1 = self.generate_reasoning_chain(problem)

        # Verification prompt
        verify_prompt = f"""<s>[INST] Check this solution for errors:

Problem: {problem}

Proposed Solution:
{solution1}

Please verify if this solution is correct. If there are errors, explain what's wrong.
[/INST]"""

        inputs = self.tokenizer(verify_prompt, return_tensors="pt", truncation=True, max_length=2048).to(self.device)

        with torch.no_grad():
            verify_outputs = self.model.generate(
                **inputs,
                max_new_tokens=500,
                temperature=0.3,
                do_sample=True
            )

        verification = self.tokenizer.decode(verify_outputs[0], skip_special_tokens=True)

        return {
            "solution": solution1,
            "verification": self._extract_reasoning(verification),
            "problem": problem
        }

# Initialize the reasoning model
reasoner = ReasoningModel(model, tokenizer)
print("‚úÖ Reasoning engine ready!")

‚úÖ Reasoning engine ready!


In [None]:
# Test 1: Math reasoning
math_problem = """
A store is having a sale. All items are 25% off, and there's an additional 10% off
if you buy 3 or more items. Sarah buys 4 shirts originally priced at $30 each.
How much does she pay in total?
"""

print("üî¢ MATH PROBLEM:")
print(math_problem)
print("\nü§ñ REASONING:")
result = reasoner.solve_with_verification(math_problem)
print(result["solution"])
print("\n‚úì VERIFICATION:")
print(result["verification"])
print("-" * 80)

üî¢ MATH PROBLEM:

A store is having a sale. All items are 25% off, and there's an additional 10% off 
if you buy 3 or more items. Sarah buys 4 shirts originally priced at $30 each. 
How much does she pay in total?


ü§ñ REASONING:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] You are an expert problem solver. Solve this step-by-step.

Problem: 
A store is having a sale. All items are 25% off, and there's an additional 10% off 
if you buy 3 or more items. Sarah buys 4 shirts originally priced at $30 each. 
How much does she pay in total?


Instructions:
1. Break down the problem into clear steps
2. Show your work for each step
3. Double-check your reasoning
4. Provide a clear final answer

Solution:
Let me solve this step by step.

Step 1: Understanding the problem
- The store is having a sale with two discounts: 25% off on all items and an additional 10% off if you buy 3 or more items.
- Sarah bought 4 shirts originally priced at $30 each.

Step 2: Calculate the discount for the first discount (25%)
- 1st discount: Each shirt was originally priced at $30, so Sarah paid $30 for each shirt.
- For the first discount, we calculate 25% of each shirt price ($7.50).
- Shirt 1: $30 - $7.50 = $22.50 after the first discount.
- Shirt 2: $30 - $7.50 = $22.50 af

In [None]:
# Implement different reasoning strategies
class AdvancedReasoner(ReasoningModel):
    def __init__(self, model, tokenizer):
        super().__init__(model, tokenizer)

    def chain_of_thought(self, problem):
        """Classic chain-of-thought reasoning"""
        cot_prompt = f"""<s>[INST] {problem}

Let's approach this step-by-step:
[/INST]"""
        return self._generate(cot_prompt)

    def tree_of_thought(self, problem, branches=3):
        """Generate multiple solution paths"""
        solutions = []

        for i in range(branches):
            tot_prompt = f"""<s>[INST] {problem}

Approach {i+1}: Let me try a different method:
[/INST]"""
            solution = self._generate(tot_prompt, temperature=0.8)
            solutions.append(solution)

        # Evaluate solutions
        best_solution = self._select_best_solution(problem, solutions)
        return best_solution

    def _generate(self, prompt, temperature=0.7, max_tokens=1000):
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=self.tokenizer.eos_token_id
            )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True).split("[/INST]")[-1].strip()

    def _select_best_solution(self, problem, solutions):
        """Use the model to select the best solution"""
        eval_prompt = f"""<s>[INST] Problem: {problem}

Here are different solutions:

{chr(10).join([f"Solution {i+1}: {sol[:200]}..." for i, sol in enumerate(solutions)])}

Which solution is most accurate and complete? Explain why.
[/INST]"""

        evaluation = self._generate(eval_prompt, temperature=0.3)

        return {
            "solutions": solutions,
            "evaluation": evaluation,
            "best": solutions[0]  # In practice, parse the evaluation to pick the best
        }

# Create advanced reasoner
advanced_reasoner = AdvancedReasoner(model, tokenizer)

In [None]:
def interactive_reasoning_system():
    """Interactive system with multiple reasoning modes"""

    print("üß† ADVANCED REASONING SYSTEM (A100-Powered)")
    print("=" * 60)
    print("Commands:")
    print("  'solve' - Standard reasoning")
    print("  'verify' - Solve with verification")
    print("  'tree' - Tree of thought (multiple solutions)")
    print("  'quit' - Exit")
    print("=" * 60)

    while True:
        mode = input("\nMode (solve/verify/tree/quit): ").lower()

        if mode == 'quit':
            break

        if mode not in ['solve', 'verify', 'tree']:
            print("Invalid mode. Try again.")
            continue

        problem = input("\nEnter your problem:\n> ")

        print("\nü§î Thinking...\n")

        if mode == 'solve':
            result = advanced_reasoner.chain_of_thought(problem)
            print("SOLUTION:")
            print(result)

        elif mode == 'verify':
            result = reasoner.solve_with_verification(problem)
            print("SOLUTION:")
            print(result["solution"])
            print("\nVERIFICATION:")
            print(result["verification"])

        elif mode == 'tree':
            result = advanced_reasoner.tree_of_thought(problem, branches=3)
            print("MULTIPLE APPROACHES GENERATED")
            print("\nEVALUATION:")
            print(result["evaluation"])

        print("\n" + "="*60)

# Run the interactive system
interactive_reasoning_system()

üß† ADVANCED REASONING SYSTEM (A100-Powered)
Commands:
  'solve' - Standard reasoning
  'verify' - Solve with verification
  'tree' - Tree of thought (multiple solutions)
  'quit' - Exit

Mode (solve/verify/tree/quit): solve

Enter your problem:
> 9+3*4/2+92-44*66%55


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



ü§î Thinking...

SOLUTION:
1. Perform the operations inside the parentheses first:
9 + 3 = 12

2. Multiply the result from step 1 by 4:
12 * 4 = 48

3. Add 2 to the result from step 2:
48 + 2 = 50

4. Subtract 44 from the result of step 3:
50 - 44 = 6

5. Multiply the result from step 4 by 66%:
6 * 66% = 3.96

6. Round the result from step 5 to two decimal places:
3.96 rounded to two decimal places is 3.96.

The answer is: 3.96.



KeyboardInterrupt: Interrupted by user

In [None]:
# Test suite for reasoning capabilities
test_problems = [
    {
        "type": "math",
        "problem": "If a rectangle has a perimeter of 24 cm and its length is twice its width, what is its area?"
    },
    {
        "type": "logic",
        "problem": "Three friends - Alice, Bob, and Charlie - are wearing red, blue, and green shirts. Alice isn't wearing red. Bob isn't wearing blue. Charlie isn't wearing green. What color is each person wearing?"
    },
    {
        "type": "coding",
        "problem": "Write a Python function to find the second largest number in a list without using built-in sorting."
    }
]

print("üèÜ BENCHMARKING REASONING CAPABILITIES\n")

for test in test_problems:
    print(f"\n{'='*60}")
    print(f"Problem Type: {test['type'].upper()}")
    print(f"Problem: {test['problem']}")
    print(f"{'='*60}")

    solution = reasoner.generate_reasoning_chain(test['problem'])
    print(solution)

    # Save results
    with open(f"reasoning_test_{test['type']}.txt", "w") as f:
        f.write(f"Problem: {test['problem']}\n\n")
        f.write(f"Solution:\n{solution}")

In [None]:
# Save your reasoning configuration
import json

config = {
    "model_name": model_name,
    "reasoning_patterns": ["chain_of_thought", "tree_of_thought", "verification"],
    "optimal_settings": {
        "temperature": 0.7,
        "max_tokens": 1500,
        "top_p": 0.95
    }
}

with open("reasoning_model_config.json", "w") as f:
    json.dump(config, f, indent=2)

print("‚úÖ Model configuration saved!")
print("\nYour A100-powered reasoning model is ready!")
print("Total VRAM usage:", torch.cuda.memory_allocated(0) / 1024**3, "GB")