In [2]:
# Install the libraries this notebook relies on. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
# NOTE(review): versions are unpinned, so a re-run may pull different releases
# than the ones that produced the recorded outputs — TODO pin known-good versions.
%pip install -q transformers accelerate bitsandbytes protobuf sentencepiece

Note: you may need to restart the kernel to use updated packages.


In [None]:
#Initialize Pipeline and Load LLM

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch

# Model selection: keep exactly ONE active assignment. The original cell
# assigned model_name twice, so the first (Falcon) line was dead code that
# was silently overridden by the Yi assignment further down.
model_name = "01-ai/Yi-1.5-9B-Chat"
# Alternatives — swap one in by replacing the line above:
# model_name = "tiiuae/falcon-7b-instruct"
# model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_name = "google/gemma-2-9b-it"
# model_name = "microsoft/Phi-3-small-8k-instruct"
# model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# model_name = "internlm/internlm2_5-7b-chat"

# 8-bit weights via bitsandbytes. Passing a BitsAndBytesConfig (instead of the
# bare load_in_8bit=True keyword) avoids the transformers deprecation warning.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Some tokenizers ship without a pad token; reuse EOS so padding (and therefore
# batched generation) works and the pad_token_id warning goes away.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Decoder-only models continue from the last position of the input, so padding
# has to go on the left when several prompts are generated in one batch.
tokenizer.padding_side = "left"

# SECURITY: trust_remote_code=True executes Python code shipped inside the
# model repository. Only keep it for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=quantization_config,
)

# Text-generation pipeline used by the cells below. It was previously commented
# out, which left `pipe` undefined after Restart & Run All.
# device_map is intentionally not passed here: the model object is already
# placed on its devices by from_pretrained(device_map="auto") above.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.78G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

In [3]:
def remove_prompt(generated_text, prompt):
    """Strip the echoed prompt from the start of a generation.

    Text-generation output starts with the prompt followed by the model's
    continuation. Only that leading echo is removed; if the model repeats the
    prompt text later inside its answer, that repetition is part of the
    response and is kept (a plain ``str.replace`` would delete it too).

    Args:
        generated_text: Full text returned by the model (prompt + continuation).
        prompt: The prompt that was sent to the model.

    Returns:
        The continuation with surrounding whitespace stripped. When
        ``generated_text`` does not start with ``prompt`` the text is returned
        unchanged apart from the whitespace strip.
    """
    if generated_text.startswith(prompt):
        # Slice instead of str.removeprefix to stay compatible with Python < 3.9.
        generated_text = generated_text[len(prompt):]
    return generated_text.strip()

In [4]:
# Prompts that are sent to the model in a single pipeline call.
prompts = [
    "Share possible routes from saddar to gulshan e iqbal",
    "Share possible routes from karachi airport to dha",
    "Share possible routes from johar to clifton",
    "Share possible routes from bahadurabad to saddar"
]

# Passing a list does NOT batch by itself: the pipeline processes the items one
# at a time unless batch_size is given, so it is set explicitly here.
# Batched generation with a decoder-only model needs left padding, otherwise
# pad tokens end up between a short prompt and its continuation.
pipe.tokenizer.padding_side = "left"
results = pipe(
    prompts,
    batch_size=len(prompts),
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
)

print("[Accessing without any modification] [Model: %s]" % model_name)
for i, (prompt, result) in enumerate(zip(prompts, results), start=1):
    # For list input each entry is itself a list of candidate generations;
    # take the first candidate. A bare dict is accepted as well.
    candidate = result[0] if isinstance(result, list) else result

    # The generated text starts with the prompt; drop that echo before printing.
    cleaned_response = remove_prompt(candidate['generated_text'], prompt)
    print(f"\nPrompt {i}: {prompt}")
    print(f"Response: {cleaned_response}\n")
    print("-" * 50)

[Accessing without any modification] [Model: tiiuae/falcon-7b-instruct]

Prompt 1: Share possible routes from saddar to gulshan e iqbal
Response: Yes, it is possible to share possible routes from Saddar to Gulshan-e-Iqbal. Some of the possible options are:

1. Route 3: Take the Shahnama Express from Saddar Station to Gulshan-e-Iqbal Station. The journey will take around 15-20 minutes.

2. Route 4: Take a taxi from Saddar Station to Gulshan-e-Iqbal Station. The journey will take around 10-15 minutes.

3. Route 6: Take a bus from Saddar Station to Gulshan-e-Iqbal Station. The journey can be completed in around 5 minutes.

It is recommended to start the journey during the day time as it may become difficult to find a taxi during peak hours.

--------------------------------------------------

Prompt 2: Share possible routes from karachi airport to dha
Response: city
As an AI language model, I don't have access to real-time information on traffic routes. However, you can check the website 

In [None]:
# Free-text request whose source/destination the model should extract.
user_prompt = "Share possible routes from clifton to gulshan e iqbal"

# Instruction prompt asking for JSON-only output. The example object uses
# double quotes: single-quoted keys/values are a Python dict repr, not JSON,
# and a model imitating them would produce output that json.loads rejects.
# Literal braces are doubled because this is an f-string.
ai_prompt = f"""
Extract the routes from the given prompt, your output will be input of a function
your output text should only contain json, not a single extra text or code

example output:
```json
{{"source": "saddar", "destination": "gulshan e iqbal"}}
```

```prompt
{user_prompt}
```
"""

# Model call is disabled for now; the cell only shows the assembled prompt.
# response = pipe(ai_prompt, max_new_tokens=300)[0]['generated_text']
# print("\n[Agent Decision]\n", remove_prompt(response, ai_prompt))

print(ai_prompt)




Extract the routes from the given prompt, your output will be input of a function
your output text should only contain json, not a single extra text or code


example prompt:
```prompt
Share possible routes from saddar to gulshan e iqbal
```

example output:
```json
{'source': 'saddar', 'destination': 'gulshan e iqbal'}
```

```prompt
Share possible routes from clifton to gulshan e iqbal
```

