In [1]:
#!/usr/bin/env python
# coding: utf-8

# Install necessary libraries
!pip install -U transformers datasets accelerate peft trl bitsandbytes


Collecting transformers
  Downloading transformers-4.48.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting accelerate
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9

In [2]:
# Log in to Hugging Face
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineGrained).
The token `mariamattiaa` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re

In [3]:
import torch
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)

In [4]:
# Model Configuration
base_model = "meta-llama/Llama-3.2-3B-Instruct"

In [5]:
# Check if GPU is available
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device Name:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected. Ensure GPU is available for optimal performance.")


CUDA Available: True
Device Name: Tesla T4


In [6]:
# QLoRA Configuration for Efficient Model Loading
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)


In [7]:
# Define Device Map for Efficient Loading
device_map = "auto"  # Default: Automatic allocation
low_memory_device_map = {"": 0}  # Force model to GPU 0 (for low-memory GPUs)

try:
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map=device_map,  # Use automatic device mapping
        trust_remote_code=True
    )
except ValueError as e:
    print("GPU memory insufficient, switching to CPU offloading...")
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map=low_memory_device_map,  # Force everything onto GPU 0
        llm_int8_enable_fp32_cpu_offload=True,  # Enable CPU offloading
        trust_remote_code=True
    )

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [8]:
# Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

print("✅ Model & tokenizer loaded successfully!")

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


✅ Model & tokenizer loaded successfully!


In [9]:
# Function to Generate a Sales-Oriented Prompt with Product Data
def generate_sales_prompt_with_data(dataframe):
    base_prompt = (
        "You are a knowledgeable sales representative specializing in Mobica office furniture. "
        "Your task is to recommend suitable products based on customer inquiries, ensuring relevance and a structured response format. "
        "Follow these guidelines:\n\n"
        "1. **Understand the Customer's Needs**: Identify key requirements based on their inquiry.\n"
        "2. **Provide Structured Recommendations**:\n"
        "   - Highlight the most relevant products.\n"
        "   - Describe their key features, dimensions, and finishes.\n"
        "   - Explain how they meet the customer's needs.\n"
        "3. **Enhance Readability**: Format recommendations using clear bullet points and sections.\n"
        "4. **Encourage Further Engagement**: End with a follow-up question or an invitation to explore more options.\n"
        "5. **Avoid Guesswork**: If product details are unavailable, suggest similar alternatives instead of making assumptions.\n"
        "\n### **Available Mobica Products:**\n"
    )

    product_details = "\n".join(
        f"- **{row['Product Name']} ({row['Second Name']})**\n"
        f"  - *Description*: {row['Description Features']}\n"
        f"  - *Finish*: {row['Finishes']}\n"
        f"  - *Dimensions*: {row['Dimensions']}\n"
        for _, row in dataframe.iterrows()
    )

    return base_prompt + product_details

In [10]:
# Load Product Dataset and Generate the Prompt
data_path = '/content/Product_Data.csv'
product_data = pd.read_csv(data_path, encoding='latin1')
sales_prompt = generate_sales_prompt_with_data(product_data)

In [11]:
# Initialize the Text-Generation Pipeline with Improved Parameters
sales_agent_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=700,  # Increased for richer responses
    temperature=0.5,  # Lowered for a more stable and informative output
    repetition_penalty=1.15,  # Further reduced redundancy
    do_sample=True,
    top_p=0.9
)

Device set to use cuda:0


In [20]:
# Function to Generate Enhanced Sales Responses
def generate_sales_response(user_query):
    input_text = f"{sales_prompt}\n\n### **Customer Inquiry:** {user_query}\n\n### **Response:**"
    response = sales_agent_pipeline(input_text)[0]['generated_text']

    # Extract only the response part
    response_text = response.split("### **Response:**")[-1].strip()

    # Post-processing: Removing unwanted hallucinations or repeated content
    response_text = response_text.replace("Response:", "").strip()

    return response_text

# Example Usage
user_input = "What Mobica solutions are ideal for collaborative workspaces?"
print(generate_sales_response(user_input))

As a valued client, we want to reward your loyalty! Keep an eye on our website and social media channels for special offers and discounts on bulk purchases. Additionally, feel free to ask about our volume pricing structures, which might grant you even greater
