In [None]:
!pip install -q google-generativeai

In [None]:
!pip install google-cloud-storage



In [None]:
from google.cloud import storage
import io
from google.colab import userdata
from google.colab import auth
import pandas as pd
import google.generativeai as genai

# Imported next to its only use: the Storage client reports a missing bucket
# or object with NotFound, not with the built-in FileNotFoundError.
from google.cloud.exceptions import NotFound

# Authenticate the Colab user before any Google Cloud client is created, so
# the Storage client below can pick up the user's credentials.
auth.authenticate_user()

# Configure the API key for Google Generative AI (read from Colab user secrets)
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

# Initialize the Google Cloud Storage client
client = storage.Client()

# Set the name of your bucket and file
bucket_name = 'cip_dataset_bucket'
file_name = 'Intent_detection_Data.xlsx'

try:
    # Get the bucket and blob (file)
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(file_name)
    # Read the file content as a byte stream
    file_content = blob.download_as_bytes()
except NotFound:
    # Re-raise instead of calling exit(): exit() would shut the kernel down,
    # while a raised exception stops the run and keeps the traceback visible.
    print(f"Error: {file_name} not found in bucket {bucket_name}.")
    raise

# Parse the downloaded workbook into a DataFrame
df = pd.read_excel(io.BytesIO(file_content))

Creating Prompts from the Dataset

In [None]:
from sklearn.model_selection import train_test_split
import time
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Normalise the request text: coerce to string, then lower-case it
df['REQUEST'] = df['REQUEST'].astype(str).str.lower()

# Pair every request with its labelled intent as one tuning example
prompts = [
    {"text_input": request_text, "output": intent_label}
    for request_text, intent_label in zip(df['REQUEST'], df['INTENT'])
]

# Hold out 20% of the examples for testing; the fixed seed keeps the split reproducible
train_prompts, test_prompts = train_test_split(
    prompts,
    test_size=0.2,
    random_state=42,
)

# Report the size of each subset
print(f"Total prompts: {len(prompts)}")
print(f"Training prompts: {len(train_prompts)}")
print(f"Testing prompts: {len(test_prompts)}")

Total prompts: 160
Training prompts: 128
Testing prompts: 32


Training and Tuning the Model

In [None]:
base_model = "models/gemini-1.5-flash-001-tuning"
name = "GeminiPredictionModel"

# Tuning hyperparameters, gathered in one place so they are easy to adjust
tuning_hyperparameters = dict(
    epoch_count=20,
    batch_size=4,
    learning_rate=0.0001,
)

# Submit the tuning job for the training split
operation = genai.create_tuned_model(
    source_model=base_model,
    display_name=name,
    training_data=train_prompts,
    **tuning_hyperparameters,
)

# Iterate the operation's progress bar until it is exhausted,
# pausing 10 seconds between iterations
for _ in operation.wait_bar():
    time.sleep(10)

# Fetch and show the finished tuned model
result = operation.result()
print("Model trained successfully: ", result)

  0%|          | 0/640 [00:00<?, ?it/s]

Model trained successfully:  TunedModel(name='tunedModels/geminipredictionmodel-hzt5pchhcwk4',
           source_model='models/gemini-1.5-flash-001-tuning',
           base_model='models/gemini-1.5-flash-001-tuning',
           display_name='GeminiPredictionModel',
           description='',
           temperature=1.0,
           top_p=0.95,
           top_k=64,
           state=<State.ACTIVE: 2>,
           create_time=datetime.datetime(2024, 12, 4, 23, 17, 54, 688047, tzinfo=datetime.timezone.utc),
           update_time=datetime.datetime(2024, 12, 4, 23, 42, 24, 879596, tzinfo=datetime.timezone.utc),
           tuning_task=TuningTask(start_time=datetime.datetime(2024, 12, 4, 23, 17, 55, 536339, tzinfo=datetime.timezone.utc),
                                  complete_time=datetime.datetime(2024, 12, 4, 23, 42, 24, 879596, tzinfo=datetime.timezone.utc),
                                  snapshots=[...],
                                  hyperparameters=Hyperparameters(epoch_count=20,

Calculating the Metrics

In [None]:
import csv
import time
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Function to calculate accuracy, precision, recall, F1 score
def calculate_metrics(true_labels, predicted_labels):
    """Compute accuracy and weighted precision/recall/F1 for a set of predictions.

    Args:
        true_labels: Sequence of ground-truth labels.
        predicted_labels: Sequence of predicted labels, aligned with
            ``true_labels``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Precision, recall and F1
        use ``average='weighted'``.
    """
    accuracy = accuracy_score(true_labels, predicted_labels)
    # zero_division=0 states explicitly how a label with no predicted (or no
    # true) samples is scored, instead of relying on the default "warn" mode,
    # which uses the same value but emits a warning for each undefined metric.
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels,
        predicted_labels,
        average='weighted',
        zero_division=0,
    )
    return accuracy, precision, recall, f1

# Define a zero-shot prompt
def zero_shot_predict(request_text, intent_categories, model_name="gemini-1.5-flash",
                      max_retries=3, retry_delay=2.0):
    """Classify a customer query into one of the given intents with a zero-shot prompt.

    Args:
        request_text: The customer query to classify.
        intent_categories: Iterable of intent names offered to the model.
        model_name: Name of the generative model to query.
        max_retries: Number of extra attempts made after a failed request.
        retry_delay: Base delay in seconds between attempts; it doubles after
            every failed attempt.

    Returns:
        The model's answer with surrounding whitespace stripped, or the string
        ``"Error"`` when no usable answer could be obtained. This function does
        not raise; failures are printed and reported through the return value.
    """
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            prompt = (
                "The following text is a customer query. "
                f"Please classify it into one of the following intents: {', '.join(intent_categories)}.\n\n"
                f"Query: \"{request_text}\"\n"
                "Intent (please return only the intent without extra symbols):"
            )
            zmodel = genai.GenerativeModel(model_name)
            response = zmodel.generate_content(prompt)  # Get the prediction response from the model
            return response.text.strip()
        except (ValueError, TypeError) as e:
            # Not transient (bad arguments, or presumably a response without
            # usable text such as a blocked candidate - TODO confirm the SDK's
            # exception type), so retrying would only repeat the failure.
            print(f"Error during zero-shot prediction: {e}")
            return "Error"
        except Exception as e:
            # Anything else (e.g. a temporary service failure) may succeed on
            # a later attempt, so back off and try again while attempts remain.
            print(f"Error during zero-shot prediction: {e}")
            if attempt < attempts - 1:
                time.sleep(retry_delay * (2 ** attempt))
    return "Error"

# Append prediction rows to a CSV log file
def save_to_csv(data, filename="predictions.csv"):
    """Append rows to ``filename``, writing the column header first if the file is empty.

    Args:
        data: Iterable of rows; each row is written as one CSV record.
        filename: Path of the CSV file, opened in append mode as UTF-8.
    """
    header = ["Input", "Actual Intent", "Predicted Intent", "Model Type"]
    with open(filename, mode='a', newline='', encoding='utf-8') as handle:
        # A position of 0 right after opening for append means nothing has
        # been written to this file yet.
        needs_header = handle.tell() == 0
        out = csv.writer(handle)
        if needs_header:
            out.writerow(header)
        for record in data:
            out.writerow(record)

# Predefined intent categories
intent_categories = ["CatalogSelection", "OrderStatus", "ProductReplacement", "PriceAndAvailability"]


def _normalize_label(label):
    """Return ``label`` as a string with all whitespace removed.

    The dataset spells one intent as "Product Replacement" while the category
    list uses "ProductReplacement"; removing whitespace lets both spellings
    compare equal when metrics are computed.
    """
    return "".join(str(label).split())


# Initialize lists to store true and predicted labels for metric calculation
true_labels = []
predicted_labels_zero_shot = []
predicted_labels_fine_tuned = []

# List to store CSV data for logging
csv_data_zero_shot = []
csv_data_fine_tuned = []

# Build the handle to the fine-tuned model once instead of on every iteration
model = genai.GenerativeModel(model_name=result.name)

# Run both models (zero-shot and fine-tuned) on the test set
for prompt in test_prompts:
    input_text = prompt['text_input']
    true_label = prompt['output']

    # Zero-shot Prediction
    predicted_intent_zero_shot = zero_shot_predict(input_text, intent_categories)

    # Fine-tuned model prediction; a failed request is recorded as "Error"
    # so one bad sample does not abort the whole evaluation
    try:
        response = model.generate_content(input_text)
        predicted_intent_fine_tuned = response.text.strip()
    except Exception as e:
        print(f"Error during fine-tuned prediction: {e}")
        predicted_intent_fine_tuned = "Error"

    # Collect the true and predicted labels
    true_labels.append(true_label)
    predicted_labels_zero_shot.append(predicted_intent_zero_shot)
    predicted_labels_fine_tuned.append(predicted_intent_fine_tuned)

    # Store data for CSV logging (raw, un-normalized values)
    csv_data_zero_shot.append([input_text, true_label, predicted_intent_zero_shot, "Zero-Shot"])
    csv_data_fine_tuned.append([input_text, true_label, predicted_intent_fine_tuned, "Fine-Tuned"])

    # Print results for zero-shot and fine-tuned predictions
    print(f"Input: {input_text}")
    print(f"Actual Intent: {true_label}")
    print(f"Predicted Intent (Zero-shot): {predicted_intent_zero_shot}")
    print(f"Predicted Intent (Fine-tuned): {predicted_intent_fine_tuned}")
    print("-" * 50)

# Save results to CSV for Zero-shot predictions
save_to_csv(csv_data_zero_shot, filename="zero_shot_predictions.csv")

# Save results to CSV for Fine-tuned predictions
save_to_csv(csv_data_fine_tuned, filename="fine_tuned_predictions.csv")

# Compare whitespace-normalized labels so that spelling variants of the same
# intent are not scored as misclassifications
true_labels_normalized = [_normalize_label(label) for label in true_labels]

# Calculate and print metrics for Zero-shot
accuracy_zero_shot, precision_zero_shot, recall_zero_shot, f1_zero_shot = calculate_metrics(
    true_labels_normalized,
    [_normalize_label(label) for label in predicted_labels_zero_shot],
)
print(f"Zero-shot Accuracy: {accuracy_zero_shot:.4f}")
print(f"Zero-shot Precision: {precision_zero_shot:.4f}")
print(f"Zero-shot Recall: {recall_zero_shot:.4f}")
print(f"Zero-shot F1 Score: {f1_zero_shot:.4f}")

# Calculate and print metrics for Fine-tuned model
accuracy_fine_tuned, precision_fine_tuned, recall_fine_tuned, f1_fine_tuned = calculate_metrics(
    true_labels_normalized,
    [_normalize_label(label) for label in predicted_labels_fine_tuned],
)
print(f"Fine-tuned Accuracy: {accuracy_fine_tuned:.4f}")
print(f"Fine-tuned Precision: {precision_fine_tuned:.4f}")
print(f"Fine-tuned Recall: {recall_fine_tuned:.4f}")
print(f"Fine-tuned F1 Score: {f1_fine_tuned:.4f}")


Input: hi team, please provide ship date for dd06786878. thanks .
Actual Intent: OrderStatus
Predicted Intent (Zero-shot): OrderStatus
Predicted Intent (Fine-tuned): OrderStatus
--------------------------------------------------
Input: hi, may i ask for the leadtime of this item lc6786878
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): PriceAndAvailability
--------------------------------------------------
Input: sir/mam, good day ! request for availability ats12y89a
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): PriceAndAvailability
--------------------------------------------------
Input: good day, i would like to inquire what can we offer for f1426uest2m?
Actual Intent: Product Replacement
Predicted Intent (Zero-shot): CatalogSelection
Predicted Intent (Fine-tuned): CatalogSelection
--------------------------------------------------
Input: good

ERROR:tornado.access:503 POST /v1beta/tunedModels/geminipredictionmodel-hzt5pchhcwk4:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 1283.77ms


Input: hello ma'am/sir, i would like to ask po for the mpg of this item: a12678asjs41-- thank you!
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): PriceAndAvailability
--------------------------------------------------
Error during fine-tuned prediction: ("Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 3. The candidate's safety_ratings are: [category: HARM_CATEGORY_SEXUALLY_EXPLICIT\nprobability: HIGH\n, category: HARM_CATEGORY_HATE_SPEECH\nprobability: NEGLIGIBLE\n, category: HARM_CATEGORY_HARASSMENT\nprobability: NEGLIGIBLE\n, category: HARM_CATEGORY_DANGEROUS_CONTENT\nprobability: NEGLIGIBLE\n].", [category: HARM_CATEGORY_SEXUALLY_EXPLICIT
probability: HIGH
, category: HARM_CATEGORY_HATE_SPEECH
probability: NEGLIGIBLE
, category: HARM_CATEGORY_H

ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 353.37ms


Input: tm2dra16rt alternate
Actual Intent: Product Replacement
Predicted Intent (Zero-shot): CatalogSelection
Predicted Intent (Fine-tuned): Product Replacement
--------------------------------------------------
Input: square d load center cat. no. qoc30mw225, h13e8951 wants to know what breaker will fit it
Actual Intent: CatalogSelection
Predicted Intent (Zero-shot): CatalogSelection
Predicted Intent (Fine-tuned): CatalogSelection
--------------------------------------------------
Input: what's the suitable shunt trip accessories and motor charger for a 33472 breaker @230vac
Actual Intent: CatalogSelection
Predicted Intent (Zero-shot): CatalogSelection
Predicted Intent (Fine-tuned): CatalogSelection
--------------------------------------------------
Input: hi, may i ask what is the updated stocks po 123456  in your warehouse? thank you
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): OrderStatus
---------------------

ERROR:tornado.access:503 POST /v1beta/tunedModels/geminipredictionmodel-hzt5pchhcwk4:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 353.79ms


Input: hi team, could you confirm what the response is for cr-1234567? kindly confirm with plant if there is anyway we can stretch earlier and ship by 05/01 at least qty 1 atv2323232323 (pos2) and qty 1 atv232323245(pos3). thanks
Actual Intent: OrderStatus
Predicted Intent (Zero-shot): OrderStatus
Predicted Intent (Fine-tuned): OrderStatus
--------------------------------------------------
Input: good day maam/sir, can i ask what to offer for: 16a dc mcb 2p, also do we have a dc surge protection devices na locally offered?
Actual Intent: CatalogSelection
Predicted Intent (Zero-shot): CatalogSelection
Predicted Intent (Fine-tuned): CatalogSelection
--------------------------------------------------
Input: do you have ats 100ampers?
Actual Intent: CatalogSelection
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): CatalogSelection
--------------------------------------------------
Input: is rbc43 the replacement battery for sua2200rm2u
Actual Intent: Catalo

ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 355.07ms


Input: lead time of acb nw
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): PriceAndAvailability
--------------------------------------------------
Input: regarding po-111111-89
Actual Intent: OrderStatus
Predicted Intent (Zero-shot): OrderStatus
Predicted Intent (Fine-tuned): OrderStatus
--------------------------------------------------
Input: hi, may i ask of this item is available in your warehouse, lv16278ea. thank you
Actual Intent: PriceAndAvailability
Predicted Intent (Zero-shot): PriceAndAvailability
Predicted Intent (Fine-tuned): Product Replacement
--------------------------------------------------
Input: good afternoon. please provide eta under p.o 000000000004 item code metsect5cs23d metsect8912bh. thank you
Actual Intent: OrderStatus
Predicted Intent (Zero-shot): OrderStatus
Predicted Intent (Fine-tuned): OrderStatus
--------------------------------------------------
Zero-shot Accuracy: 0.7500
Zero-shot 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
