In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Vertex AI Model Garden - Llama 3.1 models


In [None]:
###################### Vertex AI setting up code provided by Vertex AI Model Garden #####################################

### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform[langchain] openai
! pip3 install --upgrade --quiet langchain-openai

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Restart only when running on Colab, detected by the google.colab module being loaded.
if "google.colab" in sys.modules:

    import IPython

    # Shut the current kernel down so it restarts and picks up the newly installed packages.
    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# Authenticate only when running on Colab, detected by the google.colab module being loaded.
if "google.colab" in sys.modules:

    # Imported here because the module is only available inside Colab.
    from google.colab import auth

    auth.authenticate_user()



### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Google Cloud project ID; passed to vertexai.init and embedded in the model endpoint URL.
PROJECT_ID = ""  # @param {type:"string"}

# Region passed to vertexai.init.
LOCATION = ""  # @param {type:"string"}

### Create a Cloud Storage bucket

Create a storage bucket to store tutorial artifacts.

In [None]:
# Name of the Cloud Storage bucket, without the gs:// prefix (it is added below).
BUCKET_NAME = ""  # @param {type:"string"}

# Bucket URI handed to vertexai.init as the staging bucket.
BUCKET_URI = f"gs://{BUCKET_NAME}"

### Initialize Vertex AI SDK for Python

In [None]:
import vertexai

# Initialize the Vertex AI SDK with the project, region and staging bucket configured above.
vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

### Import libraries

Import libraries to use in this tutorial.

In [None]:
# Chat completions API
import openai
from google.auth import default, transport
from langchain import PromptTemplate
# Build
from langchain_openai import ChatOpenAI
from vertexai.preview import rag

### Configure OpenAI SDK for the Llama 3.1 Chat Completions API

To configure the OpenAI SDK for the Llama 3.1 Chat Completions API, you need to request the access token and initialize the client pointing to the Llama 3.1 endpoint.


#### Authentication

You can request an access token from the default credentials for the current environment. Note that the access token lives for [1 hour by default](https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed.


In [None]:
# Fetch the default credentials of the current environment (the second return value is unused).
credentials, _ = default()
# Refresh the credentials so that credentials.token holds a current access token.
auth_request = transport.requests.Request()
credentials.refresh(auth_request)

Then configure the OpenAI SDK to point to the Llama 3.1 Chat Completions API endpoint.

Note: `us-central1` is the only supported region for Llama 3.1 models served through Model-as-a-Service (MaaS).

In [None]:
# Region that serves the Llama 3.1 MaaS endpoint; kept separate from LOCATION.
MODEL_LOCATION = "us-central1"

# The OpenAI SDK appends "/chat/completions" to base_url on its own, so base_url stops at the
# "openapi" endpoint root instead of spelling out the full path followed by a trailing "?".
# NOTE: api_key is a snapshot of the current access token. Once the token expires, refresh the
# credentials and rebuild this client.
client = openai.OpenAI(
    base_url=(
        f"https://{MODEL_LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}"
        f"/locations/{MODEL_LOCATION}/endpoints/openapi"
    ),
    api_key=credentials.token,
)

In [None]:
# Model identifier sent as `model` in the chat completion requests of this notebook.
MODEL_ID = "meta/llama-3.1-8b-instruct-maas"  # @param {type:"string"} ["meta/llama-3.1-8b-instruct-maas", "meta/llama-3.1-70b-instruct-maas", "meta/llama-3.1-405b-instruct-maas"]

#### Ask Llama 3.1 using different model configuration

Use the following parameters to generate different answers:

*   `temperature` to control the randomness of the response
*   `max_tokens` to limit the response length
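*   `top_p` to control the diversity of the response through nucleus sampling (only the most likely tokens whose cumulative probability reaches `top_p` are considered)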
*   `stream` to stream the response back or not


In [None]:
# Generation settings for the Llama 3.1 requests (see the parameter list above).
temperature = 0.6  # @param {type:"number"}
max_tokens = 50  # @param {type:"integer"}
top_p = 1.0  # @param {type:"number"}
stream = True  # @param {type:"boolean"}

### **Load your model - We used Vertex AI Model Garden**

In [None]:
############################ Llama 3.1 8B model loaded #######################################

### **Llama 3.1_8B, temp 0.6, Base (without persona), zero-shot**

In [None]:
from google.cloud import storage
import pandas as pd

# Configure your GCS bucket and file
bucket_name = ""  # Replace with your GCS bucket name
file_path = ""  # Replace with your file's path in the bucket

# Download file from GCS.
# The storage client gets its own name: binding it to `client` would replace the OpenAI
# chat client created earlier in the notebook, and every model call would then fail.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename("")  # Save locally

# Load the dataset
data = pd.read_csv("")

In [None]:
import pandas as pd
import re
import time
from typing import Tuple, Dict

# Patterns that pull the two fields out of a reply line. They tolerate markdown emphasis
# around the field name (e.g. "**Classification:** 1") and extra text after the digit.
_CLASSIFICATION_PATTERN = re.compile(r"Classification[*_\s]*:[\W_]*([01])(?!\d)")
_REASON_PATTERN = re.compile(r"Reason[*_\s]*:[*_\s]*(.*)")


# Define a function to call the Llama model
def llama_model_fn(text: str) -> Tuple[int, str]:
    """Classify one message with the Llama model (zero-shot) and parse the reply.

    The prompt asks for a ``Classification: <0 or 1>`` line and a
    ``Reason: <reason_text>`` line; the reply is scanned line by line for both.
    When a field appears on several lines, the last occurrence wins.

    Args:
        text: Message to classify.

    Returns:
        ``(classification, reasoning)``. ``classification`` is 0 or 1 and stays
        0 when no label can be parsed; ``reasoning`` stays "No reason provided."
        when no reason line is found. If the request or the parsing raises, the
        error is printed and ``(0, "Error processing message.")`` is returned,
        so failures are only distinguishable through the reason text.
    """
    try:
        system_instruction = "none"
        full_prompt = (
            f"{system_instruction}\n"
            f"Classify the following message as 1 if the person seems to have depression or anxiety, otherwise 0, and provide the reasoning.\n"
            f"Output format:\n"
            f"Classification: <0 or 1>\n"
            f"Reason: <reason_text>\n\n"
            f"Message:\n{text}"
        )

        response = client.chat.completions.create(
            model=MODEL_ID,
            messages=[{"role": "user", "content": full_prompt}],
            # Send the sampling settings configured for the experiment. max_tokens and
            # stream are not forwarded: the parsing below reads one complete,
            # non-streamed message.
            temperature=temperature,
            top_p=top_p,
        )

        content = response.choices[0].message.content.strip()
        print(f"Extracted content: {content}")  # Debug: Print extracted content

        # Initialize default values
        classification = 0
        reasoning = "No reason provided."

        # Parse the response line by line
        for part in content.split('\n'):
            label_match = _CLASSIFICATION_PATTERN.search(part)
            if label_match:
                classification = int(label_match.group(1))
                continue
            # Only plain "Reason" lines count; a line that already carried the
            # classification is not reused as the reason.
            if "Classification:" not in part:
                reason_match = _REASON_PATTERN.search(part)
                if reason_match:
                    reasoning = reason_match.group(1).strip()

        print(f"Parsed values -> Classification: {classification}, Reason: {reasoning}")
        return classification, reasoning

    except Exception as e:
        print(f"Error processing message: {e}")
        return 0, "Error processing message."

# Initialize lists to store classifications and reasons
classifications = []
reasons = []
total_rows = len(data)

# Process each entry in the dataset; `position` counts rows independently of the index labels
for position, (index, row) in enumerate(data.iterrows(), start=1):
    message = row.get("msg", "")  # Safely get 'msg' column
    # Empty CSV cells are read as NaN, which is truthy, so test for it explicitly:
    # missing or blank messages are skipped instead of being sent to the model.
    if pd.isna(message) or not str(message).strip():
        print(f"Row {index} has no message. Skipping.")
        classifications.append(0)
        reasons.append("No message provided.")
        continue

    classification, reasoning = llama_model_fn(message)
    classifications.append(classification)
    reasons.append(reasoning)
    time.sleep(1)  # Sleep for 1 second to avoid too many requests

    # Optional: Print progress
    if position % 10 == 0 or position == total_rows:
        print(f"Processed {position}/{total_rows} messages.")

# Add classifications and reasons to the dataframe
data["predictions"] = classifications
data["reasoning"] = reasons

# Handle cases where classification was None or NaN. The result is assigned back
# because fillna(inplace=True) on a column selection may act on a temporary copy.
data["predictions"] = data["predictions"].fillna(0)

# Save the results to a CSV for review
output_file = ""
data.to_csv(output_file, index=False)
print(f"Classifications saved to '{output_file}'.")
print(data)


In [None]:
# Read a CSV file into saved_data (the path is left blank in this notebook).
saved_data=pd.read_csv("")

In [None]:
# Preview the first rows of saved_data.
saved_data.head()

In [None]:
from google.cloud import storage

def upload_to_bucket(bucket_name, source_file_name, destination_blob_name):
    """Copy a local file into a Cloud Storage bucket and print a confirmation.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name the file receives inside the bucket.
    """
    # Resolve client -> bucket -> blob in one chain, then push the file contents.
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)

    print(f"File {source_file_name} uploaded to {destination_blob_name}.")


# Define your bucket name
bucket_name = ""

# File paths: the local file to upload and the object name it receives in the bucket
source_file_name = ""
destination_blob_name =  ""

# Upload the file
upload_to_bucket(bucket_name, source_file_name, destination_blob_name)

In [None]:
#Load the data for classification

In [None]:
from google.cloud import storage
import pandas as pd

# Configure your GCS bucket and file
bucket_name = ""  # Replace with your GCS bucket name
file_path = ""  # Replace with your file's path in the bucket

# Download file from GCS.
# The storage client gets its own name: binding it to `client` would replace the OpenAI
# chat client created earlier in the notebook, and every model call would then fail.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename("")  # Save locally

# Load the dataset
data = pd.read_csv("")

In [None]:
# Classification performance with 95% CI

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Load the saved results from CSV (the file must contain the 'label' and 'predictions' columns read below)
df = pd.read_csv("")

# Extract the true labels ('label') and the model predictions ('predictions')
y_true = df['label']
y_pred = df['predictions']

def bootstrap_confidence_interval(y_true, y_pred, metric_func, n_bootstraps=1000, ci=95, random_state=None, **kwargs):
    """
    Calculates the percentile confidence interval of a metric using bootstrapping.

    Rows are resampled with replacement ``n_bootstraps`` times; the metric is
    evaluated on each resample and the interval is read off the percentiles of
    the resulting scores.

    Parameters:
        y_true (pd.Series): True labels.
        y_pred (pd.Series): Predicted labels, same length as ``y_true``.
        metric_func (function): Metric called as ``metric_func(y_true, y_pred, **kwargs)``
            (e.g., f1_score).
        n_bootstraps (int): Number of bootstrap samples.
        ci (float): Confidence level in percent (e.g., 95 for 95% CI).
        random_state (int | None): Seed for a private random generator. With the
            default ``None`` the global NumPy generator is used, as before.
        **kwargs: Additional keyword arguments for the metric function.

    Returns:
        tuple: Lower and upper bounds of the confidence interval.

    Raises:
        ValueError: If ``ci`` is outside [0, 100], the inputs are empty or differ
            in length, or the metric fails on every bootstrap sample.
    """
    if not 0 <= ci <= 100:
        raise ValueError(f"ci must be between 0 and 100, got {ci}")

    n = len(y_true)
    if n == 0:
        raise ValueError("y_true is empty; cannot bootstrap a confidence interval")
    if len(y_pred) != n:
        raise ValueError(f"y_true and y_pred differ in length: {n} vs {len(y_pred)}")

    # np.random (module) and RandomState share the randint API; the default keeps
    # drawing from the global generator so existing np.random.seed calls still apply.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    boot_scores = []
    for _ in range(n_bootstraps):
        # Sample row positions with replacement
        indices = rng.randint(0, n, n)
        y_true_boot = y_true.iloc[indices]
        y_pred_boot = y_pred.iloc[indices]

        # Skip resamples on which the metric cannot be computed
        try:
            boot_scores.append(metric_func(y_true_boot, y_pred_boot, **kwargs))
        except ValueError:
            continue

    if not boot_scores:
        raise ValueError("metric_func failed on every bootstrap sample; no interval can be computed")

    # Split the excluded probability mass evenly between both tails
    tail = (100 - ci) / 2
    lower, upper = np.percentile(boot_scores, [tail, 100 - tail])
    return lower, upper

# Calculate the point estimates for the metrics
f1 = f1_score(y_true, y_pred, average='binary')
precision = precision_score(y_true, y_pred, average='binary')
recall = recall_score(y_true, y_pred, average='binary')
accuracy = accuracy_score(y_true, y_pred)

# Seed NumPy's global generator, which the bootstrap resampling draws from, so the
# reported confidence intervals are reproducible from one run to the next.
BOOTSTRAP_SEED = 42
np.random.seed(BOOTSTRAP_SEED)

# Calculate the 95% confidence intervals using bootstrapping
f1_ci = bootstrap_confidence_interval(y_true, y_pred, f1_score, average='binary')
precision_ci = bootstrap_confidence_interval(y_true, y_pred, precision_score, average='binary')
recall_ci = bootstrap_confidence_interval(y_true, y_pred, recall_score, average='binary')
accuracy_ci = bootstrap_confidence_interval(y_true, y_pred, accuracy_score)

# Print the results
print(f'F1 Score: {f1:.4f} (95% CI: {f1_ci[0]:.4f} - {f1_ci[1]:.4f})')
print(f'Precision: {precision:.4f} (95% CI: {precision_ci[0]:.4f} - {precision_ci[1]:.4f})')
print(f'Recall: {recall:.4f} (95% CI: {recall_ci[0]:.4f} - {recall_ci[1]:.4f})')
print(f'Accuracy: {accuracy:.4f} (95% CI: {accuracy_ci[0]:.4f} - {accuracy_ci[1]:.4f})')


### **Llama 3.1_8B, temp 0.6, Base (without persona), few-shot (n=2)**

In [None]:
from google.cloud import storage
import pandas as pd

# Configure your GCS bucket and file
bucket_name = ""  # Replace with your GCS bucket name
file_path = ""  # Replace with your file's path in the bucket

# Download file from GCS.
# The storage client gets its own name: binding it to `client` would replace the OpenAI
# chat client created earlier in the notebook, and every model call would then fail.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename("")  # Save locally

# Load the dataset
data = pd.read_csv("")

In [None]:
import pandas as pd
import re
import time
from typing import Tuple, Dict

# Patterns that pull the two fields out of a reply line. They tolerate markdown emphasis
# around the field name (e.g. "**Classification:** 1") and extra text after the digit.
_CLASSIFICATION_PATTERN = re.compile(r"Classification[*_\s]*:[\W_]*([01])(?!\d)")
_REASON_PATTERN = re.compile(r"Reason[*_\s]*:[*_\s]*(.*)")


# Define a function to call the Llama model
def llama_model_fn(text: str) -> Tuple[int, str]:
    """Classify one message with the Llama model (few-shot) and parse the reply.

    The prompt starts with a "Learn from:" section built from ``reference``, which
    holds the headers of two examples (their bodies are empty in this notebook).
    It then asks for a ``Classification: <0 or 1>`` line and a
    ``Reason: <reason_text>`` line; the reply is scanned line by line for both.
    When a field appears on several lines, the last occurrence wins.

    Args:
        text: Message to classify.

    Returns:
        ``(classification, reasoning)``. ``classification`` is 0 or 1 and stays
        0 when no label can be parsed; ``reasoning`` stays "No reason provided."
        when no reason line is found. If the request or the parsing raises, the
        error is printed and ``(0, "Error processing message.")`` is returned,
        so failures are only distinguishable through the reason text.
    """
    try:
        system_instruction = "none"
        # Few-shot examples; fill in the example bodies after each header.
        reference = (
            "Example 1:\n"
            "Example 2:\n"
        )

        full_prompt = (
            f"{system_instruction}\n"
            f"Learn from: {reference}\n"
            f"Classify the following message as 1 if the person seems to have depression or anxiety, otherwise 0, and provide the reasoning.\n"
            f"Output format:\n"
            f"Classification: <0 or 1>\n"
            f"Reason: <reason_text>\n\n"
            f"Message:\n{text}"
        )

        response = client.chat.completions.create(
            model=MODEL_ID,
            messages=[{"role": "user", "content": full_prompt}],
            # Send the sampling settings configured for the experiment. max_tokens and
            # stream are not forwarded: the parsing below reads one complete,
            # non-streamed message.
            temperature=temperature,
            top_p=top_p,
        )

        content = response.choices[0].message.content.strip()
        print(f"Extracted content: {content}")  # Debug: Print extracted content

        # Initialize default values
        classification = 0
        reasoning = "No reason provided."

        # Parse the response line by line
        for part in content.split('\n'):
            label_match = _CLASSIFICATION_PATTERN.search(part)
            if label_match:
                classification = int(label_match.group(1))
                continue
            # Only plain "Reason" lines count; a line that already carried the
            # classification is not reused as the reason.
            if "Classification:" not in part:
                reason_match = _REASON_PATTERN.search(part)
                if reason_match:
                    reasoning = reason_match.group(1).strip()

        print(f"Parsed values -> Classification: {classification}, Reason: {reasoning}")
        return classification, reasoning

    except Exception as e:
        print(f"Error processing message: {e}")
        return 0, "Error processing message."

# Initialize lists to store classifications and reasons
classifications = []
reasons = []
total_rows = len(data)

# Process each entry in the dataset; `position` counts rows independently of the index labels
for position, (index, row) in enumerate(data.iterrows(), start=1):
    message = row.get("msg", "")  # Safely get 'msg' column
    # Empty CSV cells are read as NaN, which is truthy, so test for it explicitly:
    # missing or blank messages are skipped instead of being sent to the model.
    if pd.isna(message) or not str(message).strip():
        print(f"Row {index} has no message. Skipping.")
        classifications.append(0)
        reasons.append("No message provided.")
        continue

    classification, reasoning = llama_model_fn(message)
    classifications.append(classification)
    reasons.append(reasoning)
    time.sleep(1)  # Sleep for 1 second to avoid too many requests

    # Optional: Print progress
    if position % 10 == 0 or position == total_rows:
        print(f"Processed {position}/{total_rows} messages.")

# Add classifications and reasons to the dataframe
data["predictions"] = classifications
data["reasoning"] = reasons

# Handle cases where classification was None or NaN. The result is assigned back
# because fillna(inplace=True) on a column selection may act on a temporary copy.
data["predictions"] = data["predictions"].fillna(0)

# Save the results to a CSV for review
output_file = ""
data.to_csv(output_file, index=False)
print(f"Classifications saved to '{output_file}'.")
print(data)


In [None]:
# Read a CSV file into saved_data (the path is left blank in this notebook).
saved_data = pd.read_csv("")

In [None]:
# Preview the first rows of saved_data.
saved_data.head()

In [None]:
from google.cloud import storage

def upload_to_bucket(bucket_name, source_file_name, destination_blob_name):
    """Copy a local file into a Cloud Storage bucket and print a confirmation.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name the file receives inside the bucket.
    """
    # Resolve client -> bucket -> blob in one chain, then push the file contents.
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)

    print(f"File {source_file_name} uploaded to {destination_blob_name}.")


# Define your bucket name
bucket_name = ""

# File paths: the local file to upload and the object name it receives in the bucket
source_file_name = ""
destination_blob_name = ""

# Upload the file
upload_to_bucket(bucket_name, source_file_name, destination_blob_name)

In [None]:
#Load the data for classification

In [None]:
from google.cloud import storage
import pandas as pd

# Configure your GCS bucket and file
bucket_name = ""  # Replace with your GCS bucket name
file_path = ""  # Replace with your file's path in the bucket

# Download file from GCS.
# The storage client gets its own name: binding it to `client` would replace the OpenAI
# chat client created earlier in the notebook, and every model call would then fail.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.download_to_filename("")  # Save locally

# Load the dataset
data = pd.read_csv("")

In [None]:
# classification performance with 95% CI

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Load the saved results from CSV (the file must contain the 'label' and 'predictions' columns read below)
df = pd.read_csv("")

# Extract the true labels ('label') and the model predictions ('predictions')
y_true = df['label']
y_pred = df['predictions']

def bootstrap_confidence_interval(y_true, y_pred, metric_func, n_bootstraps=1000, ci=95, random_state=None, **kwargs):
    """
    Calculates the percentile confidence interval of a metric using bootstrapping.

    Rows are resampled with replacement ``n_bootstraps`` times; the metric is
    evaluated on each resample and the interval is read off the percentiles of
    the resulting scores.

    Parameters:
        y_true (pd.Series): True labels.
        y_pred (pd.Series): Predicted labels, same length as ``y_true``.
        metric_func (function): Metric called as ``metric_func(y_true, y_pred, **kwargs)``
            (e.g., f1_score).
        n_bootstraps (int): Number of bootstrap samples.
        ci (float): Confidence level in percent (e.g., 95 for 95% CI).
        random_state (int | None): Seed for a private random generator. With the
            default ``None`` the global NumPy generator is used, as before.
        **kwargs: Additional keyword arguments for the metric function.

    Returns:
        tuple: Lower and upper bounds of the confidence interval.

    Raises:
        ValueError: If ``ci`` is outside [0, 100], the inputs are empty or differ
            in length, or the metric fails on every bootstrap sample.
    """
    if not 0 <= ci <= 100:
        raise ValueError(f"ci must be between 0 and 100, got {ci}")

    n = len(y_true)
    if n == 0:
        raise ValueError("y_true is empty; cannot bootstrap a confidence interval")
    if len(y_pred) != n:
        raise ValueError(f"y_true and y_pred differ in length: {n} vs {len(y_pred)}")

    # np.random (module) and RandomState share the randint API; the default keeps
    # drawing from the global generator so existing np.random.seed calls still apply.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    boot_scores = []
    for _ in range(n_bootstraps):
        # Sample row positions with replacement
        indices = rng.randint(0, n, n)
        y_true_boot = y_true.iloc[indices]
        y_pred_boot = y_pred.iloc[indices]

        # Skip resamples on which the metric cannot be computed
        try:
            boot_scores.append(metric_func(y_true_boot, y_pred_boot, **kwargs))
        except ValueError:
            continue

    if not boot_scores:
        raise ValueError("metric_func failed on every bootstrap sample; no interval can be computed")

    # Split the excluded probability mass evenly between both tails
    tail = (100 - ci) / 2
    lower, upper = np.percentile(boot_scores, [tail, 100 - tail])
    return lower, upper

# Calculate the point estimates for the metrics
f1 = f1_score(y_true, y_pred, average='binary')
precision = precision_score(y_true, y_pred, average='binary')
recall = recall_score(y_true, y_pred, average='binary')
accuracy = accuracy_score(y_true, y_pred)

# Seed NumPy's global generator, which the bootstrap resampling draws from, so the
# reported confidence intervals are reproducible from one run to the next.
BOOTSTRAP_SEED = 42
np.random.seed(BOOTSTRAP_SEED)

# Calculate the 95% confidence intervals using bootstrapping
f1_ci = bootstrap_confidence_interval(y_true, y_pred, f1_score, average='binary')
precision_ci = bootstrap_confidence_interval(y_true, y_pred, precision_score, average='binary')
recall_ci = bootstrap_confidence_interval(y_true, y_pred, recall_score, average='binary')
accuracy_ci = bootstrap_confidence_interval(y_true, y_pred, accuracy_score)

# Print the results
print(f'F1 Score: {f1:.4f} (95% CI: {f1_ci[0]:.4f} - {f1_ci[1]:.4f})')
print(f'Precision: {precision:.4f} (95% CI: {precision_ci[0]:.4f} - {precision_ci[1]:.4f})')
print(f'Recall: {recall:.4f} (95% CI: {recall_ci[0]:.4f} - {recall_ci[1]:.4f})')
print(f'Accuracy: {accuracy:.4f} (95% CI: {accuracy_ci[0]:.4f} - {accuracy_ci[1]:.4f})')
