# Notebook to demonstrate Zero shot and Few shot Learning

In [12]:
import numpy as np
import pandas as pd 
from langchain_groq.chat_models import ChatGroq
import random
import time

from dotenv import load_dotenv
import os
load_dotenv()

True

In [20]:
# Groq API and Models 
Groq_Token = os.getenv('GROQ_API_KEY') # Do not share this key with anyone

groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to github. If the key is found in the code, you will be penalized with a 0.5 marks deduction.

### Loading the training and testing sets

In [14]:
X_train = np.load('../SavedDataset/Train/X_train.npy')
X_test = np.load('../SavedDataset/Test/X_test.npy')
y_train = np.load('../SavedDataset/Train/y_train.npy')
y_test = np.load('../SavedDataset/Test/y_test.npy')


print("Training data shape: ",X_train.shape)
print("Training labels shape: ",y_train.shape)
print("Testing data shape: ",X_test.shape)
print("Testing labels shape: ",y_test.shape)

activity_dict = {
    1: "WALKING",
    2: "WALKING_UPSTAIRS",
    3: "WALKING_DOWNSTAIRS",
    4: "SITTING",
    5: "STANDING",
    6: "LAYING"
}

activity_reverse_dict = {v: k for k, v in activity_dict.items()}

print("Activity Dictionary: ",activity_reverse_dict)

Training data shape:  (126, 500, 3)
Training labels shape:  (126,)
Testing data shape:  (54, 500, 3)
Testing labels shape:  (54,)
Activity Dictionary:  {'WALKING': 1, 'WALKING_UPSTAIRS': 2, 'WALKING_DOWNSTAIRS': 3, 'SITTING': 4, 'STANDING': 5, 'LAYING': 6}


In [15]:
import json
def format_data_for_prompt(data):
    """Convert structured accelerometer data into a JSON format string."""
    return json.dumps(data.tolist())

print(format_data_for_prompt(X_train[0]))


[[1.19648, -0.1606898, -0.740178], [1.22203, -0.2672951, -0.7717485], [1.22311, -0.4338634, -0.7678422], [1.119198, -0.5337904, -0.7563089], [0.9158428, -0.4961259, -0.6291351], [0.8232005, -0.475312, -0.5338143], [0.8665536, -0.4580166, -0.5249095], [0.8930115, -0.3355446, -0.4887192], [0.964351, -0.2377609, -0.4879413], [1.060564, -0.1909076, -0.4934439], [1.057602, -0.1695583, -0.4591031], [0.9580277, -0.2363204, -0.4317126], [0.7992838, -0.3106481, -0.3664186], [0.6793694, -0.3515589, -0.2993257], [0.6064836, -0.364931, -0.269164], [0.5399829, -0.3310031, -0.243923], [0.5384855, -0.2876898, -0.2467077], [0.5715013, -0.2263525, -0.2647463], [0.596913, -0.1748644, -0.2764318], [0.635042, -0.1730205, -0.2743757], [0.6510546, -0.1950147, -0.2579281], [0.6566896, -0.2516222, -0.2319569], [0.6993972, -0.3159723, -0.1882381], [0.7610235, -0.3351564, -0.1817709], [0.8556888, -0.3292053, -0.2100259], [0.9661283, -0.3242493, -0.2098315], [1.023589, -0.3082657, -0.2134342], [1.081837, -0.3011

# Zero Shot 

In [6]:
# model_name = "llama3.1-70b"
model_name = "llama3.1-8b"
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)

predictions = []
# num_samples = X_train.shape[0]
num_samples = 10

for i in range(num_samples):
    # Extract the 500x3 data for the ith person
    X_i = X_train[i]

    # Format the data as a JSON string
    data_str = format_data_for_prompt(X_i)

    # Generate a prompt for the model

    prompt = f"""
    You are a highly trained human activity classification model.

    Your task is to analyze the given accelerometer data and classify the human activity into one of the following categories:
    - WALKING
    - WALKING_UPSTAIRS
    - WALKING_DOWNSTAIRS
    - SITTING
    - STANDING
    - LAYING

    Here is the accelerometer data provided:
    - You have 500 readings, each containing three accelerometer values: (acceleration_x, acceleration_y, acceleration_z).
    - The data is collected over a 10-second period at a sampling rate of 50 Hz, which gives 500 readings.

    Data Format:
    - The data is provided as a nested list. Each inner lsit represents a single reading: (acceleration_x, acceleration_y, acceleration_z).

    Please analyze the data and provide the most likely activity label from the list above. Provide ONLY the classification label (from the given options above) as output.

    Data: {data_str}
    """

    # prompt = f"""
    # * You are a human activity classification model.
    # * Your task is to analyze the given accelerometer data and classify the human activity as "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING", "STANDING" or "LAYING".
    # * The data is formatted as a list of accelerometer readings where each reading is a tuple of three values: (acceleration_x, acceleration_y, acceleration_z).
    # * You have 500 such readings, each containing three accelerometer values (acceleration_x, acceleration_y, acceleration_z).
    # * These readings were taken over a 10-second period at a sampling rate of 50 Hz.
    # * You can run do any mathematical calculations on the data to classify the activity. I just want the activity name.
    # * Provide ONLY the classification label (from the given options above) as output.

    # Data: {data_str}
    # """
    
    answer = llm.invoke(prompt)
    activity_name = answer.content.strip()
    print(f"Activity: {activity_name}")
    activity_number = activity_reverse_dict.get(activity_name, -1)  # Use -1 for unknown activities
    
    predictions.append(activity_number)

# Compare predictions with actual y_train labels
correct = 0

for i in range(num_samples):
    if predictions[i] == y_train[i]:
        correct += 1

accuracy = correct / num_samples
print(f"Accuracy: {accuracy:.2f}")
print("Accuracy Percentage: ",accuracy*100)

Activity: WALKING
Activity: LAYING
Activity: LAYING
Activity: WALKING
Activity: WALKING
Activity: WALKING
Activity: LAYING
Activity: WALKING
Activity: LAYING
Activity: LAYING
Accuracy: 0.20
Accuracy Percentage:  20.0


# Few Shot

In [16]:
num_examples = 5
random_indices = random.sample(range(X_train.shape[0]), num_examples)
print("Random Indices: ",random_indices)

Random Indices:  [40, 35, 25, 37, 3]


In [21]:
examples = []
for idx in random_indices:
    data_example = X_train[idx]
    label_example = y_train[idx]
    data_str = format_data_for_prompt(data_example)
    examples.append((data_str, activity_dict[label_example]))
    


predictions = []
# num_samples = X_train.shape[0]
num_samples = 10
# get random samples
random_test = random.sample(range(X_train.shape[0]), num_samples)

api_index = 0

for i in random_test:

    # model_name = "llama3.1-70b"
    model_name = "llama3.1-8b"
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)

    # Extract the 500x3 data for the ith person
    X_i = X_train[i]

    # Format the data as a JSON string
    data_str = format_data_for_prompt(X_i)

    # Generate a prompt for the model

    prompt = f"""
    You are a highly trained human activity classification model.

    Your task is to analyze the given accelerometer data and classify the human activity into one of the following categories:
    - WALKING
    - WALKING_UPSTAIRS
    - WALKING_DOWNSTAIRS
    - SITTING
    - STANDING
    - LAYING

    Here is the accelerometer data provided:
    - You have 500 readings, each containing three accelerometer values: (acceleration_x, acceleration_y, acceleration_z).
    - The data is collected over a 10-second period at a sampling rate of 50 Hz, which gives 500 readings.

    Data Format:
    - The data is provided as a nested list. Each inner lsit represents a single reading: (acceleration_x, acceleration_y, acceleration_z).

    Here are few examples:
    - Example 1: {examples[0][0]} -> {examples[0][1]}
    - Example 2: {examples[1][0]} -> {examples[1][1]}
    - Example 3: {examples[2][0]} -> {examples[2][1]}
    - Example 4: {examples[3][0]} -> {examples[3][1]}
    - Example 5: {examples[4][0]} -> {examples[4][1]}
    

    Please analyze the examples extensively and provide the most likely activity label for the below data from the list above.
    
    Provide ONLY the classification label (from the given options above) as output.

    Data: {data_str}
    """
    
    answer = llm.invoke(prompt)
    activity_name = answer.content.strip()
    print(f"Activity: {activity_name}")
    activity_number = activity_reverse_dict.get(activity_name, -1)  # Use -1 for unknown activities
    
    predictions.append(activity_number)

    if api_index == 7:
        api_index = 0
    else:
        api_index += 1

# Compare predictions with actual y_train labels
correct = 0

for i in random_test:
    if predictions[i] == y_train[i]:
        correct += 1

accuracy = correct / num_samples
print(f"Accuracy: {accuracy:.2f}")
print("Accuracy Percentage: ",accuracy*100)

Activity: WALKING_DOWNSTAIRS
Activity: WALKING_UPSTAIRS
Activity: WALKING_DOWNSTAIRS
Activity: WALKING_UPSTAIRS
Activity: WALKING_DOWNSTAIRS
Activity: WALKING_DOWNSTAIRS
Activity: WALKING_UPSTAIRS
Activity: WALKING_UPSTAIRS
Activity: WALKING_DOWNSTAIRS
Activity: WALKING_DOWNSTAIRS


IndexError: list index out of range

In [23]:
for i in range(num_samples):
    
    if predictions[i] == y_train[random_test[i]]:
        correct += 1

accuracy = correct / num_samples
print(f"Accuracy: {accuracy:.2f}")
print("Accuracy Percentage: ",accuracy*100)

Accuracy: 0.10
Accuracy Percentage:  10.0


## Testing

In [5]:
# for i in range(len(groq_models)):
#     model_name = list(groq_models.keys())[i]
#     llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
#     print(f"Model: {model_name}")
#     try:
#         X_i = X_train[0]
#         data_str = format_data_for_prompt(X_i)
#         # print(data_str)
#         prompt = f"""
#         * You are a human activity classification model.
#         * Your task is to analyze the given accelerometer data and predict the human activity (e.g., walking, running, sitting, etc.).
#         * The data is formatted as a list of accelerometer readings where each reading is a tuple of three values: (acceleration_x, acceleration_y, acceleration_z).
#         * You have 500 such readings, each containing three accelerometer values (acceleration_x, acceleration_y, acceleration_z).
#         * These readings were taken over a 10-second period at a sampling rate of 50 Hz.
#         * Provide ONLY the classification label (from the given options above) as output.

#         Data: {data_str}
#         """

#         answer = llm.invoke(prompt)

#         print(answer.content)
#         print("model_name: ", model_name)

#     except Exception as e:
#         print(f"Error: {e}, Model: {model_name}")
#         continue


Model: llama3-70b
Error: Error code: 400 - {'error': {'message': 'Bad Request', 'type': 'invalid_request_error'}}, Model: llama3-70b
Model: mixtral
Based on the given accelerometer data, it is difficult to provide an accurate classification of the human activity without further information or context. The data shows varying levels of acceleration in different directions, but these patterns can correspond to a variety of activities such as walking, running, or even staying still while moving one's arms.

Therefore, it is not possible to provide a definitive answer based solely on the provided data.
model_name:  mixtral
Model: gemma-7b
Error: Error code: 400 - {'error': {'message': 'Bad Request', 'type': 'invalid_request_error'}}, Model: gemma-7b
Model: llama3.1-70b
Walking
model_name:  llama3.1-70b
Model: llama3-8b
Error: Error code: 400 - {'error': {'message': 'Bad Request', 'type': 'invalid_request_error'}}, Model: llama3-8b
Model: llama3.1-8b
walking
model_name:  llama3.1-8b
Model: g

In [None]:
# model_name = "llama3.1-70b"
# llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
# # print(f"Model: {model_name}")
# X_i = X_train[0]
# data_str = format_data_for_prompt(X_i)
# # print(data_str)
# prompt = f"""
# * You are a human activity classification model.
# * Your task is to analyze the given accelerometer data and predict the human activity (e.g., walking, running, sitting, etc.).
# * The data is formatted as a list of accelerometer readings where each reading is a tuple of three values: (acceleration_x, acceleration_y, acceleration_z).
# * You have 500 such readings, each containing three accelerometer values (acceleration_x, acceleration_y, acceleration_z).
# * These readings were taken over a 10-second period at a sampling rate of 50 Hz.
# * Provide ONLY the classification label as output.

# Data: {data_str}
# """

# answer = llm.invoke(prompt)

# print(answer.content)
# print("model_name: ", model_name)

