# Notebook to demonstrate Zero shot and Few shot Learning

In [51]:
import os
import random

import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_groq.chat_models import ChatGroq

from dotenv import load_dotenv

load_dotenv()

True

In [52]:
# Groq API and Models
Groq_Token = os.getenv('GROQ_API_KEY')  # Do not share this key with anyone

groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to github. If the key is found in the code, you will be penalized with a 1 marks deduction.

# Zero Shot 

#### LLM Working Test

In [146]:
# Statement
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts
query = f"""
* You are a sentiment analysis model.
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'.
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Sentence: {sentence}
"""

# To use Groq LLMs
model_name = "llama3.1-8b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(api_key=Groq_Token, model=groq_models[model_name], temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment Label: Neutral

Explanation: 
The sentence contains both positive and negative sentiments. The phrase "The product quality is amazing" is positive, indicating satisfaction with the product. However, the phrase "the delivery was delayed" is negative, indicating a problem with the delivery process. The phrase "However I am happy with the customer service" is also positive, indicating satisfaction with the customer service. Since the sentence contains both positive and negative sentiments, the overall sentiment is neutral.


#### Working with Human Activity Data

In [126]:
path = './Combined/Test'
activities = ["LAYING", "SITTING", "STANDING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]

In [127]:
chat_template = ChatPromptTemplate.from_messages([
    ("system", "You are provided with accelerometer data in the X, Y, and Z directions, recorded during different human activities. The activities can be one of the following: 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', or 'WALKING_UPSTAIRS'. Based on the input data from these three axes, predict the corresponding activity label without additional explanation. No code is to be produced for this, rather the final activity label based on the analysis from the different directional accelerometer data"),
    ("user", "The accelerometer readings are: X: {x_data}, Y: {y_data}, Z: {z_data}. What activity does this data represent?")
])

chain = chat_template | llm

In [154]:
n_samples = 75
system_prompt = {}
prompts = []

for activity in activities:
    activity_path = os.path.join(path, activity)

    listings = [f for f in os.listdir(activity_path) if f.endswith('.csv')]

    subjects = random.sample(listings, 1)  # one test subject for each activity

    for i in range(len(subjects)):
        file_path = os.path.join(activity_path, subjects[i])
        df = pd.read_csv(file_path)

        x = df['accx'][:n_samples].apply(lambda val: round(val, 2)).tolist()
        y = df['accy'][:n_samples].apply(lambda val: round(val, 2)).tolist()
        z = df['accz'][:n_samples].apply(lambda val: round(val, 2)).tolist()

        prompts.append({
            "value": activity,
            "data": {
                "x_data": x,
                "y_data": y,
                "z_data": z
            }
        })

In [129]:
for prompt in prompts:
  answer = chain.invoke(prompt['data'])
  print("Ground Truth:", prompt['value'])
  print("Prediction: ", answer.content)
  print()

Ground Truth: LAYING
Prediction:  Based on the provided accelerometer data, the activity is 'WALKING'.

Ground Truth: SITTING
Prediction:  Based on the provided accelerometer data, the activity is 'LAYING'.

Ground Truth: STANDING
Prediction:  Based on the provided accelerometer data, the activity is 'WALKING'.

Ground Truth: WALKING
Prediction:  Based on the provided accelerometer data, the activity is 'WALKING'.

Ground Truth: WALKING_DOWNSTAIRS
Prediction:  Based on the provided accelerometer data, the activity is 'WALKING'.

Ground Truth: WALKING_UPSTAIRS
Prediction:  Based on the provided accelerometer data, the activity is 'WALKING'.



# Few Shot

#### LLM Working Test

In [13]:
# Statement
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts
query = f"""
* You are a sentiment analysis model.
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'.
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Here are few examples:
1. Sentence: 'The customer service was excellent, and I received my order quickly.'
Sentiment: Positive

2. Sentence: 'The food was bland and the service was slow.'
Sentiment: Negative

3. Sentence: 'The product is okay, but it's not worth the price.'
Sentiment: Neutral

Sentence: {sentence}
"""

# To use Groq LLMs
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment: Positive

Explanation: Although the sentence mentions a negative aspect ("the delivery was delayed"), the positive sentiments expressed in the sentence ("The product quality is amazing" and "I am happy with the customer service") outweigh the negative one, resulting in an overall positive sentiment.


#### Working with Human Activity Data

In [167]:
path_test = './Combined/Test'
path_train = './Combined/Train'
activities = ["LAYING", "SITTING", "STANDING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]

In [168]:
chat_template = ChatPromptTemplate.from_messages([
    ("system", "You are provided with accelerometer data in the X, Y, and Z directions, recorded during different human activities. The activities can be one of the following: 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', or 'WALKING_UPSTAIRS'. Based on the input data from these three axes, predict the corresponding activity label without additional explanation. No code is to be produced for this, rather the final activity label based on the analysis from the different directional accelerometer data. Here are a few examples:"),
    ("system", "Example 1 - Activity: LAYING, {LAYING}"),
    ("system", "Example 2 - Activity: SITTING, {SITTING}"),
    ("system", "Example 3 - Activity: STANDING, {STANDING}"),
    ("system", "Example 4 - Activity: WALKING, {WALKING}"),
    ("system", "Example 5 - Activity: WALKING_DOWNSTAIRS, {WALKING_DOWNSTAIRS}"),
    ("system", "Example 6 - Activity: WALKING_UPSTAIRS, {WALKING_UPSTAIRS}"),
    ("user", "The accelerometer readings are: X: {x_data}, Y: {y_data}, Z: {z_data}. What activity does this data represent?")
])

chain = chat_template | llm

In [169]:
# Preparing system prompt with examples for few shot
for activity in activities:
    activity_path_train = os.path.join(path_train, activity)
    listings_train = [f for f in os.listdir(activity_path_train) if f.endswith('.csv')]

    subjects_train = random.sample(listings_train, 1)

    for i in range(len(subjects_train)):
        file_path = os.path.join(activity_path, subjects[i])
        df = pd.read_csv(file_path)

        x = df['accx'][:n_samples].apply(lambda val: round(val, 2)).tolist()
        y = df['accy'][:n_samples].apply(lambda val: round(val, 2)).tolist()
        z = df['accz'][:n_samples].apply(lambda val: round(val, 2)).tolist()

        system_prompt[activity] = f"X: {x}, Y: {y}, Z: {z}"

In [170]:
system_prompt

{'LAYING': 'X: [0.64, 0.67, 0.69, 0.72, 0.82, 0.91, 0.96, 0.97, 0.92, 0.87, 0.83, 0.88, 0.97, 1.02, 1.02, 1.0, 1.04, 1.12, 1.23, 1.36, 1.4, 1.42, 1.4, 1.21, 1.04, 1.02, 1.01, 1.0, 1.01, 1.0, 0.95, 0.92, 0.87, 0.79, 0.72, 0.66, 0.64, 0.63, 0.64, 0.73, 0.85, 0.95, 0.99, 1.0, 1.02, 1.02, 1.01, 1.02, 1.0, 0.99, 1.0, 1.04, 1.09, 1.12, 1.19, 1.41, 1.65, 1.7, 1.63, 1.58, 1.43, 1.17, 0.99, 0.98, 1.08, 1.14, 1.06, 0.97, 0.96, 0.92, 0.8, 0.66, 0.57, 0.53, 0.55], Y: [-0.18, -0.21, -0.16, -0.14, -0.16, -0.19, -0.23, -0.24, -0.23, -0.16, -0.08, -0.05, -0.14, -0.23, -0.19, -0.18, -0.21, -0.23, -0.33, -0.41, -0.35, -0.32, -0.38, -0.43, -0.49, -0.5, -0.43, -0.38, -0.33, -0.28, -0.24, -0.16, -0.13, -0.17, -0.18, -0.2, -0.2, -0.19, -0.21, -0.23, -0.24, -0.18, -0.07, -0.03, -0.06, -0.06, -0.03, -0.02, -0.03, -0.02, -0.04, -0.09, -0.17, -0.19, -0.13, -0.14, -0.24, -0.35, -0.44, -0.52, -0.52, -0.42, -0.38, -0.37, -0.33, -0.39, -0.39, -0.32, -0.27, -0.2, -0.16, -0.11, -0.08, -0.13, -0.12], Z: [0.05, 0.07, 0

In [171]:
for prompt in prompts:
  combined_keys = {**system_prompt, **prompt['data']}
  answer = chain.invoke(combined_keys)
  print("Ground Truth:", prompt['value'])
  print("Prediction: ", answer.content)
  print()

Ground Truth: LAYING
Prediction:  Based on the accelerometer data provided, the activity is likely 'LAYING'. The X-axis values are relatively stable and close to zero, indicating minimal movement in the horizontal direction. The Y-axis values are also relatively stable, with a slight increase and decrease, but overall, they are close to zero. The Z-axis values show a slight increase and decrease, but again, they are close to zero. This suggests that the person is lying down and not moving much.

Ground Truth: SITTING
Prediction:  Based on the accelerometer data provided, the activity is likely 'LAYING'. The X-axis values are relatively constant, indicating minimal movement in the horizontal direction. The Y-axis values also show minimal variation, suggesting that the person is not moving up or down. The Z-axis values show a slight variation, but it is not significant enough to indicate any specific activity. The overall pattern of the data suggests that the person is in a stationary po

Few-Shot Learning performs better because the model has some context for the task. This is due to the model being better tuned to the specific patterns in the few provided examples.

#### Q2



#### Q3

Since zero-shot learning relies on the model's general knowledge, it might not perform well when the task involves nuanced or highly specific data, such as classifying activities based on accelerometer readings without any prior examples. In scenarios where the model doesn't have enough contextual information, it may misinterpret the input, leading to inaccurate predictions.

The performance of few-shot learning heavily depends on the quality and representativeness of the examples provided. If the examples are not diverse or comprehensive enough, the model's predictions may still be less robust compared to a fully trained model. Despite these limitations, few-shot learning is generally more effective than zero-shot learning for tasks requiring specific domain knowledge.

#### Q4

New activity not seen before

In [159]:
# removing some of the activity class exmaples in the examples

chat_template = ChatPromptTemplate.from_messages([
    ("system", "You are provided with accelerometer data in the X, Y, and Z directions, recorded during different human activities. The activities can be one of the following: 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', or 'WALKING_UPSTAIRS'. Based on the input data from these three axes, predict the corresponding activity label without additional explanation. No code is to be produced for this, rather the final activity label only based on the analysis from the different directional accelerometer data. Here are a few examples:"),
    ("system", "Example 1 - Activity: LAYING, {LAYING}"),
    ("system", "Example 2 - Activity: SITTING, {SITTING}"),
    ("system", "Example 3 - Activity: WALKING_DOWNSTAIRS, {WALKING_DOWNSTAIRS}"),
    ("system", "Example 4 - Activity: WALKING_UPSTAIRS, {WALKING_UPSTAIRS}"),
    ("user", "The accelerometer readings are: X: {x_data}, Y: {y_data}, Z: {z_data}. What activity does this data represent?")
])

chain = chat_template | llm

In [163]:
del system_prompt['STANDING']
del system_prompt['WALKING']

for prompt in prompts[2:4]:
  combined_keys = {**system_prompt, **prompt['data']}
  answer = chain.invoke(combined_keys)
  print("Ground Truth:", prompt['value'])
  print("Prediction: ", answer.content)
  print()

Ground Truth: STANDING
Prediction:  Based on the accelerometer data provided, the activity is likely 'LAYING'.

Ground Truth: WALKING
Prediction:  Based on the accelerometer data provided, the activity is likely 'WALKING'.



Thus, the model does not perform well on the data not seen before

#### Q5

In [173]:
rand_data = {
    'x_data': [0.63, 0.65, 0.68, 0.71, 0.81, 0.89, 0.94, 0.95, 0.91, 0.86, 0.82, 0.87, 0.96, 1.01, 1.01, 0.99, 1.03, 1.1, 1.21, 1.33, 1.38, 1.4, 1.38, 1.19, 1.02, 1.0, 0.99, 0.98, 0.99, 0.98, 0.93, 0.9, 0.85, 0.77, 0.7, 0.64, 0.62, 0.61, 0.62, 0.71, 0.83, 0.93, 0.97, 0.98, 1.0, 1.0, 0.99, 1.0, 0.98, 0.97, 0.98, 1.02, 1.07, 1.1, 1.17, 1.39, 1.63, 1.68, 1.61, 1.56, 1.41, 1.15, 0.97, 0.96, 1.06, 1.12, 1.04, 0.95, 0.94, 0.9, 0.78, 0.64, 0.55, 0.51, 0.53],
    'y_data': [-0.19, -0.22, -0.17, -0.15, -0.17, -0.2, -0.24, -0.25, -0.24, -0.17, -0.09, -0.06, -0.15, -0.24, -0.2, -0.19, -0.22, -0.24, -0.34, -0.42, -0.36, -0.33, -0.39, -0.44, -0.5, -0.51, -0.44, -0.39, -0.34, -0.29, -0.25, -0.17, -0.14, -0.18, -0.19, -0.21, -0.21, -0.2, -0.22, -0.24, -0.25, -0.19, -0.08, -0.04, -0.07, -0.07, -0.04, -0.03, -0.04, -0.03, -0.05, -0.1, -0.18, -0.2, -0.14, -0.15, -0.25, -0.36, -0.45, -0.53, -0.53, -0.43, -0.39, -0.38, -0.34, -0.4, -0.4, -0.33, -0.28, -0.21, -0.17, -0.12, -0.09, -0.14, -0.13],
    'z_data': [0.06, 0.08, 0.12, 0.1, 0.09, 0.15, 0.21, 0.26, 0.27, 0.27, 0.27, 0.23, 0.2, 0.24, 0.26, 0.23, 0.18, 0.16, 0.2, 0.27, 0.3, 0.32, 0.33, 0.27, 0.19, 0.13, 0.09, 0.11, 0.11, 0.07, 0.06, 0.07, 0.08, 0.07, 0.06, 0.05, 0.01, 0.0, 0.02, 0.03, 0.04, 0.05, 0.05, 0.04, 0.02, 0.04, 0.07, 0.08, 0.07, 0.05, 0.04, 0.04, 0.03, 0.03, 0.01, -0.05, -0.09, -0.06, -0.02, 0.0, -0.02, -0.06, -0.06, -0.01, 0.03, 0.04, 0.06, 0.09, 0.08, 0.03, 0.02, 0.02, 0.01, 0.01, 0.03]
}

In [174]:
combined_keys = {**system_prompt, **rand_data}
answer = chain.invoke(combined_keys)
print("Ground Truth: Random Activity Data")
print("Prediction: ", answer.content)

Ground Truth: Random Activity Data
Prediction:  Based on the accelerometer data provided, the activity is likely 'STANDING'. The data shows a relatively consistent pattern in the X, Y, and Z directions, with some minor fluctuations. The values in the X direction are generally increasing and then decreasing, which is consistent with a standing activity. The Y direction values are also relatively consistent, with some minor fluctuations. The Z direction values show a slight increase and then decrease, which is also consistent with a standing activity. Overall, the data suggests that the person is standing still, with some minor movements.
