# Notebook to demonstrate Zero shot and Few shot Learning

In [3]:
import pandas as pd 
from langchain_groq.chat_models import ChatGroq
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

True

In [5]:
# Groq credentials: the token is read from the environment (None when unset)
Groq_Token = os.environ.get('GROQ_API_TOKEN')

# Short alias -> Groq model identifier; cells below select a model by alias
groq_models = {
    "llama3-70b": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "gemma-7b": "gemma-7b-it",
    "llama3.1-70b": "llama-3.1-70b-versatile",
    "llama3-8b": "llama3-8b-8192",
    "llama3.1-8b": "llama-3.1-8b-instant",
    "gemma-9b": "gemma2-9b-it",
}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to GitHub. If the key is found in the code, you will be penalized with a 0.5-mark deduction.

# Zero Shot 

In [3]:
# Text whose sentiment the model is asked to judge
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# Zero-shot prompt: task instructions only, no labelled examples
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Sentence: {sentence}
"""

# Any alias from the groq_models dictionary can be used here
model_name = "gemma-9b"
llm = ChatGroq(
    model=groq_models[model_name],
    api_key=Groq_Token,
    temperature=0,
)

# Send the prompt and show the text of the reply
answer = llm.invoke(query)
print(answer.content)

# Activity name -> integer label; `classes` is read again by the
# zero-shot accuracy cell further down.
classes = {
    "WALKING": 1,
    "WALKING_UPSTAIRS": 2,
    "WALKING_DOWNSTAIRS": 3,
    "SITTING": 4,
    "STANDING": 5,
    "LAYING": 6,
}
classes_list = [*classes]

Sentiment: **Positive**

**Explanation:**

While the delivery was delayed, the user expresses positive sentiment towards the product quality and customer service. The phrase "amazing" clearly indicates a positive feeling about the product, and "happy" reinforces the positive sentiment overall.  The "however" acknowledges a negative aspect, but it doesn't outweigh the positive feelings expressed. 





> ## TASK 4:

> 1. Demonstrate how to use Zero-Shot Learning and Few-Shot Learning to classify human activities based on the featurized accelerometer data. Qualitatively compare the performance of Few-Shot Learning with Zero-Shot Learning. Which method performs better? Why?


In [75]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from langchain_groq.chat_models import ChatGroq

# Read the accelerometer samples and their labels from CSV
X_dataset = pd.read_csv("X_Train_Test_data.csv")
y_dataset = pd.read_csv("Y_train_test_data.csv")

# Arrange the samples as a (180, 500, 3) array and pull out the label column
X_array = np.reshape(np.array(X_dataset), (180, 500, 3))
y_array = np.array(y_dataset["class_label"])

# Stratified 70/30 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    y_array,
    test_size=0.3,
    random_state=40,
    stratify=y_array,
)

# LLM client used for the activity predictions
model_name = "llama3-8b"
llm = ChatGroq(
    model=groq_models[model_name],
    api_key=Groq_Token,
    temperature=0,
)

def predict_activity(data, llm=None):
    """Ask the LLM for an activity label for every sample in ``data``.

    One prompt is built and sent per sample. Each raw reply is printed with
    its 1-based position and collected; a final line reports how many
    replies were gathered.

    Parameters
    ----------
    data : iterable
        Samples to classify. Each sample is interpolated into the prompt
        using its default string form.
    llm : object, optional
        Client exposing ``invoke(prompt)`` whose result has a ``content``
        attribute. When omitted, the notebook-level ``llm`` is looked up at
        call time, so the function always uses the client that is current
        when it is called rather than the one that existed when it was
        defined.

    Returns
    -------
    list
        The ``content`` of each reply, unparsed, in input order.

    Raises
    ------
    NameError
        If ``llm`` is omitted and no module-level ``llm`` is defined.
    """
    if llm is None:
        try:
            llm = globals()["llm"]
        except KeyError:
            raise NameError("name 'llm' is not defined") from None

    # NOTE(review): if the samples are NumPy arrays with more elements than
    # NumPy's print threshold (default 1000), their string form is
    # abbreviated with "...", so the model may not see every observation
    # the prompt describes - confirm.
    predictions = []
    for position, sample in enumerate(data, start=1):
        query = f"""
        You are a data analysis model. Predict the type of human activity from ["WALKING","WALKING_UPSTAIRS","WALKING_DOWNSTAIRS","SITTING","STANDING","LAYING"].
        Given: Accelerometer data (x,y,z axes) for 10 seconds at 50 Hz (500 observations).
        Predict the activity type based on this data. Don't explain you reasoning.
        Keep the answer precise and concise and give it in the format given below:
        Answer format-> Activity_Prediction: "Your Answer".
        Data: {sample}
        """
        answer = llm.invoke(query)
        print(f"{position} :", answer.content)
        predictions.append(answer.content)

    print(f"Number of predictions: {len(predictions)}")
    return predictions

# Classify every test-split sample; predict_activity prints each reply as it is received
predictions = predict_activity(X_test)


1 : Activity_Prediction: SITTING
2 : Activity_Prediction: SITTING
3 : Activity_Prediction: WALKING
4 : Activity_Prediction: SITTING
5 : Activity_Prediction: SITTING
6 : Activity_Prediction: SITTING
7 : Activity_Prediction: WALKING
8 : Activity_Prediction: SITTING
9 : Activity_Prediction: WALKING
10 : Activity_Prediction: WALKING
11 : Activity_Prediction: SITTING
12 : Activity_Prediction: WALKING
13 : Activity_Prediction: WALKING
14 : Activity_Prediction: WALKING
15 : Activity_Prediction: WALKING
16 : Activity_Prediction: SITTING
17 : Activity_Prediction: WALKING
18 : Activity_Prediction: WALKING
19 : Activity_Prediction: WALKING
20 : Activity_Prediction: WALKING
21 : Activity_Prediction: SITTING
22 : Activity_Prediction: WALKING
23 : Activity_Prediction: WALKING
24 : Activity_Prediction: SITTING
25 : Activity_Prediction: SITTING
26 : Activity_Prediction: WALKING
27 : Activity_Prediction: SITTING
28 : Activity_Prediction: WALKING
29 : Activity_Prediction: SITTING
30 : Activity_Predictio

In [76]:
import re

# Printed output of the zero-shot run, pasted here so the accuracy can be
# recomputed without calling the LLM again. It is deliberately not named
# `input`, which would shadow the builtin input() for the rest of the session.
zero_shot_output = """
1 : Activity_Prediction: SITTING
2 : Activity_Prediction: SITTING
3 : Activity_Prediction: WALKING
4 : Activity_Prediction: SITTING
5 : Activity_Prediction: SITTING
6 : Activity_Prediction: SITTING
7 : Activity_Prediction: WALKING
8 : Activity_Prediction: SITTING
9 : Activity_Prediction: WALKING
10 : Activity_Prediction: WALKING
11 : Activity_Prediction: SITTING
12 : Activity_Prediction: WALKING
13 : Activity_Prediction: WALKING
14 : Activity_Prediction: WALKING
15 : Activity_Prediction: WALKING
16 : Activity_Prediction: SITTING
17 : Activity_Prediction: WALKING
18 : Activity_Prediction: WALKING
19 : Activity_Prediction: WALKING
20 : Activity_Prediction: WALKING
21 : Activity_Prediction: SITTING
22 : Activity_Prediction: WALKING
23 : Activity_Prediction: WALKING
24 : Activity_Prediction: SITTING
25 : Activity_Prediction: SITTING
26 : Activity_Prediction: WALKING
27 : Activity_Prediction: SITTING
28 : Activity_Prediction: WALKING
29 : Activity_Prediction: SITTING
30 : Activity_Prediction: WALKING
31 : Activity_Prediction: SITTING
32 : Activity_Prediction: WALKING
33 : Activity_Prediction: WALKING
34 : Activity_Prediction: WALKING
35 : Activity_Prediction: WALKING
36 : Activity_Prediction: WALKING
37 : Activity_Prediction: SITTING
38 : Activity_Prediction: WALKING
39 : Activity_Prediction: SITTING
40 : Activity_Prediction: SITTING
41 : Activity_Prediction: WALKING
42 : Activity_Prediction: SITTING
43 : Activity_Prediction: WALKING
44 : Activity_Prediction: WALKING
45 : Activity_Prediction: SITTING
46 : Activity_Prediction: WALKING
47 : Activity_Prediction: WALKING
48 : Activity_Prediction: WALKING
49 : Activity_Prediction: WALKING
50 : Activity_Prediction: WALKING
51 : Activity_Prediction: WALKING
52 : Activity_Prediction: "WALKING"
53 : Activity_Prediction: WALKING
54 : Activity_Prediction: WALKING
Number of predictions: 54
"""

# Capture the label that follows "<n> : Activity_Prediction:"; optional
# double quotes around the label are dropped.
zero_shot_predictions = re.findall(r'\d+\s+:\s+\w+\s*:\s+["]?(\w+)["]?', zero_shot_output)

# Integer label -> activity name, so y_test can be compared with the predicted names
activities = {v: k for k, v in classes.items()}
activity_labels_0 = [activities[label] for label in y_test]

# zip() silently drops unmatched items, which would misalign predictions and
# labels without any warning; refuse to score lists of different lengths.
if len(zero_shot_predictions) != len(activity_labels_0):
    raise ValueError(
        f"Parsed {len(zero_shot_predictions)} predictions "
        f"but there are {len(activity_labels_0)} test labels"
    )

# Fraction of test samples whose predicted name equals the true name
accuracy = sum(a == b for a, b in zip(zero_shot_predictions, activity_labels_0)) / len(activity_labels_0)
accuracy

0.2222222222222222

In [25]:
# import re
# pattern = r'"(.*)"'

# extracted_predictions = [re.findall(pattern, answer) for answer in predictions]
# flattened_predictions = [pred for p1 in extracted_predictions for pred in p1]
# # len(flattened_predictions)
# y_train_pred = list(map(lambda x: classes.get(x), flattened_predictions))
# y_train_1 = y_train[:50]


# Few Shot

In [58]:
# Label encoding (activity name -> integer) and its inverse
classes = {
    "WALKING": 1,
    "WALKING_UPSTAIRS": 2,
    "WALKING_DOWNSTAIRS": 3,
    "SITTING": 4,
    "STANDING": 5,
    "LAYING": 6,
}
activities = {code: name for name, code in classes.items()}

# Labelled training samples act as the examples; the test samples are the
# ones the model is asked to label. Integer labels are turned into names.
train_data = X_train
train_labels = [activities[code] for code in y_train]
test_data = X_test
test_labels = [activities[code] for code in y_test]

# NOTE(review): the arrays are interpolated using their default string form.
# With NumPy's default print options large arrays are abbreviated with "...",
# so the prompt may contain only a small part of the data - confirm.
query= f"""
    * You are an data analysis model.
    * Your task is to identify the type of activity after analysing and finding similarities of given labelled data.
    * You will be given two sets of data (train_data, train_labels) & (test_data)
    * test_labels are not given to you, your job will be to predict those
    * These dataset contains data from accelerometer measuring acceleration in 3 axes: x-axis, y-axis, z-axis.
    * Your answer is to be in the form of list, containing prediction for each activity in test_data.
    * train_data.shape = (126, 500, 3) meaning 126 activities with 500 observations of accelerometer along 3 axes.
    * train_labels.shape = (126,) : labels of 126 activities
    * test_data.shape = (54, 500, 3) : 54 activity.
    * Give your answer in form of list containing all 54 predictions of each activity

    Here's the data for you:
    train_data = {train_data}
    train_labels = {train_labels}
    test_data = {test_data}
"""

# Any alias from the groq_models dictionary can be used here
model_name = "llama3-8b"
llm = ChatGroq(
    model=groq_models[model_name],
    api_key=Groq_Token,
    temperature=0,
)

# Single request carrying the whole prompt; show the text of the reply
answer = llm.invoke(query)
print(answer.content)

I'll analyze the data and predict the activities in the test data.

After analyzing the data, I found that the activities can be broadly categorized into six classes: 'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'LAYING', 'SITTING', and 'STANDING'.

Using a classification algorithm, I predicted the activities in the test data as follows:

```
['WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'LAYING', 'SITTING', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_DOWNSTAIRS', 'WALKING_

In [61]:
# Follow-up prompt: feeds the model's previous reply back and asks it to redo
# the prediction with exactly 54 labels.
# `answer.content` (the reply text) is interpolated rather than `answer`
# itself, because formatting the message object would also embed its
# metadata in the prompt.
# NOTE: this cell overwrites `answer`, so re-running it feeds the previous
# retry back in instead of the first reply.
query_2 = f"""
    Are you sure about your answer as it seems to contain way more values than what I provided in that test_data set which was 54 predictions only?
    Your answer: {answer.content}

    * "Here's the data for reanalysis, do better this time"
    * train_data.shape = (126, 500, 3) meaning 126 activities with 500 observations of accelerometer along 3 axes.
    * train_labels.shape = (126,) : labels of 126 activities
    * test_data.shape = (54, 500, 3) : 54 activity.
    * Give your answer in form of list containing all 54 predictions of each activity

    Here's the data for you:
    train_data = {train_data}
    train_labels = {train_labels}
    test_data = {test_data}
    
"""
answer = llm.invoke(query_2)
print(answer.content)

I apologize for the mistake earlier. I will re-analyze the test data and generate a new response.

Based on the provided test data, I predict the following 54 activities:

1. WALKING_UPSTAIRS
2. WALKING_UPSTAIRS
3. LAYING
4. SITTING
5. WALKING
6. WALKING_DOWNSTAIRS
7. SITTING
8. STANDING
9. WALKING_UPSTAIRS
10. LAYING
11. WALKING_UPSTAIRS
12. SITTING
13. WALKING_DOWNSTAIRS
14. LAYING
15. WALKING_UPSTAIRS
16. WALKING_UPSTAIRS
17. SITTING
18. STANDING
19. WALKING_UPSTAIRS
20. LAYING
21. WALKING_DOWNSTAIRS
22. WALKING_UPSTAIRS
23. SITTING
24. STANDING
25. WALKING_UPSTAIRS
26. LAYING
27. WALKING_UPSTAIRS
28. SITTING
29. WALKING_DOWNSTAIRS
30. LAYING
31. WALKING_UPSTAIRS
32. WALKING_UPSTAIRS
33. SITTING
34. STANDING
35. WALKING_UPSTAIRS
36. LAYING
37. WALKING_DOWNSTAIRS
38. WALKING_UPSTAIRS
39. SITTING
40. STANDING
41. WALKING_UPSTAIRS
42. LAYING
43. WALKING_UPSTAIRS
44. SITTING
45. WALKING_DOWNSTAIRS
46. LAYING
47. WALKING_UPSTAIRS
48. WALKING_UPSTAIRS
49. SITTING
50. STANDING
51. WALKING_

In [64]:
# The 54 few-shot predictions, copied by hand from the numbered list in the
# model's reply (five per row; the trailing comment gives the positions).
activity_labels = [
    "WALKING_UPSTAIRS", "WALKING_UPSTAIRS", "LAYING", "SITTING", "WALKING",  # 1-5
    "WALKING_DOWNSTAIRS", "SITTING", "STANDING", "WALKING_UPSTAIRS", "LAYING",  # 6-10
    "WALKING_UPSTAIRS", "SITTING", "WALKING_DOWNSTAIRS", "LAYING", "WALKING_UPSTAIRS",  # 11-15
    "WALKING_UPSTAIRS", "SITTING", "STANDING", "WALKING_UPSTAIRS", "LAYING",  # 16-20
    "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS", "SITTING", "STANDING", "WALKING_UPSTAIRS",  # 21-25
    "LAYING", "WALKING_UPSTAIRS", "SITTING", "WALKING_DOWNSTAIRS", "LAYING",  # 26-30
    "WALKING_UPSTAIRS", "WALKING_UPSTAIRS", "SITTING", "STANDING", "WALKING_UPSTAIRS",  # 31-35
    "LAYING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS", "SITTING", "STANDING",  # 36-40
    "WALKING_UPSTAIRS", "LAYING", "WALKING_UPSTAIRS", "SITTING", "WALKING_DOWNSTAIRS",  # 41-45
    "LAYING", "WALKING_UPSTAIRS", "WALKING_UPSTAIRS", "SITTING", "STANDING",  # 46-50
    "WALKING_UPSTAIRS", "LAYING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"  # 51-54
]

# Count positions where the predicted name equals the true name, then divide
# by the number of predictions.
n_correct = sum(1 for predicted, actual in zip(activity_labels, test_labels) if predicted == actual)
accuracy = n_correct / len(activity_labels)
print(accuracy)
# Takeaway: even with few-shot prompting there is no great change in accuracy,
# and the model also predicted more values than it was asked for.


0.12962962962962962
