# Notebook to demonstrate Zero shot and Few shot Learning

In [1]:
import pandas as pd 
#%pip install langchain_groq
from langchain_groq.chat_models import ChatGroq
import pandas as pd
import numpy as np
from itertools import product
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, recall_score

In [2]:
# Groq API and Models 
Groq_Token = open('..\\groqapi.txt', 'r').read()  # Do not share this key with anyone
groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to github. If the key is found in the code, you will be penalized with a 0.5 marks deduction.

# Zero Shot 

In [114]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts 
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Sentence: {sentence}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment label: Neutral

Explanation: The sentence expresses mixed sentiments. The words "amazing" and "happy" convey a positive sentiment, indicating satisfaction with the product quality and customer service. However, the phrase "delivery was delayed" expresses a negative sentiment, indicating dissatisfaction with the delivery experience. Overall, the positive and negative sentiments balance each other out, resulting in a neutral sentiment label.


# Few Shot

In [4]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts 
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Here are few examples:
1. Sentence: 'The customer service was excellent, and I received my order quickly.'
Sentiment: Positive

2. Sentence: 'The food was bland and the service was slow.'
Sentiment: Negative

3. Sentence: 'The product is okay, but it's not worth the price.'
Sentiment: Neutral

Sentence: {sentence}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment: Positive

Explanation: Although the sentence mentions a negative aspect ("the delivery was delayed"), the positive sentiments ("The product quality is amazing" and "I am happy with the customer service") outweigh the negative one, resulting in an overall positive sentiment. The use of the word "amazing" and "happy" also indicates a strong positive emotion, which contributes to the positive sentiment classification.


In [3]:
f = open("C:\\Users\\Rajeev Wankar\\Desktop\\Aarsh IITGN\\ES335 Machine Learning\\main\\ES335-MLAssignment1\\HAR\\UCI HAR Dataset\\features.txt", 'r').read().split('\n')

In [7]:
feature_labels = pd.read_csv(r'../HAR/UCI HAR Dataset/features.txt',sep = '\\s+',header=None)
dataframeX = pd.read_csv(r'../HAR/UCI HAR Dataset/train/X_train.txt',sep = '\\s+',header=None)
dataframeY = pd.read_csv(r'../HAR/UCI HAR Dataset/train/y_train.txt',sep = '\\s+',header=None)
correlation_matrix = dataframeX.corr()

In [8]:
def reduce_features(threshold = None):
    #feature_labels = pd.read_csv(r'../HAR/UCI HAR Dataset/features.txt',sep = '\\s+',header=None)
    #dataframeX = pd.read_csv(r'../HAR/UCI HAR Dataset/train/X_train.txt',sep = '\\s+',header=None)
    #dataframeY = pd.read_csv(r'../HAR/UCI HAR Dataset/train/y_train.txt',sep = '\\s+',header=None)
    #test_values_y = pd.read_csv(r'../HAR/UCI HAR Dataset/test/y_test.txt',sep = "\\s+",header=None)
    #test_data_X = pd.read_csv(r'../HAR/UCI HAR Dataset/test/X_test.txt',sep = "\\s+",header=None)
    #correlation_matrix = dataframeX.corr()
    removals = set()
    pairs = product(range(len(correlation_matrix.columns)), repeat=2)

    for i, j in pairs:
        if i > j:
            correlation_value = correlation_matrix.iloc[i, j]
            if abs(correlation_value) > threshold and correlation_value != 1:
                if i not in removals and j not in removals:
                    removals.add(i)
    #print(removals)
    selected_features = [col for col in dataframeX.columns if col not in removals]
    return selected_features
    
    # clf = tree.DecisionTreeClassifier(max_depth = depth)
    # clf = clf.fit(filtered_dfX, dataframeY)
    # tree.plot_tree(clf)
    
    # filtered_testx =test_data_X[selected_features]
    
    # y_pred = clf.predict(filtered_testx)
    # y_pred = np.array(y_pred)
    # y_test = test_values_y[0].to_numpy()
    # print(filtered_testx.shape)
    # accuracy = accuracy_score(y_test, y_pred)
    # print(f"Accuracy: {accuracy}")
    # precision = precision_score(y_test, y_pred,average='weighted')
    # print(f"Precision: {precision}")
    # recall = recall_score(y_test, y_pred, average='weighted')
    # print(f"Recall: {recall}")
    # cm = confusion_matrix(y_test, y_pred)
    # print("Confusion Matrix:")
    # print(cm)

    

In [9]:
frs = reduce_features(threshold=0.9)

In [10]:
features = np.array(feature_labels[1].iloc[frs])

def example(n):
    return dataframeX[frs].iloc[n, :].to_numpy()

activity_dic = {1: "WALKING", 2: "WALKING_UPSTAIRS", 3: "WALKING_DOWNSTAIRS", 4: "SITTING", 5: "STANDING", 6: "LAYING"}
def label(n):
    #print(dataframeY.iloc[n, 0])
    return (activity_dic[dataframeY.iloc[n, 0]])
    
label(10)
print(label(100))
print(label(234))
print(label(134))
print(label(135))

WALKING
SITTING
WALKING_DOWNSTAIRS
WALKING_DOWNSTAIRS


In [11]:
label(135)

'WALKING_DOWNSTAIRS'

In [13]:
print(len(features))

233


In [77]:
dataframeY.iloc[5314]

0    4
Name: 5314, dtype: int64

In [78]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."
# System Prompts 
query = f"""
* You are a Human Activity Recognition model.
* Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
* The activities are:
1 WALKING
2 WALKING_UPSTAIRS
3 WALKING_DOWNSTAIRS
4 SITTING
5 STANDING
6 LAYING
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
Feature Names: {features}
Feature Data: {x_example}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Based on the feature data provided, I predict that the activity being performed by the person is **WALKING**.

My reasoning is as follows:

* The values of `tBodyAcc-mean()-X`, `tBodyAcc-mean()-Y`, and `tBodyAcc-mean()-Z` are relatively small, indicating that the body acceleration is not too high in any direction, which is consistent with walking.
* The values of `tBodyAcc-std()-X`, `tBodyAcc-std()-Y`, and `tBodyAcc-std()-Z` are relatively low, indicating that the body acceleration is not too variable, which is also consistent with walking.
* The values of `tBodyGyro-mean()-X`, `tBodyGyro-mean()-Y`, and `tBodyGyro-mean()-Z` are relatively small, indicating that the body gyroscope is not too high in any direction, which is consistent with walking.
* The values of `fBodyAcc-bandsEnergy()-1,8`, `fBodyAcc-bandsEnergy()-9,16`, and `fBodyAcc-bandsEnergy()-17,24` are relatively high, indicating that the body acceleration has a significant energy component in the frequency range of 1-24 Hz, wh

In [105]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."
# System Prompts 
query = f"""
* You are a Human Activity Recognition model.
* Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
* The activities are:
1 WALKING
2 WALKING_UPSTAIRS
3 WALKING_DOWNSTAIRS
4 SITTING
5 STANDING
6 LAYING
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
Feature Names: {features}
Here are a few examples for your easy understanding:
1. Feature Data: {example(234)}
   Predicted Label: {label(234)}
2. Feature Data: {example(134)}
   Predicted Label: {label(134)}

Now, predict the activity for this:
Feature Data: {example(135)}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Based on the provided feature data, I predict the activity label as: WALKING_UPSTAIRS


In [16]:
import os
from groq import Groq

In [47]:
client = Groq(
    api_key=Groq_Token,
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """,
        },
        {
            "role": "user",
            "content": f"""
                        Now, predict this:
                        Feature Data: {" ".join(map(str, example(706)))}
            """
        }
    ],
    model=groq_models["llama3-70b"],
    temperature=0
)

print(chat_completion.choices[0].message.content)

Based on the provided feature data, I predict that the activity being performed is **WALKING**.

My reasoning is based on the following observations:

1. The values of `tBodyAcc-mean()-X`, `tBodyAcc-mean()-Y`, and `tBodyAcc-mean()-Z` are relatively small, indicating that the body acceleration is not extreme in any direction. This is consistent with walking, which typically involves gentle movements.
2. The values of `tBodyAcc-std()-X`, `tBodyAcc-std()-Y`, and `tBodyAcc-std()-Z` are moderate, suggesting that there is some variation in the body acceleration, but not excessively so. This is also consistent with walking, which involves some degree of movement variability.
3. The values of `tBodyGyro-mean()-X`, `tBodyGyro-mean()-Y`, and `tBodyGyro-mean()-Z` are relatively small, indicating that the body angular velocity is not extreme in any direction. This is consistent with walking, which typically involves gentle rotations.
4. The values of `fBodyAcc-bandsEnergy()-1,8`, `fBodyAcc-bandsEn

In [45]:
client = Groq(
    api_key=Groq_Token,
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """,
        },
        {
            "role": "user",
            "content": f"""
                        Look at these examples for reference:
                        Example 1: 
                        Feature Data: {" ".join(map(str, example(489)))}
                        Activity Label: {label(489)}

                        Example 2:
                        Feature Data: {" ".join(map(str, example(100)))}
                        Activity Label: {label(100)}

                        Example 3:
                        Feature Data: {" ".join(map(str, example(345)))}
                        Activity Label: {label(345)}
                        
                        Now, predict this:
                        Feature Data: {" ".join(map(str, example(706)))}
            """
        }
    ],
    model=groq_models["llama3-70b"],
    temperature=0
)

print(chat_completion.choices[0].message.content)

BadRequestError: Error code: 400 - {'error': {'message': 'Bad Request', 'type': 'invalid_request_error'}}

In [44]:
label(100)

'WALKING'

In [38]:
f"The features are: {" ".join(map(str, features))}"

'The features are: tBodyAcc-mean()-X tBodyAcc-mean()-Y tBodyAcc-mean()-Z tBodyAcc-std()-X tBodyAcc-std()-Z tBodyAcc-max()-Y tBodyAcc-energy()-Y tBodyAcc-entropy()-X tBodyAcc-entropy()-Y tBodyAcc-entropy()-Z tBodyAcc-arCoeff()-X,1 tBodyAcc-arCoeff()-X,2 tBodyAcc-arCoeff()-X,3 tBodyAcc-arCoeff()-X,4 tBodyAcc-arCoeff()-Y,1 tBodyAcc-arCoeff()-Y,2 tBodyAcc-arCoeff()-Y,3 tBodyAcc-arCoeff()-Y,4 tBodyAcc-arCoeff()-Z,1 tBodyAcc-arCoeff()-Z,2 tBodyAcc-arCoeff()-Z,3 tBodyAcc-arCoeff()-Z,4 tBodyAcc-correlation()-X,Y tBodyAcc-correlation()-X,Z tBodyAcc-correlation()-Y,Z tGravityAcc-mean()-X tGravityAcc-mean()-Y tGravityAcc-mean()-Z tGravityAcc-std()-X tGravityAcc-std()-Y tGravityAcc-std()-Z tGravityAcc-sma() tGravityAcc-energy()-Y tGravityAcc-energy()-Z tGravityAcc-entropy()-X tGravityAcc-entropy()-Y tGravityAcc-entropy()-Z tGravityAcc-arCoeff()-X,1 tGravityAcc-arCoeff()-Y,1 tGravityAcc-arCoeff()-Z,1 tGravityAcc-correlation()-X,Y tGravityAcc-correlation()-X,Z tGravityAcc-correlation()-Y,Z tBodyAccJ

In [32]:
" ".join(map(str, example(134)))

'0.28521305 -0.020202459 -0.011024206 0.12298877 0.049997153 -0.13146399 -0.77387525 0.17313063 0.41256235 0.4877274 -0.097506372 0.061074712 -0.10510428 0.25557876 -0.043594722 0.12376661 -0.043619384 0.33774432 0.28254422 -0.11437041 0.30265977 -0.33179919 -0.60947432 -0.2593937 0.27734239 0.92083549 -0.26701193 -0.051933496 -0.95911473 -0.90075178 -0.80465984 -0.2541099 -0.87977617 -0.99175308 -0.21686554 -1.0 -1.0 -0.56643409 -0.25239814 -0.25379589 -0.036773651 0.33624441 0.90691777 0.68795324 -0.44195853 -0.58961367 -0.32076725 -0.10694661 0.061288855 0.054695983 -0.091688698 0.29732275 -0.27131136 0.7110072 0.024794932 0.34132929 0.1557608 -0.37536597 -0.66512163 0.29066963 -0.4128569 0.18260599 0.15337268 -0.62878244 -0.13080261 -0.025364134 -0.18506944 -0.60874073 -0.83871619 0.18495313 0.53451538 -0.57162283 0.053450506 -0.30787046 -0.010199517 0.13563352 -0.059698596 0.12699625 -0.31462838 -0.20415478 0.24678686 0.21497499 0.090683463 -0.51010448 0.19139611 0.4794505 -0.1428

In [25]:
label(451)

'WALKING'

In [23]:
while True:
    n = int(input())
    if n==-1:
        break
    
    print(label(n))

STANDING
STANDING
WALKING
WALKING
SITTING
LAYING
