# Notebook to Demonstrate Zero-Shot and Few-Shot Learning

In [1]:
import pandas as pd 
#%pip install langchain_groq
from langchain_groq.chat_models import ChatGroq
import pandas as pd
import numpy as np
from itertools import product
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, recall_score

In [2]:
# Groq API and Models
# The key is read from a file that lives outside the repository. The context
# manager closes the handle, and strip() removes the trailing newline that would
# otherwise become part of the key. A forward slash is used so the relative path
# resolves on Windows as well as on POSIX systems.
with open('../groqapi.txt', 'r') as _token_file:
    Groq_Token = _token_file.read().strip()  # Do not share this key with anyone

# Short aliases used in this notebook -> model identifiers passed to the Groq API
groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to GitHub. If the key is found in the code, you will be penalized with a 0.5-mark deduction.

# Zero Shot 

In [3]:
# Text whose sentiment the model is asked to classify
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# Groq-hosted chat model; any key of the groq_models dictionary can be used here
model_name = "llama3-70b"
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)

# Zero-shot prompt: task instructions only, no worked examples
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Sentence: {sentence}
"""

# Send the prompt and show the text of the reply
answer = llm.invoke(query)
print(answer.content)

Sentiment label: Neutral

Explanation: The sentence expresses mixed sentiments. The words "amazing" and "happy" convey a positive sentiment, indicating satisfaction with the product quality and customer service, respectively. However, the phrase "delivery was delayed" expresses a negative sentiment, indicating dissatisfaction with the delivery experience. Since both positive and negative sentiments are present, the overall sentiment is neutral.


# Few Shot

In [4]:
# Text whose sentiment the model is asked to classify
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# Groq-hosted chat model; any key of the groq_models dictionary can be used here
model_name = "llama3-70b"
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)

# Few-shot prompt: the same instructions as the zero-shot cell, followed by
# three labelled example sentences
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Here are few examples:
1. Sentence: 'The customer service was excellent, and I received my order quickly.'
Sentiment: Positive

2. Sentence: 'The food was bland and the service was slow.'
Sentiment: Negative

3. Sentence: 'The product is okay, but it's not worth the price.'
Sentiment: Neutral

Sentence: {sentence}
"""

# Send the prompt and show the text of the reply
answer = llm.invoke(query)
print(answer.content)

Sentiment: Positive

Explanation: Although the sentence mentions a negative aspect ("the delivery was delayed"), the positive sentiments expressed in the sentence ("The product quality is amazing" and "I am happy with the customer service") outweigh the negative one, resulting in an overall positive sentiment.


In [5]:
# Lines of the feature-name file, split on newlines.
# The path is relative to the notebook (the same location the pandas loader in
# the next cell reads from) instead of an absolute path on one user's machine,
# and the context manager closes the file handle.
with open('../HAR/UCI HAR Dataset/features.txt', 'r') as _features_file:
    f = _features_file.read().split('\n')

In [6]:
# The three files are read the same way: whitespace-delimited, no header row.
feature_labels, dataframeX, dataframeY = (
    pd.read_csv(path, sep='\\s+', header=None)
    for path in (
        r'../HAR/UCI HAR Dataset/features.txt',
        r'../HAR/UCI HAR Dataset/train/X_train.txt',
        r'../HAR/UCI HAR Dataset/train/y_train.txt',
    )
)

# Pairwise correlation between all feature columns, used for feature reduction
correlation_matrix = dataframeX.corr()

In [7]:
def reduce_features(threshold = None, corr = None, columns = None):
    """Greedily drop one feature from every highly correlated pair.

    Features are visited in column order. A feature is dropped when it has an
    absolute correlation above ``threshold`` with an earlier feature that has
    not itself been dropped.

    Parameters
    ----------
    threshold : float or None
        Absolute-correlation cut-off. ``None`` means "no reduction": every
        column is returned.
    corr : pandas.DataFrame, optional
        Square correlation matrix. Defaults to the notebook-level
        ``correlation_matrix``.
    columns : iterable, optional
        Column labels, in the same order as the rows of ``corr``. Defaults to
        ``dataframeX.columns`` when ``corr`` is not given, otherwise to
        ``corr.columns``.

    Returns
    -------
    list
        Labels of the columns that were kept, in their original order.
    """
    if columns is None:
        columns = dataframeX.columns if corr is None else corr.columns
    if corr is None:
        corr = correlation_matrix
    columns = list(columns)

    # Without a cut-off there is nothing to compare against; keep everything.
    if threshold is None:
        return columns

    corr_values = corr.to_numpy()
    removals = set()  # positions (not labels) of dropped columns

    for i in range(1, corr_values.shape[0]):
        for j in range(i):
            if j in removals:
                continue
            correlation_value = corr_values[i, j]
            # Correlations of exactly 1 are deliberately not treated as redundant,
            # matching the existing rule.
            # NOTE(review): this keeps exact duplicate columns - confirm intent.
            if abs(correlation_value) > threshold and correlation_value != 1:
                removals.add(i)
                break  # once i is dropped, no further pair can change the result

    # Filter by position so the result is also correct when column labels are
    # not simply 0..n-1.
    return [col for position, col in enumerate(columns) if position not in removals]
    


    

In [8]:
# Column labels of dataframeX kept by reduce_features at a 0.9 correlation cut-off
frs = reduce_features(threshold=0.9)

In [9]:
# Names of the retained features, taken from the second column of the
# feature-name table at the positions listed in frs
features = np.array(feature_labels[1].iloc[frs])

def example(n):
    """Return the retained feature values of row ``n`` of dataframeX as a NumPy array."""
    reduced = dataframeX[frs]
    row = reduced.iloc[n, :]
    return row.to_numpy()

# Numeric activity code (as stored in dataframeY) -> activity name
activity_dic = {
    1: "WALKING",
    2: "WALKING_UPSTAIRS",
    3: "WALKING_DOWNSTAIRS",
    4: "SITTING",
    5: "STANDING",
    6: "LAYING",
}

def label(n):
    """Return the activity name for the row at position ``n`` of dataframeY."""
    activity_code = dataframeY.iloc[n, 0]
    return activity_dic[activity_code]
    
# Result discarded: this is not the last expression of the cell and is not printed
label(10)

# Spot-check a few rows
for row in (100, 234, 134, 135):
    print(label(row))

WALKING
SITTING
WALKING_DOWNSTAIRS
WALKING_DOWNSTAIRS


In [10]:
# Number of retained features, then their names
print(len(features), features, sep="\n")

233
['tBodyAcc-mean()-X' 'tBodyAcc-mean()-Y' 'tBodyAcc-mean()-Z'
 'tBodyAcc-std()-X' 'tBodyAcc-std()-Z' 'tBodyAcc-max()-Y'
 'tBodyAcc-energy()-Y' 'tBodyAcc-entropy()-X' 'tBodyAcc-entropy()-Y'
 'tBodyAcc-entropy()-Z' 'tBodyAcc-arCoeff()-X,1' 'tBodyAcc-arCoeff()-X,2'
 'tBodyAcc-arCoeff()-X,3' 'tBodyAcc-arCoeff()-X,4'
 'tBodyAcc-arCoeff()-Y,1' 'tBodyAcc-arCoeff()-Y,2'
 'tBodyAcc-arCoeff()-Y,3' 'tBodyAcc-arCoeff()-Y,4'
 'tBodyAcc-arCoeff()-Z,1' 'tBodyAcc-arCoeff()-Z,2'
 'tBodyAcc-arCoeff()-Z,3' 'tBodyAcc-arCoeff()-Z,4'
 'tBodyAcc-correlation()-X,Y' 'tBodyAcc-correlation()-X,Z'
 'tBodyAcc-correlation()-Y,Z' 'tGravityAcc-mean()-X'
 'tGravityAcc-mean()-Y' 'tGravityAcc-mean()-Z' 'tGravityAcc-std()-X'
 'tGravityAcc-std()-Y' 'tGravityAcc-std()-Z' 'tGravityAcc-sma()'
 'tGravityAcc-energy()-Y' 'tGravityAcc-energy()-Z'
 'tGravityAcc-entropy()-X' 'tGravityAcc-entropy()-Y'
 'tGravityAcc-entropy()-Z' 'tGravityAcc-arCoeff()-X,1'
 'tGravityAcc-arCoeff()-Y,1' 'tGravityAcc-arCoeff()-Z,1'
 'tGravityAcc-cor

In [11]:
# Retained column label -> feature name (frs and features are position-aligned)
dictionary = dict(zip(frs, features))

In [12]:
# Display the column-label -> feature-name mapping
dictionary

{0: 'tBodyAcc-mean()-X',
 1: 'tBodyAcc-mean()-Y',
 2: 'tBodyAcc-mean()-Z',
 3: 'tBodyAcc-std()-X',
 5: 'tBodyAcc-std()-Z',
 10: 'tBodyAcc-max()-Y',
 17: 'tBodyAcc-energy()-Y',
 22: 'tBodyAcc-entropy()-X',
 23: 'tBodyAcc-entropy()-Y',
 24: 'tBodyAcc-entropy()-Z',
 25: 'tBodyAcc-arCoeff()-X,1',
 26: 'tBodyAcc-arCoeff()-X,2',
 27: 'tBodyAcc-arCoeff()-X,3',
 28: 'tBodyAcc-arCoeff()-X,4',
 29: 'tBodyAcc-arCoeff()-Y,1',
 30: 'tBodyAcc-arCoeff()-Y,2',
 31: 'tBodyAcc-arCoeff()-Y,3',
 32: 'tBodyAcc-arCoeff()-Y,4',
 33: 'tBodyAcc-arCoeff()-Z,1',
 34: 'tBodyAcc-arCoeff()-Z,2',
 35: 'tBodyAcc-arCoeff()-Z,3',
 36: 'tBodyAcc-arCoeff()-Z,4',
 37: 'tBodyAcc-correlation()-X,Y',
 38: 'tBodyAcc-correlation()-X,Z',
 39: 'tBodyAcc-correlation()-Y,Z',
 40: 'tGravityAcc-mean()-X',
 41: 'tGravityAcc-mean()-Y',
 42: 'tGravityAcc-mean()-Z',
 43: 'tGravityAcc-std()-X',
 44: 'tGravityAcc-std()-Y',
 45: 'tGravityAcc-std()-Z',
 55: 'tGravityAcc-sma()',
 57: 'tGravityAcc-energy()-Y',
 58: 'tGravityAcc-energy()-Z',
 

In [13]:
# Activity name for every row of dataframeY. The row count is taken from the
# frame itself instead of a hard-coded 7352, so the cell stays correct if a
# different split or subset is loaded.
act_labels = [label(i) for i in range(len(dataframeY))]

In [14]:
# Keep only the retained columns and replace their labels with feature names
reduced_dataset = dataframeX[frs].rename(columns=dictionary)

# Put the activity name in the first column, then show the frame
reduced_dataset.insert(0, "Activity_Name", act_labels)
display(reduced_dataset)

Unnamed: 0,Activity_Name,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Z,tBodyAcc-max()-Y,tBodyAcc-energy()-Y,tBodyAcc-entropy()-X,tBodyAcc-entropy()-Y,...,fBodyBodyGyroMag-meanFreq(),fBodyBodyGyroMag-skewness(),fBodyBodyGyroJerkMag-min(),fBodyBodyGyroJerkMag-maxInds,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)"
0,STANDING,0.288585,-0.020294,-0.132905,-0.995279,-0.913526,-0.567378,-0.999863,-0.407747,-0.679338,...,-0.128989,0.586156,-0.991048,-1.000000,-0.074323,-0.298676,-0.112754,0.030400,-0.464761,-0.018446
1,STANDING,0.278419,-0.016411,-0.123520,-0.998245,-0.960322,-0.557851,-0.999788,-0.714892,-0.500930,...,-0.271958,-0.336310,-0.994440,-1.000000,0.158075,-0.595051,0.053477,-0.007435,-0.732626,0.703511
2,STANDING,0.279653,-0.019467,-0.113462,-0.995380,-0.978944,-0.557851,-0.999660,-0.592235,-0.485821,...,-0.212728,-0.535352,-0.995866,-0.555556,0.414503,-0.390748,-0.118559,0.177899,0.100699,0.808529
3,STANDING,0.279174,-0.026201,-0.123283,-0.996091,-0.990675,-0.576159,-0.999736,-0.627446,-0.850930,...,-0.035684,-0.230091,-0.995732,-0.936508,0.404573,-0.117290,-0.036788,-0.012892,0.640011,-0.485366
4,STANDING,0.276629,-0.016570,-0.115362,-0.998139,-0.990482,-0.569174,-0.999856,-0.786553,-0.559477,...,-0.273582,-0.510282,-0.997418,-0.936508,0.087753,-0.351471,0.123320,0.122542,0.693578,-0.615971
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7347,WALKING_UPSTAIRS,0.299665,-0.057193,-0.181233,-0.195387,0.077078,0.029369,-0.788077,0.195643,0.140158,...,-0.376234,-0.170845,-0.925241,-0.904762,-0.070157,-0.588433,-0.190437,0.829718,0.206972,-0.425619
7348,WALKING_UPSTAIRS,0.273853,-0.007749,-0.147468,-0.235309,0.059280,0.029369,-0.804546,0.178138,0.293566,...,-0.296176,0.019626,-0.761880,-0.904762,0.165259,-0.390738,0.064907,0.875679,-0.879033,0.400219
7349,WALKING_UPSTAIRS,0.273387,-0.017011,-0.045022,-0.218218,0.274533,-0.110405,-0.844279,0.173131,0.276040,...,-0.320249,0.490169,-0.692582,-0.904762,0.195034,0.025145,0.052806,-0.266724,0.864404,0.701169
7350,WALKING_UPSTAIRS,0.289654,-0.018843,-0.158281,-0.219139,0.268893,-0.149495,-0.846871,0.160333,0.246561,...,-0.412332,0.308105,-0.886154,-0.904762,0.013865,0.063907,-0.101360,0.700740,0.936674,-0.589479


In [15]:
# Candidate feature names, one per line and grouped by signal family
relevant_features = [
    # body acceleration: mean and standard deviation per axis
    'tBodyAcc-mean()-X',
    'tBodyAcc-mean()-Y',
    'tBodyAcc-mean()-Z',
    'tBodyAcc-std()-X',
    'tBodyAcc-std()-Y',
    'tBodyAcc-std()-Z',
    # jerk and gyroscope means per axis
    'tBodyAccJerk-mean()-X',
    'tBodyAccJerk-mean()-Y',
    'tBodyAccJerk-mean()-Z',
    'tBodyGyro-mean()-X',
    'tBodyGyro-mean()-Y',
    'tBodyGyro-mean()-Z',
    'tBodyGyroJerk-mean()-X',
    'tBodyGyroJerk-mean()-Y',
    'tBodyGyroJerk-mean()-Z',
    # magnitude signals
    'tBodyAccMag-mean()',
    'tBodyAccMag-std()',
    'tBodyGyroMag-mean()',
    'tBodyGyroMag-std()',
    'tBodyAccJerkMag-mean()',
    'tBodyAccJerkMag-std()',
    'tBodyGyroJerkMag-mean()',
    'tBodyGyroJerkMag-std()',
    # angle features
    'angle(tBodyAccMean,gravity)',
    'angle(tBodyAccJerkMean),gravityMean)',
    'angle(tBodyGyroMean,gravityMean)',
    'angle(tBodyGyroJerkMean,gravityMean)',
    # frequency-domain mean frequencies per axis
    'fBodyAcc-meanFreq()-X',
    'fBodyAcc-meanFreq()-Y',
    'fBodyAcc-meanFreq()-Z',
    'fBodyAccJerk-meanFreq()-X',
    'fBodyAccJerk-meanFreq()-Y',
    'fBodyAccJerk-meanFreq()-Z',
    'fBodyGyro-meanFreq()-X',
    'fBodyGyro-meanFreq()-Y',
    'fBodyGyro-meanFreq()-Z',
]

In [16]:
# Candidate features that are also present among the retained features
real_relevants = [name for name in relevant_features if name in features]
    


In [135]:
# Keep the label column plus the first 20 entries of real_relevants
# (the cap of 20 is hard-coded here).
neo_reduced_dataset=reduced_dataset[["Activity_Name"]+real_relevants[:20]]

In [136]:
# Fixed seed so the same rows are drawn on every run
np.random.seed(42)

# 50 distinct row numbers out of 7352; the first 30 are training examples and
# the remaining 20 are held out for testing
total_arr = np.random.choice(7352, size=50, replace=False)
train_arr, test_arr = total_arr[:30], total_arr[30:]

In [137]:
# Labelled rows that are shown to the model as examples
train_dataset = neo_reduced_dataset.loc[train_arr].reset_index(drop=True)

# Rows to classify: same columns without the label
test_dataset = (
    neo_reduced_dataset
    .loc[test_arr]
    .drop(columns="Activity_Name")
    .reset_index(drop=True)
)

display(train_dataset)

Unnamed: 0,Activity_Name,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Z,tBodyAccJerk-mean()-X,tBodyAccJerk-mean()-Y,tBodyAccJerk-mean()-Z,tBodyGyro-mean()-X,...,tBodyGyro-mean()-Z,tBodyGyroJerk-mean()-X,tBodyGyroJerk-mean()-Y,tBodyGyroJerk-mean()-Z,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)",fBodyAcc-meanFreq()-X,fBodyAcc-meanFreq()-Y
0,WALKING_DOWNSTAIRS,0.283203,-0.047024,-0.168986,0.384949,-0.310332,-0.012961,0.101683,-0.161204,-0.033257,...,0.088383,0.248306,-0.110213,-0.123357,-0.034924,0.558036,0.258975,-0.854858,-0.415681,-0.132703
1,WALKING_DOWNSTAIRS,0.256904,-0.036623,-0.133856,0.201409,0.344183,0.346611,0.239319,0.113382,-0.273456,...,0.216392,-0.002706,-0.109614,-0.279235,0.017986,-0.475545,0.942947,-0.567147,-0.253835,-0.00213
2,WALKING,0.291316,-0.001065,-0.072461,-0.336609,-0.303323,0.294047,-0.220531,-0.297011,0.041363,...,0.013553,-0.124192,-0.03795,-0.192853,-0.044663,-0.551806,-0.680774,0.161405,-0.268996,0.119432
3,SITTING,0.276116,-0.010909,-0.102886,-0.992196,-0.981127,0.079528,0.021849,-0.008992,-0.041065,...,0.173899,-0.092831,-0.023087,-0.085052,-0.177275,-0.309304,0.176515,-0.143818,0.16103,0.398283
4,WALKING_DOWNSTAIRS,0.256382,0.000428,-0.113664,0.075014,-0.369482,-0.054941,-0.020061,-0.207329,-0.198899,...,-0.072958,-0.143041,-0.082521,-0.093903,0.347143,0.590475,0.831084,0.465453,-0.403025,-0.076212
5,WALKING,0.286792,-0.010171,-0.09051,-0.541894,-0.405104,0.12898,-0.11842,0.077925,-0.067952,...,0.060651,-0.105985,0.08738,-0.146405,-0.291898,-0.428998,0.957119,0.150076,-0.448739,-0.182166
6,WALKING_UPSTAIRS,0.307005,-0.011717,-0.199597,-0.193934,0.153858,-0.41495,0.012918,-0.020134,0.1198,...,0.124842,-0.00856,0.382601,0.31519,-0.3415,0.70313,-0.649245,0.145511,-0.253521,-0.242334
7,WALKING,0.261081,-0.015636,-0.102588,-0.171239,-0.261589,-0.190438,-0.127373,-0.459133,-0.032998,...,0.176337,0.05508,-0.220133,0.013602,0.717645,0.401581,-0.038345,-0.593632,-0.075309,0.223796
8,WALKING_DOWNSTAIRS,0.410628,-0.079242,-0.13313,-0.05761,-0.476595,0.203547,0.095055,0.168325,-0.101665,...,0.130632,0.007449,-0.095592,-0.01102,-0.434719,-0.583762,0.765416,-0.72425,-0.584146,-0.231607
9,STANDING,0.278973,-0.014899,-0.108145,-0.996035,-0.995094,0.077929,0.0089,-0.009874,-0.031413,...,0.079021,-0.100242,-0.03921,-0.052447,-0.055863,-0.172725,0.641048,0.527575,0.140468,0.339156


In [138]:
# Show the test rows (label column already dropped)
display(test_dataset)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Z,tBodyAccJerk-mean()-X,tBodyAccJerk-mean()-Y,tBodyAccJerk-mean()-Z,tBodyGyro-mean()-X,tBodyGyro-mean()-Y,tBodyGyro-mean()-Z,tBodyGyroJerk-mean()-X,tBodyGyroJerk-mean()-Y,tBodyGyroJerk-mean()-Z,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)",fBodyAcc-meanFreq()-X,fBodyAcc-meanFreq()-Y
0,0.354038,-0.032713,-0.130041,-0.262424,0.103241,-0.164872,-0.191488,-0.141229,-0.028889,-0.016386,0.012304,-0.103441,0.171971,-0.036551,-0.69253,0.539061,0.083129,0.122397,-0.310385,0.03074
1,0.383065,-0.002673,-0.161956,-0.886149,-0.850207,0.05297,0.010709,0.035588,0.028858,-0.087974,0.484237,-0.105495,-0.036806,-0.149532,-0.041743,-0.116764,-0.266487,0.242272,-0.674151,0.104224
2,0.270334,-0.006469,-0.075698,-0.367132,-0.340368,0.624318,0.054682,0.163803,-0.05436,-0.120071,0.130143,-0.041155,0.028957,-0.168439,0.266461,-0.704441,0.468177,-0.332219,-0.010932,0.067182
3,0.332032,-0.034668,-0.124365,-0.24349,-0.217886,-0.407685,0.205958,-0.122231,-0.039612,-0.033779,-0.009016,-0.215738,-0.278663,-0.211934,-0.786405,0.722978,0.054462,0.003516,-0.071939,-0.092365
4,0.250267,-0.024771,-0.115184,-0.082664,-0.27069,0.050263,-0.116439,-0.03715,-0.117859,-0.031691,-0.031003,-0.293376,0.138514,-0.079262,0.482225,0.073668,0.750983,0.660631,-0.218744,0.443713
5,0.28306,-0.020522,-0.110535,-0.992474,-0.982363,0.077779,0.008948,-0.003621,-0.028567,-0.068491,0.113573,-0.094534,-0.038579,-0.070636,0.084189,-0.0488,-0.27473,-0.006193,-0.239275,-0.149203
6,0.264659,-0.019388,-0.095513,-0.986949,-0.977386,0.06663,0.013104,0.008348,-0.034554,-0.067426,0.081855,-0.092012,-0.04298,-0.051888,0.082861,-0.123162,-0.230813,0.205864,-0.110668,0.420278
7,0.421682,0.011998,0.093855,-0.914073,-0.67842,0.051535,-0.05929,-0.133083,-0.009035,0.02459,0.07231,-0.086235,-0.052726,-0.032057,-0.067263,-0.007053,-0.228974,-0.252745,-0.570003,-0.562642
8,0.28919,-0.016912,-0.109204,-0.992669,-0.990263,0.079835,0.005397,0.00663,-0.019149,-0.067025,0.059356,-0.101729,-0.042814,-0.062813,-0.036688,-0.01638,0.164991,0.595062,-0.03692,0.181445
9,0.274885,-0.018995,-0.105931,-0.996744,-0.985398,0.076368,0.007603,-0.006116,-0.029679,-0.074768,0.08692,-0.097895,-0.039857,-0.054515,0.170395,-0.102562,0.804323,-0.239214,0.127465,0.275704


In [139]:
# Show the training rows again (same display call as in the cell that builds train_dataset)
display(train_dataset)

Unnamed: 0,Activity_Name,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Z,tBodyAccJerk-mean()-X,tBodyAccJerk-mean()-Y,tBodyAccJerk-mean()-Z,tBodyGyro-mean()-X,...,tBodyGyro-mean()-Z,tBodyGyroJerk-mean()-X,tBodyGyroJerk-mean()-Y,tBodyGyroJerk-mean()-Z,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)",fBodyAcc-meanFreq()-X,fBodyAcc-meanFreq()-Y
0,WALKING_DOWNSTAIRS,0.283203,-0.047024,-0.168986,0.384949,-0.310332,-0.012961,0.101683,-0.161204,-0.033257,...,0.088383,0.248306,-0.110213,-0.123357,-0.034924,0.558036,0.258975,-0.854858,-0.415681,-0.132703
1,WALKING_DOWNSTAIRS,0.256904,-0.036623,-0.133856,0.201409,0.344183,0.346611,0.239319,0.113382,-0.273456,...,0.216392,-0.002706,-0.109614,-0.279235,0.017986,-0.475545,0.942947,-0.567147,-0.253835,-0.00213
2,WALKING,0.291316,-0.001065,-0.072461,-0.336609,-0.303323,0.294047,-0.220531,-0.297011,0.041363,...,0.013553,-0.124192,-0.03795,-0.192853,-0.044663,-0.551806,-0.680774,0.161405,-0.268996,0.119432
3,SITTING,0.276116,-0.010909,-0.102886,-0.992196,-0.981127,0.079528,0.021849,-0.008992,-0.041065,...,0.173899,-0.092831,-0.023087,-0.085052,-0.177275,-0.309304,0.176515,-0.143818,0.16103,0.398283
4,WALKING_DOWNSTAIRS,0.256382,0.000428,-0.113664,0.075014,-0.369482,-0.054941,-0.020061,-0.207329,-0.198899,...,-0.072958,-0.143041,-0.082521,-0.093903,0.347143,0.590475,0.831084,0.465453,-0.403025,-0.076212
5,WALKING,0.286792,-0.010171,-0.09051,-0.541894,-0.405104,0.12898,-0.11842,0.077925,-0.067952,...,0.060651,-0.105985,0.08738,-0.146405,-0.291898,-0.428998,0.957119,0.150076,-0.448739,-0.182166
6,WALKING_UPSTAIRS,0.307005,-0.011717,-0.199597,-0.193934,0.153858,-0.41495,0.012918,-0.020134,0.1198,...,0.124842,-0.00856,0.382601,0.31519,-0.3415,0.70313,-0.649245,0.145511,-0.253521,-0.242334
7,WALKING,0.261081,-0.015636,-0.102588,-0.171239,-0.261589,-0.190438,-0.127373,-0.459133,-0.032998,...,0.176337,0.05508,-0.220133,0.013602,0.717645,0.401581,-0.038345,-0.593632,-0.075309,0.223796
8,WALKING_DOWNSTAIRS,0.410628,-0.079242,-0.13313,-0.05761,-0.476595,0.203547,0.095055,0.168325,-0.101665,...,0.130632,0.007449,-0.095592,-0.01102,-0.434719,-0.583762,0.765416,-0.72425,-0.584146,-0.231607
9,STANDING,0.278973,-0.014899,-0.108145,-0.996035,-0.995094,0.077929,0.0089,-0.009874,-0.031413,...,0.079021,-0.100242,-0.03921,-0.052447,-0.055863,-0.172725,0.641048,0.527575,0.140468,0.339156


In [96]:
# Preview the CSV text of the training frame
print(train_dataset.to_csv())

,Activity_Name,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Z,tBodyAccJerk-mean()-X,tBodyAccJerk-mean()-Y,tBodyAccJerk-mean()-Z,tBodyGyro-mean()-X,tBodyGyro-mean()-Y,tBodyGyro-mean()-Z,tBodyGyroJerk-mean()-X,tBodyGyroJerk-mean()-Y,tBodyGyroJerk-mean()-Z,"angle(tBodyAccMean,gravity)"
0,WALKING_DOWNSTAIRS,0.28320271,-0.047023857,-0.16898586,0.38494871,-0.31033164,-0.012960718,0.10168341,-0.16120374,-0.033256744,-0.092865376,0.088382514,0.24830562,-0.11021282,-0.12335672,-0.034924227
1,WALKING_DOWNSTAIRS,0.2569044,-0.036623068,-0.13385588,0.20140924,0.34418337,0.34661123,0.23931852,0.11338153,-0.27345649,0.014295738,0.21639151,-0.0027058156,-0.10961439,-0.27923492,0.017985874
2,WALKING,0.29131635,-0.0010652766,-0.072461402,-0.33660882,-0.30332258,0.2940469,-0.22053079,-0.29701051,0.041363427,-0.16577855,0.013553112,-0.1241918,-0.03794984,-0.19285335,-0.044663481
3,SITTING,0.27611576,-0.010909002,-0.10288577,-0.99219581,-0.98112725,0.079528326,0.021

In [56]:
# Raise the row limit for DataFrame display so long frames are not truncated
pd.set_option("display.max_rows", 9999)

In [140]:
# Few-shot prompt for Human Activity Recognition: the labelled training rows are
# embedded as CSV, followed by the unlabelled test rows to classify.
query = f"""
* You are a Human Activity Recognition model.
* Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
* The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
* The data features are: {list(train_dataset.columns)[1:]}
* Provide a Python list consisting of the activity label for every test sample.

Train data: (remember, there are {len(train_dataset)} training samples.)

{train_dataset.to_csv()}

Now, predict the activity label of the test data: (remember, there are {len(test_dataset)} test samples.)

Test data:

{test_dataset.to_csv()}
"""

# To use Groq LLMs
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

# Second pass: ask the model to reduce its free-text answer to a bare Python list.
# Only the reply text is forwarded. Interpolating the message object itself would
# put its full string form into the prompt, not just the text.
ans2 = llm.invoke(f"Extract the python list from this text and output it. Do not say anything else: {answer.content}")

print(answer.content)
print(ans2.content)

Based on the provided data, I will predict the activity labels for the test data. Here is the list of predicted activity labels:

1. WALKING
2. STANDING
3. WALKING
4. WALKING_DOWNSTAIRS
5. STANDING
6. STANDING
7. WALKING_UPSTAIRS
8. STANDING
9. STANDING
10. LAYING
11. WALKING_UPSTAIRS
12. WALKING_DOWNSTAIRS
13. STANDING
14. STANDING
15. WALKING
16. STANDING
17. STANDING
18. LAYING
19. WALKING_UPSTAIRS
20. WALKING_DOWNSTAIRS
['WALKING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', 'STANDING', 'STANDING', 'WALKING_UPSTAIRS', 'STANDING', 'STANDING', 'LAYING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'STANDING', 'STANDING', 'WALKING', 'STANDING', 'STANDING', 'LAYING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']


In [128]:
# Feature column names, i.e. every column after the leading label column
print(list(train_dataset.columns)[1:])

['tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z', 'tBodyAcc-std()-X', 'tBodyAcc-std()-Z', 'tBodyAccJerk-mean()-X', 'tBodyAccJerk-mean()-Y', 'tBodyAccJerk-mean()-Z', 'tBodyGyro-mean()-X', 'tBodyGyro-mean()-Y', 'tBodyGyro-mean()-Z', 'tBodyGyroJerk-mean()-X', 'tBodyGyroJerk-mean()-Y', 'tBodyGyroJerk-mean()-Z', 'angle(tBodyAccMean,gravity)']


In [50]:
# Space-separated variant of the training CSV, printed for inspection
print(train_dataset.to_csv(sep=" "))

 Activity_Name tBodyAcc-mean()-X tBodyAcc-mean()-Y tBodyAcc-mean()-Z tBodyAcc-std()-X tBodyAcc-std()-Z tBodyAccJerk-mean()-X tBodyAccJerk-mean()-Y tBodyAccJerk-mean()-Z tBodyGyro-mean()-X tBodyGyro-mean()-Y tBodyGyro-mean()-Z tBodyGyroJerk-mean()-X tBodyGyroJerk-mean()-Y tBodyGyroJerk-mean()-Z angle(tBodyAccMean,gravity) angle(tBodyAccJerkMean),gravityMean) angle(tBodyGyroMean,gravityMean) angle(tBodyGyroJerkMean,gravityMean) fBodyAcc-meanFreq()-X fBodyAcc-meanFreq()-Y fBodyAcc-meanFreq()-Z fBodyAccJerk-meanFreq()-Y fBodyGyro-meanFreq()-X fBodyGyro-meanFreq()-Y fBodyGyro-meanFreq()-Z
0 WALKING_DOWNSTAIRS 0.28320271 -0.047023857 -0.16898586 0.38494871 -0.31033164 -0.012960718 0.10168341 -0.16120374 -0.033256744 -0.092865376 0.088382514 0.24830562 -0.11021282 -0.12335672 -0.034924227 0.55803642 0.25897517 -0.85485787 -0.41568147 -0.13270251 0.077106191 -0.46360011 -0.17365034 -0.11299746 -0.1672175
1 WALKING_DOWNSTAIRS 0.2569044 -0.036623068 -0.13385588 0.20140924 0.34418337 0.34661123 0

In [141]:
# True activity names of the held-out rows, in the same order as test_arr
correct_arr = [label(row) for row in test_arr]

In [142]:
# Predicted labels, one per test row, laid out five per line
predicted_labels = [
    'WALKING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS', 'STANDING',
    'STANDING', 'WALKING_UPSTAIRS', 'STANDING', 'STANDING', 'LAYING',
    'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'STANDING', 'STANDING', 'WALKING',
    'STANDING', 'STANDING', 'LAYING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS',
]

In [143]:
# Number of predicted labels; compare with the number of test rows
print(len(predicted_labels))

20


In [144]:
# Accuracy: fraction of positions where the predicted label equals the true label.
# NOTE(review): the element-wise comparison assumes both lists have the same
# length - confirm before trusting the number.
np.mean(np.array(predicted_labels)==np.array(correct_arr))

0.2

In [112]:
# Spot-check the activity name of row 135
label(135)

'WALKING_DOWNSTAIRS'

In [40]:
import os
from groq import Groq

In [72]:
# System message: role description plus the names of the retained features
system_prompt = f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """

# User message: ask which of those features matter
user_prompt = """
                        Which features do you think are relevant for differentiating between activities?
            """

client = Groq(api_key=Groq_Token)

chat_completion = client.chat.completions.create(
    model=groq_models["llama3.1-70b"],
    temperature=0,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
)

print(chat_completion.choices[0].message.content)

As a Human Activity Recognition model, I've analyzed the provided features and identified some that are likely to be relevant for differentiating between activities. Here are my top picks:

**1. Accelerometer features:**
	* `tBodyAcc-mean()-X`, `tBodyAcc-mean()-Y`, `tBodyAcc-mean()-Z`: These features capture the mean acceleration of the body in three dimensions, which can help distinguish between activities like walking, running, and standing.
	* `tBodyAcc-std()-X`, `tBodyAcc-std()-Y`, `tBodyAcc-std()-Z`: Standard deviation of acceleration can indicate the intensity or variability of movements, which can help separate activities like walking and running.
	* `tBodyAccJerk-mean()-X`, `tBodyAccJerk-mean()-Y`, `tBodyAccJerk-mean()-Z`: Jerk signals (rate of change of acceleration) can help identify sudden movements, like those involved in walking upstairs or downstairs.

**2. Gyroscope features:**
	* `tBodyGyro-mean()-X`, `tBodyGyro-mean()-Y`, `tBodyGyro-mean()-Z`: Mean gyroscope values can

In [74]:
# Same request as the previous Groq cell (identical prompts, model and
# temperature), issued again.
client = Groq(
    api_key=Groq_Token,
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """,
        },
        {
            "role": "user",
            "content": f"""
                        Which features do you think are relevant for differentiating between activities?
            """
        }
    ],
    model=groq_models["llama3.1-70b"],
    temperature=0
)

# Print only the text of the first returned choice
print(chat_completion.choices[0].message.content)

Based on the provided features, I'll identify some relevant ones for differentiating between activities. Keep in mind that the importance of features might vary depending on the specific activities and the dataset.

**Relevant features for differentiating between activities:**

1. **Acceleration features:**
	* `tBodyAcc-mean()-X`, `tBodyAcc-mean()-Y`, `tBodyAcc-mean()-Z`: These features capture the average acceleration in each axis, which can help differentiate between activities like walking, standing, and sitting.
	* `tBodyAcc-std()-X`, `tBodyAcc-std()-Y`, `tBodyAcc-std()-Z`: These features capture the standard deviation of acceleration in each axis, which can help identify activities with varying levels of movement, such as walking and running.
2. **Jerk features:**
	* `tBodyAccJerk-mean()-X`, `tBodyAccJerk-mean()-Y`, `tBodyAccJerk-mean()-Z`: These features capture the average jerk (rate of change of acceleration) in each axis, which can help differentiate between activities like wa

In [None]:
# Backticks are not string delimiters in Python (the original line was a
# SyntaxError); every name is now a normal string literal.
arr = ["tBodyAcc-mean()-X", "tBodyAcc-mean()-Y", "tBodyAcc-mean()-Z"]

In [78]:
# System message: role description plus the names of the retained features
system_prompt = f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """

# User message: same question as before, but asking for a copyable Python array
user_prompt = """
                        Which features do you think are relevant for differentiating between activities? Provide the output as a single copyable python array.
            """

client = Groq(api_key=Groq_Token)

chat_completion = client.chat.completions.create(
    model=groq_models["llama3.1-70b"],
    temperature=0,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
)

print(chat_completion.choices[0].message.content)

Based on the provided features, I've selected the ones that I think are most relevant for differentiating between activities. Here's the list of features in a copyable Python array:

```python
relevant_features = [
    'tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z',
    'tBodyAcc-std()-X', 'tBodyAcc-std()-Y', 'tBodyAcc-std()-Z',
    'tBodyAccJerk-mean()-X', 'tBodyAccJerk-mean()-Y', 'tBodyAccJerk-mean()-Z',
    'tBodyGyro-mean()-X', 'tBodyGyro-mean()-Y', 'tBodyGyro-mean()-Z',
    'tBodyGyroJerk-mean()-X', 'tBodyGyroJerk-mean()-Y', 'tBodyGyroJerk-mean()-Z',
    'tBodyAccMag-mean()', 'tBodyAccMag-std()', 'tBodyGyroMag-mean()', 'tBodyGyroMag-std()',
    'tBodyAccJerkMag-mean()', 'tBodyAccJerkMag-std()', 'tBodyGyroJerkMag-mean()', 'tBodyGyroJerkMag-std()',
    'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)',
    'angle(tBodyGyroMean,gravityMean)', 'angle(tBodyGyroJerkMean,gravityMean)',
    'fBodyAcc-meanFreq()-X', 'fBodyAcc-meanFreq()-Y', 'fBodyAcc-mean

In [None]:
# Few-shot activity prediction through the Groq client: three labelled rows are
# given as examples, then one unlabelled row is presented for prediction.
client = Groq(
    api_key=Groq_Token,
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The features are: {features}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """,
        },
        {
            "role": "user",
            # Example 1 now passes row 489 to example(), matching label(489);
            # the original call had no argument and raised a TypeError.
            "content": f"""
                        Look at these examples for reference:
                        Example 1: 
                        
                        Feature Data: {" ".join(map(str, example(489)))}
                        Activity Label: {label(489)}

                        Example 2:
                        Feature Data: {" ".join(map(str, example(100)))}
                        Activity Label: {label(100)}

                        Example 3:
                        Feature Data: {" ".join(map(str, example(345)))}
                        Activity Label: {label(345)}
                        
                        Now, predict this:
                        Feature Data: {" ".join(map(str, example(706)))}
            """
        }
    ],
    # Same model id as before ("llama-3.1-70b-versatile"), looked up through the
    # shared dictionary like the other Groq cells.
    model=groq_models["llama3.1-70b"],
    temperature=0
)

print(chat_completion.choices[0].message.content)

In [None]:
# One independent, initially empty list per activity name
act_dic = {
    name: []
    for name in (
        "WALKING",
        "WALKING_UPSTAIRS",
        "WALKING_DOWNSTAIRS",
        "SITTING",
        "STANDING",
        "LAYING",
    )
}

In [None]:
# Group row positions by activity name.
# The lists are emptied first so that re-running this cell does not append
# every row position a second time.
for indices in act_dic.values():
    indices.clear()

for i in range(len(dataframeY)):
    act_dic[label(i)].append(i)

In [None]:
# Show the activity names used as keys of act_dic
act_dic.keys()

In [48]:
# Preview of a prompt that interpolates the DataFrame object directly: the text
# that gets embedded is the frame's printed representation.
print(f"""
                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The training dataset is: {train_dataset}
                * Provide the activity label and, if necessary, a brief explanation of your reasoning.
            """)


                * You are a Human Activity Recognition model.
                * Your task is to analyze the data given in the features provided and guess the activity being performed by the person.
                * The activities are: WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING.
                * The training dataset is:            Activity_Name  tBodyAcc-mean()-X  tBodyAcc-mean()-Y  \
4525  WALKING_DOWNSTAIRS           0.283203          -0.047024   
1446  WALKING_DOWNSTAIRS           0.256904          -0.036623   
5995             WALKING           0.291316          -0.001065   
4222             SITTING           0.276116          -0.010909   
6754  WALKING_DOWNSTAIRS           0.256382           0.000428   
...                  ...                ...                ...   
1701             WALKING           0.197217           0.006483   
3383  WALKING_DOWNSTAIRS           0.215094          -0.011281   
208              SITTING           0.278327         