### Setup

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from langchain_groq.chat_models import ChatGroq
import tsfel

# Groq model catalogue: short alias -> model identifier string.
groq_models = dict(
    [
        ("llama3-70b", "llama3-70b-8192"),
        ("gemma-7b", "gemma-7b-it"),
        ("llama3.1-70b", "llama-3.1-70b-versatile"),
        ("llama3-8b", "llama3-8b-8192"),
    ]
)

# Identifier of the model used by the LLM experiments; change the alias to switch.
model = groq_models["llama3-70b"]

# Constants
# The API key comes from the environment; it is None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# ANSI escape sequences used to colour the per-test-case progress line.
green, red = "\x1b[32;40m", "\x1b[31;40m"
reset = "\x1b[0m"  # switches colouring off again

# Activity name -> integer label.
classes = dict(
    zip(
        ("WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING", "STANDING", "LAYING"),
        range(1, 7),
    )
)
# Activity names in label order, so folders[label - 1] is the name for a label.
folders = [*classes]

# Subsampling factor: the LLM loops only evaluate the first len(X_test)//N rows,
# because querying the model for every sample would take too long.
N = 30

# Load the feature matrices and integer activity labels from the dataset's
# own train/ and test/ folders.
X_train = np.loadtxt("./HAR/UCI HAR Dataset/train/X_train.txt")
y_train = np.loadtxt("./HAR/UCI HAR Dataset/train/y_train.txt",dtype=np.int32)
X_test = np.loadtxt("./HAR/UCI HAR Dataset/test/X_test.txt")
y_test = np.loadtxt("./HAR/UCI HAR Dataset/test/y_test.txt",dtype=np.int32)

# features.txt is parsed as whitespace-separated columns without a header row;
# column 1 is kept and later used as the DataFrame column labels.
# The separator is a raw string: "\s" inside a normal string literal is an
# invalid escape sequence, which recent Python versions warn about.
features = pd.read_csv("./HAR/UCI HAR Dataset/features.txt",sep=r"\s+",header=None)[1]

# Shapes of everything that was just loaded, as a quick sanity check.
print(X_train.shape,y_train.shape,X_test.shape,y_test.shape,features.shape)

# Pool the dataset's original train and test partitions so that a fresh split
# can be drawn from all samples.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# Stratified 70/30 split; a different seed value gives a different random split.
seed = 100
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=seed,
    stratify=y,
)

# Wrap the feature matrices in DataFrames labelled with the feature names.
# The label vectors stay plain NumPy arrays.
X_train, X_test = (pd.DataFrame(part, columns=features) for part in (X_train, X_test))

print(X_train.shape,y_train.shape,X_test.shape,y_test.shape,features.shape)

print("Training data shape: ", X_train.shape)
print("Testing data shape: ", X_test.shape)

# Collect one labelled example per activity: the first training row encountered
# for each distinct label, in order of first appearance.
X_examples, y_examples = [], []

for row_pos, label in enumerate(y_train):
    if label in y_examples:
        continue  # this activity already has its example
    X_examples.append(X_train.iloc[row_pos, :])
    y_examples.append(label)

def zero_shot_prompt(data):
    """Build the zero-shot classification prompt for one sample.

    Args:
        data: The sample to classify; it is interpolated into the prompt
            using its string representation.

    Returns:
        The prompt text: the task description, the six candidate labels,
        the output rules and finally the sample itself.
    """
    return f"""
* Your task is to classify the given featurised 3-axis accelerometer data into one of the following activity labels:
1) WALKING
2) SITTING
3) STANDING
4) WALKING_UPSTAIRS
5) WALKING_DOWNSTAIRS
6) LAYING
* Only output the identified label and nothing else.
* Do not provide any explanation or analysis.
Acceleration Data:
{data}
"""

# Text block embedded in the few-shot prompt: for every collected example, its
# data followed by the matching activity name (labels are 1-based, folders is 0-based).
examples = "\n".join(
    f"EXAMPLE {k} DATA :\n{sample}\nEXAMPLE {k} LABEL : {folders[label-1]}"
    for k, (sample, label) in enumerate(zip(X_examples, y_examples))
)

def few_shot_prompt(data):
    """Build the few-shot classification prompt for one sample.

    The module-level ``examples`` text is looked up when the function is
    called and placed ahead of the unlabeled sample.

    Args:
        data: The sample to classify; it is interpolated into the prompt
            using its string representation.

    Returns:
        The prompt text: instructions, the labelled examples and the test sample.
    """
    return f"""
* You are HAR tool.
* Your task is to analyze the provided labeled featurised 3-axis accelerometer data and learn the patterns associated with the label in order to identify unlabeled data.
* Only give output in one word and do not provide any explanation.
{examples}

TEST DATA: {data}
LABEL for TEST DATA: ?
"""

(7352, 561) (7352,) (2947, 561) (2947,) (561,)
(7209, 561) (7209,) (3090, 561) (3090,) (561,)
Training data shape:  (7209, 561)
Testing data shape:  (3090, 561)


### Zero Shot

In [33]:
# Zero-shot run: each of the first len(X_test)//N test rows is sent to the LLM
# without labelled examples, and the reply is compared with the true activity name.
llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)

zero_shot_ans = []            # raw reply for every evaluated row
zero_shot_correct_count = 0   # replies equal to the true activity name

for i in range(len(X_test)//N):
    actual = folders[y_test[i]-1]   # labels are 1-based, folders is 0-based

    query = zero_shot_prompt(X_test.iloc[i,:])
    ans = llm.invoke(query).content
    zero_shot_ans.append(ans)

    hit = ans==actual
    if hit:
        zero_shot_correct_count+=1
    color = green if hit else red

    # "\r" returns to the start of the line, so every case overwrites the previous one.
    print(f"\r{model:<15}: Test case #{i+1:<3} Output: {color}{ans:<18} {reset}Actual: {actual:<20} Correct: {zero_shot_correct_count}",end="")

# Finish the progress line, then print the summary.
print()
print(f"""
Model:              {model}
Total Test Cases:   {len(X_test)//N}
Correct Predictions:{zero_shot_correct_count}
Accuracy:           {zero_shot_correct_count/(len(X_test)//N):.4f}
""")

llama3-70b-8192: Test case #103 Output: [31;40mSTANDING           [0mActual: WALKING              Correct: 24

Model:              llama3-70b-8192
Total Test Cases:   103
Correct Predictions:24
Accuracy:           0.2330



### Few Shot

In [34]:
# Few-shot run: each of the first len(X_test)//N test rows is sent to the LLM
# together with the labelled examples, and the reply is compared with the true
# activity name.
few_shot_correct_count = 0

llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)
few_shot_ans = []   # raw reply for every evaluated row

for i in range(len(X_test)//N):

    # Positional lookup (iloc), matching y_test[i] and the zero-shot cell;
    # label-based .loc only agrees while X_test keeps its default index.
    query = few_shot_prompt(X_test.iloc[i,:])
    ans = llm.invoke(query).content
    few_shot_ans.append(ans)

    # Labels are 1-based, folders is the 0-based list of activity names.
    if(ans==folders[y_test[i]-1]):
        few_shot_correct_count+=1
        color = green
    else:
        color = red

    # "\r" rewrites the same console line. Cases are numbered from 1, as in the
    # zero-shot cell, so the last number equals the reported total.
    print(f"\r{model:<15}: Test case #{i+1:<3} Output: {color}{ans:<18} {reset}Actual: {folders[y_test[i]-1]:<20} Correct:{few_shot_correct_count}",end="")

# Finish the progress line, then print the summary.
print()
print(f"""
Model:               {model}
Total Test Cases:    {len(X_test)//N}
Correct Predictions: {few_shot_correct_count}
Accuracy:            {few_shot_correct_count/(len(X_test)//N):.2f}
""")

llama3-70b-8192: Test case #102 Output: [31;40mSTANDING           [0mActual: WALKING              Correct:28

Model:               llama3-70b-8192
Total Test Cases:    103
Correct Predictions: 28
Accuracy:            0.27



### Comparison between Few-shot and Zero-shot

Although the accuracy depends on the choice of examples and test cases, few-shot prompting generally achieves higher accuracy than zero-shot prompting, unless the examples provided are very biased.

The difference in accuracy arises because few-shot prompting supplies an example of every possible class, allowing the LLM to compare the test data against these examples, whereas this is not possible in zero-shot prompting, as LLMs are not trained on large numerical data for HAR.

### Comparison of Few-shot and Decision Tree

In [None]:
# Baseline: a decision tree fitted on the full training split and scored on
# the full test split.
clf = DecisionTreeClassifier(
    criterion="gini",
    random_state=46,
    min_samples_split=4,
    max_features='sqrt',
).fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:<.3f}")

The Decision Tree performs better than few-shot prompting because a few examples are not enough for the LLM to find the complex patterns in the data, which makes it more error-prone.

In a Decision Tree, the model is trained iteratively over the data in order to find intricate patterns.

### Limitations of Zero-shot and Few-Shot in HAR

In zero-shot learning, the LLM has no examples to learn from, so it is more prone to errors; however, it is comparatively faster than few-shot learning, which must first process the provided examples and then analyze the input.

In few-shot learning, the LLM must be given correct examples, which are sometimes unknown or biased. Bias in the examples carries over into the few-shot predictions and lowers the accuracy.

### Test with New Activity

We have taken "JOGGING" as the new activity. We downloaded the data from https://www.cis.fordham.edu/wisdm/dataset.php as raw CSV.
Then we took sample data from user id 33 and pre-processed the raw data to remove all other activities and user ids.

The sample data file is saved as 'HAR/jogging.csv'

In [None]:
# Zero-shot test on an activity outside the six known classes.
time = 10       # window length in seconds
offset = 100    # number of leading rows skipped before the window starts

# The file uses ';' as its line terminator and ',' between fields.
df = pd.read_csv("./HAR/jogging.csv",sep=",",header=0,lineterminator=';')

# Build ONE configuration containing the statistical, temporal and spectral
# domains. Assigning cgf three times in a row kept only the last (spectral)
# configuration, so the other two domains were never extracted.
cgf = {}
for domain in ("statistical", "temporal", "spectral"):
    cgf.update(tsfel.get_features_by_domain(domain))

# Keep a window of time*50 rows starting at `offset`, dropping the first column.
# NOTE(review): 50 is used both as samples per second for the window length and
# as fs below - confirm it matches the actual sampling rate of jogging.csv.
df = df.iloc[offset:offset+(time*50),1:]

X_new_test = tsfel.time_series_features_extractor(cgf,df,fs=50)

query = zero_shot_prompt(X_new_test)

# Reuses the `llm` client created in the earlier evaluation cells.
ans = llm.invoke(query).content
print(ans)

We find that the LLM is unable to identify the JOGGING activity: it was given no examples associated with it, and the zero-shot prompt restricts the answer to the six original labels, none of which is JOGGING.

### 4) Few Shot with Random Data

In [None]:
# Control experiment: uniformly random feature vectors with random labels,
# shaped like the test split.
# A seeded generator makes the random data identical on every run.
rng = np.random.default_rng(seed)
X_random = pd.DataFrame(rng.random(X_test.shape),columns=features)
# The upper bound is exclusive, so len(folders)+1 is needed for all labels
# 1..len(folders) to be drawn; with len(folders) the last activity never appeared.
y_random = rng.integers(1,len(folders)+1,len(X_test))

rand_correct_count = 0
rand_ans = []   # raw reply for every evaluated row

llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)

for i in range(len(X_random)//N):

    # NOTE(review): the section heading says "Few Shot" but the zero-shot prompt
    # is used here - confirm which one is intended.
    query = zero_shot_prompt(X_random.iloc[i])
    ans = llm.invoke(query).content
    rand_ans.append(ans)

    # Labels are 1-based, folders is the 0-based list of activity names.
    if(ans==folders[y_random[i]-1]):
        rand_correct_count+=1
        color = green
    else:
        color = red

    # "\r" rewrites the same console line. Cases are numbered from 1 so the
    # last number equals the reported total.
    print(f"\r{model:<15}: Test case #{i+1:<3} Output: {color}{ans:<18} {reset}Actual: {folders[y_random[i]-1]:<20} Correct:{rand_correct_count}",end="")

# Finish the progress line, then print the summary.
print()
print(f"""
Model:               {model}
Total Test Cases:    {len(X_random)//N}
Correct Predictions: {rand_correct_count}
Accuracy:            {rand_correct_count/(len(X_random)//N):.2f}
""")