### Setup

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import os
from sklearn.model_selection import train_test_split
from langchain_groq.chat_models import ChatGroq

# Available Models
groq_models = {
    "llama3-70b": "llama3-70b-8192",
    "gemma-7b": "gemma-7b-it",
    "llama3.1-70b":"llama-3.1-70b-versatile",
    "llama3-8b":"llama3-8b-8192"
    }
model = groq_models["llama3-70b"]

# Constants
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
green = "\x1b[32;40m"
red =   "\x1b[31;40m"
reset = "\x1b[0m"       # color reset
time = 10
offset = 100
classes = {"WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6}
folders = list(classes.keys())

combined_dir = os.path.join("./HAR/Combined")

X_train=[]
y_train=[]

dataset_dir = os.path.join(combined_dir,"Train")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))

    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_train.append(df.values)
        y_train.append(classes[folder])

X_test=[]
y_test=[]

dataset_dir = os.path.join(combined_dir,"Test")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))
    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_test.append(df.values)
        y_test.append(classes[folder])

# concatenate the training and testing data
X = np.concatenate((X_train,X_test))
y = np.concatenate((y_train,y_test))

# split the data into training and testing sets. Change the seed value to obtain different random splits.
seed = 100
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.7,random_state=seed,stratify=y)

print("Training data shape: ",X_train.shape)
print("Testing data shape: ",X_test.shape)

X_examples = []
y_examples = []

for i in range(len(X_train)):
    if y_train[i] not in y_examples:
        X_examples.append(X_train[i])
        y_examples.append(y_train[i])

zero_shot_prompt = lambda data:f"""
* Your task is to classify the given 3-axis accelerometer data into one of the following activity labels by analyzing the previously given training data.
1) WALKING
2) SITTING
3) STANDING
4) WALKING_UPSTAIRS
5) WALKING_DOWNSTAIRS
6) LAYING
* Accelerometer data represents movement in different directions.
- The x-axis represents forward and backward movement.
- The y-axis represents lateral (side-to-side) movement.
- The z-axis represents vertical movement.
* Only output the identified label and nothing else.
* Do not provide any explanation or analysis.
Acceleration Data:
{data}
"""

examples = "\n".join([f"EXAMPLE {i} DATA :\n{pd.DataFrame(X_examples[i],columns=['accX','accY','accZ'])}\nEXAMPLE {i} LABEL : {folders[y_examples[i]-1]}" for i in range(len(X_examples))])

few_shot_prompt = lambda data:f"""
* You are HAR tool.
* Your task is to analyze the provided labeled 3 axis accelerometer data and learn the patterns associated with the label in order to identify unlabeled data.
* Accelerometer data represents movement in different directions
- The x-axis represents forward and backward movement.
- The y-axis represents lateral (side-to-side) movement.
- The z-axis represents vertical movement.
* Only give output in one word and do not provide any explanation.
{examples}

TEST DATA: {data}
LABEL for TEST DATA: ?
"""

Training data shape:  (54, 500, 3)
Testing data shape:  (126, 500, 3)


### Zero Shot

In [4]:
zero_shot_correct_count = 0

llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)

for i in range(len(X_test)):

    query = zero_shot_prompt(pd.DataFrame(X_test[i],columns=['accX','accY','accZ']))
    ans = llm.invoke(query).content

    if(ans==folders[y_test[i]-1]):
        zero_shot_correct_count+=1
        color = green
    else:
        color = red

    print(f"\r{model:<15}: Test case #{i:<3} Output: {color}{ans:<18} {reset}Actual: {folders[y_test[i]-1]:<20} Correct: {zero_shot_correct_count}",end="")

print()
print(f"""
Model:              {model}
Total Test Cases:   {len(X_test)}
Correct Predictions:{zero_shot_correct_count}
Accuracy:           {zero_shot_correct_count/(len(X_test)):.2f}
""")

llama3-70b-8192: Test case #125 Output: [31;40mSTANDING           [0mActual: SITTING              Correct: 33
Model:              llama3-70b-8192
Total Test Cases:   126
Correct Prediction: 33
Accuracy:           0.26



### Few Shot

In [5]:
few_shot_correct_count = 0

llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)

for i in range(len(X_test)):

    query = few_shot_prompt(pd.DataFrame(X_test[i],columns=['accX','accY','accZ']))
    ans = llm.invoke(query).content

    if(ans==folders[y_test[i]-1]):
        few_shot_correct_count+=1
        color = green
    else:
        color = red

    print(f"\r{model:<15}: Test case #{i:<3} Output: {color}{ans:<18} {reset}Actual: {folders[y_test[i]-1]:<20} Correct:{few_shot_correct_count}",end="")

print()
print(f"""
Model:               {model}
Total Test Cases:    {len(X_test)}
Correct Predictions: {few_shot_correct_count}
Accuracy:            {few_shot_correct_count/len(X_test):.2f}
""")

llama3-70b-8192: Test case #125 Output: [32;40mSITTING            [0mActual: SITTING              Correct:53

Model:               llama3-70b-8192
Total Test Cases:    126
Correct Predictions: 53
Accuracy:            0.42



### Comparison between Few-shot and Zero-shot

Although the accuracy depends on the choices of examples and test cases provided, Few-shot always gives more accuracy compared to Zero-shot.

The reason of this difference between the accuracies lie between the fact that we provide some examples of all the possible  classification allowing the LLM to refer to examples and compare the test data, whereas in Zero-shot this was not possible, as LLMs are not trained on large numerical data for HAR.

### Limitations of Zero-shot and Few-Shot in HAR

In Zero-shot learning as the LLM has no data to learn from, it is more prone to errors, but is comparatively faster than the Few-shot, as Few-shot has to first learn from the examples provided and then analyze the input.

In Few-shot learning, the LLM has to be provided correct examples which are sometimes unknown or are biased. This biasness in the examples can make the Few-shot learning more biased, giving less accuracy.

### Test with New Activity

We have taken "JOGGING" as new activity. We downloaded the data from https://www.cis.fordham.edu/wisdm/dataset.php as raw text.
Then we took sample data from user id 33 and pre-processed the raw data to remove all the other activities and user ids.

The sample data file is saved as `jogging.csv`

In [6]:
df = pd.read_csv("./jogging.csv",sep=",",header=0)
X_new_test = df[time:time+offset]

query = zero_shot_prompt(X_new_test)

ans = llm.invoke(query).content
print(ans)

WALKING


We find that as the LLM was not provided examples associated with the JOGGING activity, it is unable to identify it.

### 4) Few Shot with Random Data

In [7]:
x_min = np.min(X_test)
x_max = np.max(X_test)
y_min = np.min(y_test)
y_max = np.max(y_test)

X_random = (x_max-x_min)*np.random.random(X_test.shape) + x_min
y_random = np.random.randint(y_min,y_max+1,len(X_test))

rand_correct_count = 0

llm = ChatGroq(model=model, api_key=GROQ_API_KEY, temperature=0)

for i in range(len(X_random)):

    query = few_shot_prompt(X_random[i])
    ans = llm.invoke(query).content

    if(ans==folders[y_random[i]-1]):
        rand_correct_count+=1
        color = green
    else:
        color = red

    print(f"\r{model:<15}: Test case #{i:<3} Output: {color}{ans:<18} {reset}Actual: {folders[y_random[i]-1]:<20} Correct:{rand_correct_count}",end="")
    
print()
print(f"""
Model:               {model}
Total Test Cases:    {len(X_random)}
Correct Predictions: {rand_correct_count}
Accuracy:            {rand_correct_count/len(X_random):.2f}
""")

llama3-70b-8192: Test case #125 Output: [31;40mWALKING            [0mActual: STANDING             Correct:26

Model:               llama3-70b-8192
Total Test Cases:    126
Correct Predictions: 26
Accuracy:            0.21

