## **Task 4 : Data Collection and Decision Tree Implement  [4 marks]**

In [26]:
import warnings

warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)


# Library imports
import pandas as pd
import numpy as np
import os 

# Give the path of the test and train folder of UCI HAR Dataset
train_path = "./UCI HAR Dataset/train"
test_path = "./UCI HAR Dataset/test"

# Dictionary of activities. Provided by the dataset.
ACTIVITIES = {
    1: 'WALKING',
    2: 'WALKING_UPSTAIRS',
    3: 'WALKING_DOWNSTAIRS',
    4: 'SITTING',
    5: 'STANDING',
    6: 'LAYING',
}

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Combining Traing Data
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Load all the accelerometer data
total_acc_x = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_x_train.txt"), sep='\\s+',
                          header=None)
total_acc_y = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_y_train.txt"), sep='\\s+',
                          header=None)
total_acc_z = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_z_train.txt"), sep='\\s+',
                          header=None)

# Read the subject IDs
subject_train = pd.read_csv(os.path.join(train_path, "subject_train.txt"), sep='\\s+', header=None)

# Read the labels
y = pd.read_csv(os.path.join(train_path, "y_train.txt"), sep='\\s+', header=None)

# Toggle through all the subjects.
for subject in np.unique(subject_train.values):

    sub_idxs = np.where(subject_train.iloc[:, 0] == subject)[0]
    labels = y.loc[sub_idxs]

    # Toggle through all the labels.
    for label in np.unique(labels.values):

        # make the folder directory if it does not exist
        if not os.path.exists(os.path.join("Combined", "Train", ACTIVITIES[label])):
            os.makedirs(os.path.join("Combined", "Train", ACTIVITIES[label]))

        label_idxs = labels[labels.iloc[:, 0] == label].index

        accx = []
        accy = []
        accz = []

        for idx in label_idxs:
            if accx is not None:
                accx = np.hstack((accx, total_acc_x.loc[idx][64:]))
                accy = np.hstack((accy, total_acc_y.loc[idx][64:]))
                accz = np.hstack((accz, total_acc_z.loc[idx][64:]))

            else:
                accx = total_acc_x.loc[idx]
                accy = total_acc_y.loc[idx]
                accz = total_acc_z.loc[idx]

        # saving the data into csv file
        data = pd.DataFrame({'accx': accx, 'accy': accy, 'accz': accz})
        save_path = os.path.join("Combined", "Train", ACTIVITIES[label], f"Subject_{subject}.csv")
        data.to_csv(save_path, index=False)

print("Done Combining the training data")

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Combining Test Data
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Load all the accelerometer data
total_acc_x = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_x_test.txt"), sep='\\s+',
                          header=None)
total_acc_y = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_y_test.txt"), sep='\\s+',
                          header=None)
total_acc_z = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_z_test.txt"), sep='\\s+',
                          header=None)

# Read the subject IDs
subject_test = pd.read_csv(os.path.join(test_path, "subject_test.txt"), sep='\\s+', header=None)

# Read the labels
y = pd.read_csv(os.path.join(test_path, "y_test.txt"), sep='\\s+', header=None)

# Toggle through all the subjects.
for subject in np.unique(subject_test.values):

    sub_idxs = np.where(subject_test.iloc[:, 0] == subject)[0]
    labels = y.loc[sub_idxs]

    # Toggle through all the labels.
    for label in np.unique(labels.values):

        if not os.path.exists(os.path.join("Combined", "Test", ACTIVITIES[label])):
            os.makedirs(os.path.join("Combined", "Test", ACTIVITIES[label]))

        label_idxs = labels[labels.iloc[:, 0] == label].index

        accx = []
        accy = []
        accz = []
        for idx in label_idxs:
            if accx is not None:
                accx = np.hstack((accx, total_acc_x.loc[idx][64:]))
                accy = np.hstack((accy, total_acc_y.loc[idx][64:]))
                accz = np.hstack((accz, total_acc_z.loc[idx][64:]))

            else:
                accx = total_acc_x.loc[idx]
                accy = total_acc_y.loc[idx]
                accz = total_acc_z.loc[idx]

        # saving the data into csv file
        data = pd.DataFrame({'accx': accx, 'accy': accy, 'accz': accz})
        save_path = os.path.join("Combined", "Test", ACTIVITIES[label], f"Subject_{subject}.csv")
        data.to_csv(save_path, index=False)

print("Done Combining the testing data")
print("Done Combining the data")

# Creation of raw dataset

x = pd.read_csv(os.path.join(train_path,"X_train.txt"), sep='\\s+', header=None)
featurel = pd.read_csv(os.path.join("./UCI HAR Dataset","features.txt"), sep='\\s+', header=None)
featurelist = list(featurel.loc[:,1])
x.columns = featurelist
yg = pd.read_csv(os.path.join(train_path,"y_train.txt"), sep='\\s+', header=None)
xtest = pd.read_csv(os.path.join(test_path,"X_test.txt"), sep='\\s+', header=None)
xtest.columns = featurelist
ytest = pd.read_csv(os.path.join(test_path,"y_test.txt"), sep='\\s+', header=None)

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Done Combining the training data
Done Combining the testing data
Done Combining the data


In [77]:
# Library imports
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# plt.style.use('dark_background') # for dark background of plts

# Constants
time = 10
offset = 100
folders = ["LAYING", "SITTING", "STANDING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]
classes = {"WALKING": 1, "WALKING_UPSTAIRS": 2, "WALKING_DOWNSTAIRS": 3, "SITTING": 4, "STANDING": 5, "LAYING": 6}

combined_dir = os.path.join("Combined")

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Train Dataset
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

X_train = []
y_train = []
dataset_dir = os.path.join(combined_dir, "Train")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir, folder))

    for file in files:
        df = pd.read_csv(os.path.join(dataset_dir, folder, file), sep=",", header=0)
        df = df[offset:offset + time * 50]
        X_train.append(df.values)
        y_train.append(classes[folder])

X_train = np.array(X_train)
y_train = np.array(y_train)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Test Dataset
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

X_test = []
y_test = []
dataset_dir = os.path.join(combined_dir, "Test")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir, folder))
    for file in files:
        df = pd.read_csv(os.path.join(dataset_dir, folder, file), sep=",", header=0)
        df = df[offset:offset + time * 50]
        X_test.append(df.values)
        y_test.append(classes[folder])

X_test = np.array(X_test)
y_test = np.array(y_test)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Final Dataset
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# USE THE BELOW GIVEN DATA FOR TRAINING and TESTING purposes

# concatenate the training and testing data
X = np.concatenate((X_train, X_test))
y = np.concatenate((y_train, y_test))

# split the data into training and testing sets. Change the seed value to obtain different random splits.
seed = 4
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed, stratify=y)

print("Training data shape: ", X_train.shape)
print("Testing data shape: ", X_test.shape)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Training data shape:  (126, 500, 3)
Testing data shape:  (54, 500, 3)


In [78]:
df = pd.DataFrame(np.concatenate((X_train.reshape((-1, 3)), np.repeat(y_train, 500).reshape(-1,1)), axis = 1), columns = ["accx", "accy", "accz", "Label"])

In [79]:
df["totalacc"] = df["accx"]**2 + df["accy"]**2 + df["accz"]**2

In [152]:
import tsfel

cfg = tsfel.get_features_by_domain()
features = tsfel.time_series_features_extractor(cfg,df[["accx","accy","accz"]], fs=50, window_size=500)


*** Feature extraction started ***



*** Feature extraction finished ***


### 1. Use the Decision Tree model trained on the UCI-HAR dataset to predict the activities that you performed. Report the accuracy, precision, recall and confusion matrix of the model. You have three version of UCI dataset you can use a)Raw data from accelerometer, b)TSFEL featurised data, c)Features provided by author. Choose which version to use, ensuring that your test data is similar to your training data. How did the model perform? [1 marks]


In [83]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

d = pd.read_csv("./Task4-Data/Processed/Combined.csv")
acc = d[['gFx','gFy','gFz']]
dtest = d["Label"]
X_train_reshaped = X_train.reshape(-1,1500)
acc = acc.to_numpy()
acc_reshaped = acc.reshape(18,1500)
clf = DecisionTreeClassifier(random_state=2,max_depth=6)
clf.fit(X_train_reshaped, y_train)
dpred = clf.predict(acc_reshaped)
print(f"Accuracy of author's raw data trained decision tree on our data is {accuracy_score(dtest[::500], dpred)*100:.2f}%")

Accuracy of author's raw data trained decision tree on our data is 38.89%


In [85]:
clf = DecisionTreeClassifier(random_state=2,max_depth=8)

clf.fit(features, y_train)
# print(d.columns)
d.columns = ['Person','accx','accy','accz','Total acc','Label']
dtsfel = tsfel.time_series_features_extractor(cfg,d[["accx","accy","accz"]], fs=50, window_size=500)
dpred = clf.predict(dtsfel)
print(f"Accuracy of TSFEL trained decision tree on our data is {accuracy_score(dtest[::500],dpred)*100:.2f}%")

*** Feature extraction started ***



*** Feature extraction finished ***
Accuracy of TSFEL trained decision tree on our data is 27.78%


> We cant use author's featurized data as we dont have the same features of data from our recordings.

| Method | Accuracy | 
| --- | --- |
| Raw Data | 38.89% |
| TSFEL | 27.78% | 

### 2. Use the data you collected to predict the activities that you performed. Decide whether to apply preprocessing and featurization, and if so, choose the appropriate methods. How did the model perform? [1 marks]


`own data to train and test`

In [45]:
# raw data first
X_train,x_test,y_train,y_test = train_test_split(acc,dtest,test_size=0.3)
clf = DecisionTreeClassifier(random_state=2,max_depth=8)
clf.fit(X_train,y_train)
dpred = clf.predict(x_test)
print(f"Accuracy of the model trained and tested on our raw data is {accuracy_score(y_test,dpred)*100:.2f}%")

Accuracy of the model trained and tested on our raw data is 80.44%


To reduce bias in the training data, a better approach will be to train the model with 2 person's data and test it on 3rd person's data

> Now applying `featurization` using **tsfel**

In [None]:
cfg = tsfel.get_features_by_domain()
acctsfel = tsfel.time_series_features_extractor(cfg,acc,fs=50,window_size=500)

*** Feature extraction started ***



*** Feature extraction finished ***


In [48]:
xtr,xt,ytr,yt = train_test_split(acctsfel,dtest[::500],test_size=0.3)
clf = DecisionTreeClassifier(random_state=42,max_depth=8)
clf.fit(xtr,ytr)
ypr = clf.predict(xt)
print(f"Accuracy of the model trained and tested on our raw data is {accuracy_score(yt,ypr)*100:.2f}%")

Accuracy of the model trained and tested on our raw data is 50.00%


Accuracy fell after featurizing the data using tsfel
| Method | Accuracy | 
| --- | --- |
| Raw Data | 80.44% |
| TSFEL | 50% | 

### 3. Use the Few-Shot prompting method using UCI-HAR dataset to predict the activities that you performed. Ensure that both your examples and test query undergo similar preprocessing. How did the model perform? [1 marks]


for _giving example_ data to the `LLM`, we'll use the tsfel data extracted from the raw UCI-HAR dataset

To choose which features should be used, we'll ask the llm itself

In [173]:
# Groq API and Models 
import os
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
load_dotenv()
groq_api_key_akash = os.getenv("API_KEY_AKASH") #loading api keys from .env file 
groq_api_key_romit= os.getenv("API_KEY_ROMIT")

groq_models = {
    "llama3-70b": "llama3-70b-8192", 
    "mixtral": "mixtral-8x7b-32768", 
    "gemma-7b": "gemma-7b-it",
    "llama3.1-70b":"llama-3.1-70b-versatile",
    "llama3-8b":"llama3-8b-8192",
    "llama3.1-8b":"llama-3.1-8b-instant",
    "gemma-9b":"gemma2-9b-it"
}

modelName = "llama3.1-70b"

llm = ChatGroq(model=groq_models[modelName], api_key=groq_api_key_akash, temperature=0)

In [139]:
answer = llm.invoke("If you are a Human Activity recognition model. What features will be best suited to identify and classify data as an activity. Select and name the specific tsfel features from the features extracted from a raw dataset containing accelerometer data for Human Activity recognition")
print(answer.content)

As a Human Activity Recognition (HAR) model, I'd be delighted to extract and utilize the most informative features from accelerometer data to accurately identify and classify various human activities. Here are the top features I'd select from the Time Series Feature Extraction Library (TSFEL) to achieve this:

**1. Time Domain Features:**

* **Mean** (`mean`): Average acceleration value, which can help distinguish between activities with different intensity levels (e.g., walking vs. running).
* **Standard Deviation** (`std`): Measures the variability of acceleration values, useful for identifying activities with varying movement patterns (e.g., jumping vs. walking).
* **Root Mean Square** (`rms`): A measure of the average power of the signal, which can help distinguish between activities with different energy expenditure (e.g., running vs. sitting).

**2. Frequency Domain Features:**

* **Spectral Power** (`spectral_power`): Represents the distribution of power across different frequen

Hence, we chose the following features `"accx_Spectral entropy", "accx_Power bandwidth", "accx_Maximum frequency", "accx_Median frequency", "accx_Root mean square"`

In [154]:
features_chosen = ["accx_Spectral entropy", "accx_Power bandwidth", "accx_Maximum frequency", "accx_Median frequency", "accx_Root mean square"]

Next, we choose: `"accx_Mean", "accx_Standard deviation", "accy_Mean", "accy_Standard deviation", "accz_Mean", "accz_Standard deviation"`

In [162]:
features_chosen = ["accx_Mean", "accx_Standard deviation", "accy_Mean", "accy_Standard deviation", "accz_Mean", "accz_Standard deviation"]

In [163]:
features_subset = features[features_chosen]
features_subset

Unnamed: 0,accx_Mean,accx_Standard deviation,accy_Mean,accy_Standard deviation,accz_Mean,accz_Standard deviation
0,0.902649,0.203626,-0.282842,0.146770,-0.412284,0.147509
1,0.255891,0.007540,0.644078,0.004683,0.732082,0.007904
2,0.978388,0.003478,-0.179956,0.008010,-0.279538,0.006517
3,0.951163,0.222022,-0.311365,0.124206,-0.245482,0.127796
4,1.001998,0.238608,-0.188667,0.147697,-0.141150,0.127582
...,...,...,...,...,...,...
121,0.977764,0.198636,-0.199177,0.131601,-0.267679,0.151983
122,0.822427,0.073545,0.354603,0.114741,0.429520,0.058798
123,0.188234,0.004684,0.533758,0.019640,0.833852,0.013331
124,0.904258,0.206619,-0.377194,0.175782,-0.308029,0.174014


for _asking answers_ to the `LLM`, we'll use the tsfel data extracted from the **data collected by us**

In [175]:
features_subset_test = dtsfel[features_chosen]
features_subset_test

Unnamed: 0,accx_Mean,accx_Standard deviation,accy_Mean,accy_Standard deviation,accz_Mean,accz_Standard deviation
0,0.98731,0.171184,-0.014573,0.212459,-0.157483,0.177988
1,0.937482,0.256595,-0.022257,0.211054,-0.330475,0.212309
2,0.988906,0.372105,-0.068037,0.203122,-0.026799,0.246365
3,0.996646,0.003538,0.032597,0.004232,-0.075714,0.009085
4,0.984541,0.05381,0.010729,0.008248,-0.151944,0.072393
5,0.156439,0.074329,-0.004598,0.008746,0.980034,0.089981
6,1.000672,0.211837,-0.004624,0.194552,-0.081874,0.184651
7,0.953038,0.245485,0.079508,0.145019,-0.279726,0.172761
8,0.989491,0.531527,0.039558,0.159744,0.010351,0.238845
9,0.995629,0.003896,-0.05968,0.00435,0.096705,0.007608


In [167]:
def build_query_fewshot(train, test):
    return f"""
    * You are a Human Activity Recognition model. 
    * Your task is to classify the given data as one of the six activity classes listed below.
    * The data is recorded by accelerometer positioned above the torso of the person.
    * The data's format is as such that each line contains the 
    -> Mean acceleration in X, Standard deviation in acceleration in X, Mean acceleration in Y, Standard deviation in acceleration in Y, Mean acceleration in Z, Standard deviation in acceleration in Z.
    * Features are extracted from accelorometer data using tsfel.
    * Pay attention to the examples given.

    1. Walking
    2. Walking Upstairs
    3. Walking Downstairs
    4. Sitting
    5. Standing
    6. Lying Down

    *Here are few examples:
    {train}

    * Provide JUST the number corresponding to the predicted activity

    data: 
    {test}
    """ 

In [176]:
m_iters = 5
iters = 10
number_of_examples = 6
number_of_features = 6
import random

def evaluate_llm_fewshot(iters = 10):
    runs_results = []
    runs_results_staticdynamic = []
    
    train_string = ""

    for i in range(number_of_examples):
        c = (i % 6) + 1  # class number
        sample = random.randint(0, 20) # 21 is length of a training dataframe of a specific activity
        train_string += str(i+1) + ". Data:\n" + ", ".join([str(point) for point in features_subset.iloc[((c-1)*21) + sample].to_numpy()]) + "\n\nActivity: " + str(c) + ": " + ACTIVITIES[c] + "\n\n"

    for _ in range(iters):
        crct_class = random.randint(1, 6)
        sample = random.randint(0, 2) # 3 persons data was recorder
        test_data = features_subset_test.iloc[(sample*3) + crct_class - 1]
        test_string = ", ".join([str(point) for point in test_data.to_numpy()])
        query = build_query_fewshot(train_string, test_string)
        
        answer = llm.invoke(query)

        # print(answer.content, crct_class)
        pred_digit = int(answer.content.split()[-1].lstrip("(").rstrip(")."))
        runs_results.append(1 if pred_digit == crct_class else 0)
        runs_results_staticdynamic.append(1 if (pred_digit in [1, 2, 3] and crct_class in [1, 2, 3]) or (pred_digit in [4, 5, 6] and crct_class in [4, 5, 6]) else 0)
    
    return (np.round(np.mean(runs_results), 4), np.round(np.mean(runs_results_staticdynamic), 4))

accuracies = []
accuracies_staticdynamic = []
for _ in range(m_iters):
    accuracy = evaluate_llm_fewshot(iters=iters)
    accuracies.append(accuracy[0])
    accuracies_staticdynamic.append(accuracy[1])

print(f"Max Accuracy over {m_iters} runs of {iters} prompts each:", max(accuracies) * 100, "\nMean:", np.round(np.mean(accuracies) * 100, 2))
print("Max Accuracy for just differentiating between static and dynamic activities:", max(accuracies_staticdynamic) * 100, "\nMean:", np.round(np.mean(accuracies_staticdynamic) * 100, 2))

Max Accuracy over 5 runs of 10 prompts each: 70.0 
Mean: 28.0
Max Accuracy for just differentiating between static and dynamic activities: 100.0 
Mean: 60.0


> Experimenting with the features: `"accx_Spectral entropy", "accx_Power bandwidth", "accx_Maximum frequency", "accx_Median frequency", "accx_Root mean square"`  
> We got accuracy of `15%`

> Experimenting with the features: `"accx_Mean", "accx_Standard deviation", "accy_Mean", "accy_Standard deviation", "accz_Mean", "accz_Standard deviation"`  
> We got **max** accuracy of `70%` and **mean** of `28%`

### 4. Use the Few-Shot prompting method using the data you collected to predict the activities that you performed. Adopt proper processing methods as needed. How did the model perform? [1 marks]


In [189]:
# the whole dataset in this case is last case's test set
dataset = features_subset_test

In [None]:
m_iters = 5
iters = 10
number_of_examples = 6
number_of_features = 6
import random

def evaluate_llm_fewshot(iters = 10):
    runs_results = []
    runs_results_staticdynamic = []

    test_person = random.randint(0, 2)
    train_people = [i for i in [0, 1, 2] if i != test_person]
    features_subset_test = dataset.iloc[test_person*6:test_person*6 + 6]
    features_subset = pd.concat([dataset.iloc[train_people[0]*6:train_people[0]*6 + 6], dataset.iloc[train_people[1]*6:train_people[1]*6 + 6]])
    # print(features_subset)
    # print(features_subset_test)

    train_string = ""
    for i in range(number_of_examples):
        c = (i % 6) + 1  # class number
        sample = random.randint(0, 1) # 2 is length of a training dataframe of a specific activity
        train_string += str(i+1) + ". Data:\n" + ", ".join([str(point) for point in features_subset.iloc[(sample*3) + c - 1].to_numpy()]) + "\n\nActivity: " + str(c) + ": " + ACTIVITIES[c] + "\n\n"

    for _ in range(iters):
        crct_class = random.randint(1, 6)
        test_data = features_subset_test.iloc[crct_class - 1]
        test_string = ", ".join([str(point) for point in test_data.to_numpy()])
        query = build_query_fewshot(train_string, test_string)
        # print(query)
        
        answer = llm.invoke(query)

        # print(answer.content, crct_class)
        pred_digit = int(answer.content.split()[-1].lstrip("(").rstrip(")."))
        runs_results.append(1 if pred_digit == crct_class else 0)
        runs_results_staticdynamic.append(1 if (pred_digit in [1, 2, 3] and crct_class in [1, 2, 3]) or (pred_digit in [4, 5, 6] and crct_class in [4, 5, 6]) else 0)
    
    return (np.round(np.mean(runs_results), 4), np.round(np.mean(runs_results_staticdynamic), 4))

accuracies = []
accuracies_staticdynamic = []
for _ in range(m_iters):
    accuracy = evaluate_llm_fewshot(iters=iters)
    accuracies.append(accuracy[0])
    accuracies_staticdynamic.append(accuracy[1])

print(f"Max Accuracy over {m_iters} runs of {iters} prompts each:", max(accuracies) * 100, "\nMean:", np.round(np.mean(accuracies) * 100, 2))
print("Max Accuracy for just differentiating between static and dynamic activities:", max(accuracies_staticdynamic) * 100, "\nMean:", np.round(np.mean(accuracies_staticdynamic) * 100, 2))

> Experimenting with the features: `"accx_Mean", "accx_Standard deviation", "accy_Mean", "accy_Standard deviation", "accz_Mean", "accz_Standard deviation"`  
> We got **max** accuracy of `70%` and **mean** of `48%`