In [147]:
import  numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import time
import os
import joblib

from langchain_groq.chat_models import ChatGroq

from sklearn.decomposition import PCA
import random

In [148]:
with open(r'C:\Users\LENOVO\Desktop\ML assignment 1\api_key.txt', 'r') as f:
    Groq_token = f.read().strip()

In [149]:
llm = ChatGroq(model = "llama3-70b-8192", api_key = Groq_token, temperature=1)

In [150]:
activity_dictionary = {
    1: 'WALKING',
    2: 'WALKING UPSTAIRS',
    3: 'WALKING DOWNSTAIRS',
    4: 'SITTING',
    5: 'STANDING',
    6: 'LAYING'
}

activities = ['WALKING',
    'WALKING UPSTAIRS',
    'WALKING DOWNSTAIRS',
    'SITTING',
    'STANDING',
    'LAYING']

In [151]:
# Loading data collected from the wild

dataset_dir = r'C:\Users\LENOVO\Desktop\ML assignment 1\Collected Data\Collected Data\Processed files'
files=os.listdir(dataset_dir)
classes = {"Walking":1,"Climbing_up":2,"Climbing_down":3,"Sitting":4,"Standing":5,"Laying":6}

X=[]
Y=[]
for file in files:
    df=pd.read_csv(os.path.join(dataset_dir,file))
    X.append(df.values)

    for i in classes:
        if file.startswith(i):
            Y.append(classes[i])
            break

X_collected = np.array(X)
Y_collected = np.array(Y)

In [152]:
X_collected.shape

(24, 500, 3)

In [153]:
Y_collected.shape

(24,)

## Task 4.3
#### Use the Few-Shot prompting method using UCI-HAR dataset to predict the activities that you performed. Ensure that both your examples and test query undergo similar preprocessing. How did the model perform?

In [155]:
# Numpy arrays obtained by MakeDataset.py 
# --> Train data compiled from the dataset (acc_x, acc_y, acc_z)
x_train_dataset = np.load(r"C:\Users\LENOVO\Desktop\ML assignment 1\X_train.npy")
y_train_dataset = np.load(r"C:\Users\LENOVO\Desktop\ML assignment 1\y_train.npy")

In [156]:
x_train_dataset.shape

(54, 500, 3)

In [157]:
y_train_dataset.shape

(54,)

Activity Feature Vectors is a dictionary that contains activity labels and a few of its corresponding training examples (extracted from the dataset) 

In [158]:
activity_feature_vectors = {}
for activity in activities:
    activity_feature_vectors[activity] = []

In [159]:
for i in range(len(y_train_dataset)):
    y = y_train_dataset[i]
    activity_label = activity_dictionary[y]
    activity_feature_vectors[activity_label].append(x_train_dataset[i, :, :])

The below few shot prompt takes in accelerometer readings from the dataset and predicts on the data that is collected by us

In [160]:
i = 0
few_shot_predictions = []

for test_data,test_label in zip(X_collected, Y_collected): 
    query_few_shot = f"""

* You are a multiclass classifier model
* You will be given below readings of humans performing one of the following six physical activities with the activity_labels: {activities}
* The inputs are 500 timesteps of acc_x, acc_y, acc_z
* Your task is to analyze the given input readings and predict which of the mentioned activities is the participant performing. 

Here are a few examples for you to learn from: {activity_feature_vectors}

* Provide ONLY the activity name. NOTHING ELSE.
For the input data below, VERY CAREFULLY identify the activity being performed: Input --> {test_data}
""" 

    few_shot_answer = llm.invoke(query_few_shot)
    few_shot_predictions.append(few_shot_answer.content)

    i += 1
    print("Test sample number:", i)
    print("Few shot:",few_shot_answer.content)
    # print(quantitative_query_few_shot)
    
    print("Correct:", activity_dictionary[test_label])
    # print(few_shot_predictions)    
    print("---------------------------------------------------------")
    time.sleep(1.5)

Test sample number: 1
Few shot: LAYING
Correct: WALKING DOWNSTAIRS
---------------------------------------------------------
Test sample number: 2
Few shot: LAYING
Correct: WALKING DOWNSTAIRS
---------------------------------------------------------
Test sample number: 3
Few shot: LAYING
Correct: WALKING DOWNSTAIRS
---------------------------------------------------------
Test sample number: 4
Few shot: WALKING
Correct: WALKING DOWNSTAIRS
---------------------------------------------------------
Test sample number: 5
Few shot: LAYING
Correct: WALKING UPSTAIRS
---------------------------------------------------------
Test sample number: 6
Few shot: WALKING
Correct: WALKING UPSTAIRS
---------------------------------------------------------
Test sample number: 7
Few shot: LAYING
Correct: WALKING UPSTAIRS
---------------------------------------------------------
Test sample number: 8
Few shot: WALKING
Correct: WALKING UPSTAIRS
---------------------------------------------------------
Test 

In [161]:
def accuracy(y_true, y_pred):
    y_true = np.array(y_true[:len(y_pred)])
    y_pred = np.array(y_pred)
    correct = y_true == y_pred
    # print(correct)
    return(np.sum(correct)/len(y_true))

In [162]:
y_test = []
for label in list(np.array(Y_collected, dtype = "int32").ravel()):
    y_test.append(activity_dictionary[label])

In [163]:
print("Few shot accuracy: ")
print(accuracy(y_test, few_shot_predictions))

Few shot accuracy: 
0.16666666666666666


The accuracy is equivalent to random prediction. <br>
o The reason for this might be the fact that the orientation in which we used the device might be different to the orientation that the authors used while curating their dataset <br>
o OR due to some differences in the device or software used while taking the readings

## Task 4.4
#### Use the Few-Shot prompting method using the data you collected to predict the activities that you performed. Adopt proper processing methods as needed. How did the model perform?

Random shuffling the collected values in order to get train and test set

In [119]:
import random
train_indices = list(range(X_collected.shape[0]))
random.shuffle(train_indices)
print(train_indices)

[1, 20, 12, 3, 7, 9, 2, 22, 15, 10, 8, 6, 19, 23, 14, 17, 18, 11, 21, 16, 4, 0, 13, 5]


In [120]:
x_train_collected = X_collected[train_indices[:17], :, :]
y_train_collected = Y_collected[train_indices[:17]]

x_test_collected = X_collected[train_indices[17:], :, :]
y_test_collected = Y_collected[train_indices[17:]]

In [121]:
y_train_collected

array([3, 1, 4, 3, 2, 6, 3, 1, 4, 6, 6, 2, 5, 1, 4, 5, 5])

In [122]:
activity_feature_vectors = {}
for activity in activities:
    activity_feature_vectors[activity] = []

In [123]:
for i in range(len(y_train_collected)):
    y = y_train_collected[i]
    activity_label = activity_dictionary[y]
    activity_feature_vectors[activity_label].append(x_train_collected[i, :, :])

The below few shot prompt takes in accelerometer readings from the collected data and predicts on the data that is collected by us

In [124]:
i = 0
few_shot_predictions = []

for test_data,test_label in zip(x_test_collected, y_test_collected): 
    query_few_shot = f"""

* You are a multiclass classifier model
* You will be given below readings of humans performing one of the following six physical activities with the activity_labels: {activities}
* The inputs are 500 timesteps of acc_x, acc_y, acc_z
* Your task is to analyze the given input readings and predict which of the mentioned activities is the participant performing. 

Here are a few examples for you to learn from: {activity_feature_vectors}

* Provide ONLY the activity name. NOTHING ELSE.
For the input data below, VERY CAREFULLY identify the activity being performed: Input --> {test_data}
""" 

    few_shot_answer = llm.invoke(query_few_shot)
    few_shot_predictions.append(few_shot_answer.content)

    i += 1
    print("Test sample number:", i)
    print("Few shot:",few_shot_answer.content)
    # print(quantitative_query_few_shot)
    
    print("Correct:", activity_dictionary[test_label])
    # print(few_shot_predictions)    
    print("---------------------------------------------------------")
    time.sleep(1.5)

Test sample number: 1
Few shot: LAYING
Correct: LAYING
---------------------------------------------------------
Test sample number: 2
Few shot: WALKING
Correct: WALKING
---------------------------------------------------------
Test sample number: 3
Few shot: STANDING
Correct: STANDING
---------------------------------------------------------
Test sample number: 4
Few shot: WALKING
Correct: WALKING UPSTAIRS
---------------------------------------------------------
Test sample number: 5
Few shot: WALKING DOWNSTAIRS
Correct: WALKING DOWNSTAIRS
---------------------------------------------------------
Test sample number: 6
Few shot: STANDING
Correct: SITTING
---------------------------------------------------------
Test sample number: 7
Few shot: WALKING DOWNSTAIRS
Correct: WALKING UPSTAIRS
---------------------------------------------------------


In [126]:
y_test = []
for label in list(np.array(y_test_collected, dtype = "int32").ravel()):
    y_test.append(activity_dictionary[label])

In [127]:
print("Few shot accuracy: ")
print(accuracy(y_test, few_shot_predictions))

Few shot accuracy: 
0.5714285714285714


We get an accuracy of ~57% which is decent but one of the limitations for this is that our test set is too small