# Notebook to demonstrate Zero shot and Few shot Learning

In [1]:
import pandas as pd 
from langchain_groq.chat_models import ChatGroq
from dotenv import load_dotenv
import os
from typing import Union
import sys
sys.path.append(os.path.abspath('..'))
import numpy as np
import re

In [2]:
activity_classes = ["LAYING", "SITTING", "STANDING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]

def get_sample_data(path, files_per_activity=-1):
    sample_data = []
    for activity_class in activity_classes:
        subpath = os.path.join(os.path.abspath('..'), path, activity_class)
        files = os.listdir(subpath)
        num_files = len(files) if files_per_activity == -1 else files_per_activity
        for file in range(num_files):
            sample_data.append(os.path.join(subpath, files[file]))
    return sample_data

def load_csv_files(directory, label, add_timestamp=True, add_total_acc=True, trim_data=True):
    df_list = []
    for filename in os.listdir(directory):
        if filename.endswith('.csv'):
            file_path = os.path.join(directory, filename)
            df = pd.read_csv(file_path)
            df['subject'] = filename.replace('.csv', '')
            # 50 samples per second
            # 1 sample takes 1/50 seconds
            if add_timestamp:
                df['timestamp'] = (df.index + 1) / 50
            if add_total_acc:
                df['total_acc'] = df['accx'] ** 2 + df['accy'] ** 2 + df['accz'] ** 2
            if trim_data:
                df_list.append(df.iloc[100:600, :])
            else:
                df_list.append(df)
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df['y'] = label
    return combined_df

def prepare_dataset(path):
    train_path = os.path.join(path, "Train")
    test_path = os.path.join(path, "Test")
    
    train_dfs = pd.concat([load_csv_files(os.path.join(train_path, activity_class), label) for label, activity_class in enumerate(activity_classes)])
    test_dfs = pd.concat([load_csv_files(os.path.join(test_path, activity_class), label) for label, activity_class in enumerate(activity_classes)])
    
    return train_dfs, test_dfs

def get_tsfel_features(tsfel_features_path):   
    data = pd.DataFrame()
    for label, activity_class in enumerate(activity_classes):
        folder_path = os.path.join(tsfel_features_path, activity_class)
        dfs = load_csv_files(folder_path, label, False, False, False)
        data = pd.concat([data, dfs], axis=0, ignore_index=True)
    return data

In [5]:
class Example:
    def __init__(self, data: Union[pd.DataFrame, None], classification: Union[str, None], modify=False, precision=None):
        self.data = self.modify_data(data, precision) if modify else data
        self.classification = classification
        self.text = self.format_data_to_string(self.data)

    @staticmethod
    def modify_data(data: Union[pd.DataFrame, None], precision=None):
        data = data.round(precision if precision is not None else 0) if data is not None else data
        return data
    
    @staticmethod
    def format_data_to_string(data: Union[pd.DataFrame, None]) -> str:
        if data is not None:
            array = data.to_numpy()
            array_str = np.array2string(array, separator=',', threshold=np.inf, max_line_width=np.inf)
            formatted_str = re.sub(r'[\[\]]', '', array_str).replace(' ', '')
            return formatted_str
        return ''

In [6]:
titles = [activity.title().replace('_', ' ') for activity in activity_classes]

In [7]:
load_dotenv()
groq_models = {"llama-3.1-8b-instant": "llama-3.1-8b-instant", "llama-3.1-70b-versatile": "llama-3.1-70b-versatile", "llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

In [8]:
from GenerateDataset import generate_dataset
from MakeHARdataset import X_train, X_test, y_train, y_test

def clean_data(folder: str = 'Train'):
    dataset_features = generate_dataset(filepath=r"C:\Users\Nishchay\Desktop\College\ES 335 Machine Learning\Assignments\es335-24-fall-assignment-1\Datasets\UCI HAR Dataset", folder=folder).columns
    features = ['tBodyAcc_mean()_X', 'tBodyAcc_mean()_Y', 'tBodyAcc_mean()_Z', 'tGravityAcc_mean()_X', 'tGravityAcc_mean()_Y', 'tGravityAcc_mean()_Z', 'tBodyGyro_mean()_X', 'tBodyGyro_mean()_Y', 'tBodyGyro_mean()_Z', 'y']

    X = X_train if folder != 'Test' else X_test
    y = y_train if folder != 'Test' else y_test

    dataframes = [pd.DataFrame(dataframe, columns=dataset_features[:-1]).assign(y=label) for dataframe, label in zip(X, y)]
    labels = []
    np.random.seed = 101
    np.random.shuffle(dataframes)

    for i in range(len(dataframes)):
        labels.append(titles[int(dataframes[i].iloc[0, -1])])
        dataframes[i] = dataframes[i][features[:-1]]
        dataframes[i] = dataframes[i].iloc[10:20, :]

    return dataframes, labels

train_dataframes, train_labels = clean_data('Train')
test_dataframes, test_labels = clean_data('Test')

train_examples = [Example(dataframe, label, True, 3) for dataframe, label in zip(train_dataframes, train_labels)]
test_examples = [Example(dataframe, label, True, 3) for dataframe, label in zip(test_dataframes, test_labels)]

Loading feature data from c:\Users\Nishchay\Desktop\College\ES 335 Machine Learning\Assignments\es335-24-fall-assignment-1\Datasets\UCI HAR Dataset\train...
Loading feature data from c:\Users\Nishchay\Desktop\College\ES 335 Machine Learning\Assignments\es335-24-fall-assignment-1\Datasets\UCI HAR Dataset\test...


# Zero Shot

In [7]:
model_name = "llama-3.1-70b-versatile"
cur_api_key = 1

query_str = """You are given some values of the following features of a human, in order:
1. tBodyAcc-mean()-X: Mean of the body acceleration signal in the X-axis (horizontal movement).
2. tBodyAcc-mean()-Y: Mean of the body acceleration signal in the Y-axis (vertical movement).
3. tBodyAcc-mean()-Z: Mean of the body acceleration signal in the Z-axis (lateral movement).
4. tGravityAcc-mean()-X: Mean of the gravity acceleration signal in the X-axis.
5. tGravityAcc-mean()-Y: Mean of the gravity acceleration signal in the Y-axis.
6. tGravityAcc-mean()-Z: Mean of the gravity acceleration signal in the Z-axis.

Now classify the below activities into one of these activities: Laying, Sitting, Standing, Walking, Walking Downstairs, Walking Upstairs. Make sure you limit your response for each activity to ONLY your classification, without providing any reasoning or justification for it. If you're not sure, return your best guess:

"""

test_template = """
=======================================================
Activity {activity_number}:

{activity}

"""

for cur_example, example in enumerate(test_examples):
    query_str += test_template.format(activity_number=cur_example + 1, activity=example.text)

response = None
while not response:
    try:
        llm = ChatGroq(model=groq_models[model_name], api_key=os.environ[f'API_KEY_{cur_api_key}'], temperature=0)
        response = llm.invoke(query_str).content.strip()
    except:
        cur_api_key = (cur_api_key % 5) + 1

In [8]:
classifications = response.split('\n')
classifications = [classification[classification.index('.')+2:] for classification in classifications]

correct = sum(1 if classification == test_labels[i] else 0 for i, classification in enumerate(classifications))
total = 54
accuracy = correct / total

accuracy *= 100
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 3.70%


In [9]:
query_str = """You are given some values of the following features of a human, in order:
1. tBodyAcc-mean()-X: Mean of the body acceleration signal in the X-axis (horizontal movement).
2. tBodyAcc-mean()-Y: Mean of the body acceleration signal in the Y-axis (vertical movement).
3. tBodyAcc-mean()-Z: Mean of the body acceleration signal in the Z-axis (lateral movement).
4. tGravityAcc-mean()-X: Mean of the gravity acceleration signal in the X-axis.
5. tGravityAcc-mean()-Y: Mean of the gravity acceleration signal in the Y-axis.
6. tGravityAcc-mean()-Z: Mean of the gravity acceleration signal in the Z-axis.

Your task is to classify the activity into one of the following classes: Laying, Sitting, Standing, Walking, Walking Downstairs, Walking Upstairs.

Now classify the below activity. Make sure you limit your response to ONLY your classification, without providing any reasoning or justification for it. If you're not sure, return your best guess:

"""

correct = 0
total = 0
cur_example = 0
cur_api_key = 1

while cur_example < len(test_examples):
    try:
        llm = ChatGroq(model=groq_models[model_name], api_key=os.environ[f'API_KEY_{cur_api_key}'], temperature=0)
        response = llm.invoke(query_str + test_examples[cur_example].text).content.strip()
        print(response)
        print(test_examples[cur_example].classification)
        print()

        if response == test_examples[cur_example].classification:
            correct += 1
        total += 1
        cur_example += 1
    except:
        cur_api_key = (cur_api_key % 5) + 1 

accuracy = correct / total
accuracy *= 100
print(f'Accuracy: {accuracy:.2f}%')

Standing
Sitting

Standing
Walking Upstairs

Walking
Walking Upstairs

Standing
Laying

Standing
Standing

Standing
Sitting

Standing
Walking Downstairs

Standing
Standing



# Few Shot 

## Passing all tests together

In [9]:
model_name = "llama-3.1-70b-versatile"
cur_api_key = 1

In [None]:
query_str = """You are given some values of the following features of a human, in order:
1. tBodyAcc-mean()-X: Mean of the body acceleration signal in the X-axis (horizontal movement).
2. tBodyAcc-mean()-Y: Mean of the body acceleration signal in the Y-axis (vertical movement).
3. tBodyAcc-mean()-Z: Mean of the body acceleration signal in the Z-axis (lateral movement).
4. tGravityAcc-mean()-X: Mean of the gravity acceleration signal in the X-axis.
5. tGravityAcc-mean()-Y: Mean of the gravity acceleration signal in the Y-axis.
6. tGravityAcc-mean()-Z: Mean of the gravity acceleration signal in the Z-axis.

Your task is to classify the activity into one of the following classes: Laying, Sitting, Standing, Walking, Walking Downstairs, Walking Upstairs.

Here are some examples to help you get started:
"""

example_template = """
=======================================================
- Activity:
{activity}

- Classification: {classification}
"""

for example in train_examples:
    query_str += example_template.format(activity=example.text, classification=example.classification)

ending_str = """
Now classify the below activities. Make sure you limit your response for each activity to ONLY your classification, without providing any reasoning or justification for it. If you're not sure, return your best guess:
"""

query_str += ending_str

test_template = """
=======================================================
Activity {activity_number}:

{activity}

"""

for cur_example, example in enumerate(test_examples):
    query_str += test_template.format(activity_number=cur_example + 1, activity=example.text)

response = None
while not response:
    try:
        llm = ChatGroq(model=groq_models[model_name], api_key=os.environ[f'API_KEY_{cur_api_key}'], temperature=0)
        response = llm.invoke(query_str).content.strip()
    except:
        cur_api_key = (cur_api_key % 5) + 1

In [None]:
classifications = response.split('\n\n')
classifications = [classification[classification.index(':')+2:] for classification in classifications]

correct = sum(1 if classification == test_labels[i] else 0 for i, classification in enumerate(classifications))
total = 54
accuracy = correct / total

accuracy *= 100
print(f'Accuracy: {accuracy:.2f}%')

## Passing tests one-by-one

In [10]:
query_str = """You are given some values of the following features of a human, in order:
1. tBodyAcc-mean()-X: Mean of the body acceleration signal in the X-axis (horizontal movement).
2. tBodyAcc-mean()-Y: Mean of the body acceleration signal in the Y-axis (vertical movement).
3. tBodyAcc-mean()-Z: Mean of the body acceleration signal in the Z-axis (lateral movement).
4. tGravityAcc-mean()-X: Mean of the gravity acceleration signal in the X-axis.
5. tGravityAcc-mean()-Y: Mean of the gravity acceleration signal in the Y-axis.
6. tGravityAcc-mean()-Z: Mean of the gravity acceleration signal in the Z-axis.

Your task is to classify the activity into one of the following classes: Laying, Sitting, Standing, Walking, Walking Downstairs, Walking Upstairs.

Here are some examples to help you get started:
"""

example_template = """
=======================================================
- Activity:
{activity}

- Classification: {classification}
"""

for example in train_examples:
    query_str += example_template.format(activity=example.text, classification=example.classification)

ending_str = """
Now classify the below activity. Make sure you limit your response to ONLY your classification, without providing any reasoning or justification for it. If you're not sure, return your best guess:

"""

query_str += ending_str

correct = 0
total = 0
cur_example = 0
cur_api_key = 1

while cur_example < len(test_examples):
    try:
        llm = ChatGroq(model=groq_models[model_name], api_key=os.environ[f'API_KEY_{cur_api_key}'], temperature=0)
        response = llm.invoke(query_str + test_examples[cur_example].text).content.strip()
        print(response)
        print(test_examples[cur_example].classification)
        print()

        if response == test_examples[cur_example].classification:
            correct += 1
        total += 1
        cur_example += 1
    except:
        cur_api_key = (cur_api_key % 5) + 1 

accuracy = correct / total
accuracy *= 100
print(f'Accuracy: {accuracy:.2f}%')