In [13]:
import pandas as pd
import os

# Subject names; each one is the prefix of a "<subject>_<activity>.csv" recording
subjects = [
    'LAKSH',
    'RUDRA',
    'PARTHIV',
]

# Activity labels; each one is the suffix of a "<subject>_<activity>.csv" recording
activities = [
    'LAYING',
    'SITTING',
    'STANDING',
    'WALKING',
    'WALKING_DOWNSTAIRS',
    'WALKING_UPSTAIRS',
]

# Function to load the first `nrows` rows (500 by default) of each dataset
def load_data(subjects, activities, nrows=500, data_dir=None):
    """Load and stack the per-subject, per-activity CSV recordings.

    For every (subject, activity) pair the file ``<subject>_<activity>.csv`` is
    looked up in ``data_dir``. Files that exist are read (at most ``nrows`` rows
    each) and tagged with ``Activity`` and ``Subject`` columns; missing files are
    reported on stdout and skipped.

    Parameters
    ----------
    subjects : iterable of str
        Subject names used as the file-name prefix.
    activities : iterable of str
        Activity labels used as the file-name suffix.
    nrows : int or None, default 500
        Maximum number of rows read from each file (passed to ``pd.read_csv``).
    data_dir : str or None, default None
        Directory containing the CSV files; the current working directory when None.

    Returns
    -------
    pandas.DataFrame
        All loaded frames concatenated with a fresh integer index.

    Raises
    ------
    ValueError
        If none of the expected files exists, so there is nothing to concatenate.
    """
    base_dir = os.getcwd() if data_dir is None else data_dir
    data_list = []
    for subject in subjects:
        for activity in activities:
            file_path = os.path.join(base_dir, f"{subject}_{activity}.csv")
            print(f"Checking file: {file_path}")  # Debugging line
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue
            df = pd.read_csv(file_path, nrows=nrows)
            df['Activity'] = activity
            df['Subject'] = subject
            data_list.append(df)

    # pd.concat([]) fails with an opaque "No objects to concatenate"; say what is actually wrong.
    if not data_list:
        raise ValueError(
            f"No '<subject>_<activity>.csv' files were found in {base_dir!r}; nothing to combine."
        )
    return pd.concat(data_list, ignore_index=True)

# Merge every subject/activity recording that exists on disk into one frame
combined_data_df = load_data(subjects=subjects, activities=activities)

# Persist the merged frame (row index omitted) for the later cells to read back
combined_data_df.to_csv(path_or_buf='combined_taken_data.csv', index=False)

print("Data merging complete. The combined dataset (first 500 rows) has been created and saved.")


Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_LAYING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_SITTING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_STANDING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_WALKING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_WALKING_DOWNSTAIRS.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\LAKSH_WALKING_UPSTAIRS.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\RUDRA_LAYING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\RUDRA_SITTING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\RUDRA_STANDING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\RUDRA_WALKING.csv
Checking file: c:\Users\Parth\OneDrive - iitgn.ac.in\Codes\ML\task4\RUDRA_WALKING_DOWNSTAIRS.csv
Checking file: c:\Users

In [14]:
import pandas as pd 
from langchain_groq.chat_models import ChatGroq
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Read the API key and the short model alias from the environment
groq_api_key = os.getenv("GROQ_API_KEY")
model_name = os.getenv("MODEL_NAME")

# Short alias (value expected in MODEL_NAME) -> Groq model identifier
groq_models = {
    "llama3-70b": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "gemma-7b": "gemma-7b-it",
    "llama3.1-70b": "llama-3.1-70b-versatile",
    "llama3-8b": "llama3-8b-8192",
    "llama3.1-8b": "llama-3.1-8b-instant",
    "gemma-9b": "gemma2-9b-it"
}

# An unset or misspelled MODEL_NAME would otherwise surface as a bare `KeyError: None`;
# keep the exception type but say which values are accepted.
if model_name not in groq_models:
    raise KeyError(
        f"MODEL_NAME={model_name!r} is not a known model alias; "
        f"set it in the environment or .env file to one of: {', '.join(groq_models)}"
    )

llm = ChatGroq(model=groq_models[model_name], api_key=groq_api_key, temperature=0.4)

> Few-shot examples taken from the given dataset; tested on the collected data

In [17]:
import pandas as pd

# Test set: the collected recordings stored in 'combined_taken_data.csv'
test_data_df = pd.read_csv('combined_taken_data.csv')

# One dict per (subject, activity) test group: ground truth plus the LLM prediction
results = []

# Define the activities
activities = ['LAYING', 'STANDING', 'WALKING', 'SITTING', 'WALKING UPSTAIRS', 'WALKING DOWNSTAIRS']

def sample_data(data, sample_rate=10):
    """Return a contiguous window of ``500 // sample_rate`` readings starting at index 100.

    Note that, despite the parameter name, this does not keep every
    ``sample_rate``-th reading; it slices ``data[100:100 + 500 // sample_rate]``.
    """
    n = 500 // sample_rate
    return data[100:n + 100]

# Number of correct predictions
i = 0

# Filter training data for specific subjects
train_subjects = [1, 3, 5, 7]
train_data_df = pd.read_csv('train_data_combined.csv')
train_data_df = train_data_df[train_data_df['Subject'].isin(train_subjects)]

# The instructions and the few-shot examples are the same for every test group,
# so build them once instead of once per group. The examples header is emitted a
# single time rather than before every example.
prompt_prefix = """
    * You are a highly accurate activity classification model.
    * Your task is to classify human activities based on the given accelerometer data.
    * The accelerometer data is provided as mean acceleration values in the x, y, and z directions.
    * You are given data corresponding to six different activities.
    * The possible activities to classify are: LAYING, STANDING, WALKING, SITTING, WALKING UPSTAIRS, and WALKING DOWNSTAIRS.
    * Here are some examples of accelerometer data and their corresponding activities:"""

for (train_subject, train_activity), grp in train_data_df.groupby(['Subject', 'Activity']):
    prompt_prefix += f"""
        * Activity: {train_activity}
          accx = {sample_data(grp['accx'].tolist(), 50)}
          accy = {sample_data(grp['accy'].tolist(), 50)}
          accz = {sample_data(grp['accz'].tolist(), 50)}
        """

# Iterate through each group by subject and activity
for (subject, activity), group in test_data_df.groupby(['Subject', 'Activity']):
    # Sample data
    accx = sample_data(group['gFx'].tolist())
    accy = sample_data(group['gFy'].tolist())
    accz = sample_data(group['gFz'].tolist())

    # Shared prefix plus the readings of the current test group
    query_few_shot_Task4 = prompt_prefix + f"""
    * Analyze the accelerometer data and provide the most likely activity label for each case.
    * PRINT ONLY A WORD WHICH IS THE PREDICTED ACTIVITY AND NOTHING ELSE NO CONTENT NO REASON JUST A PREDICTION

    accx = {accx}
    accy = {accy}
    accz = {accz}
    """

    result = llm.invoke(query_few_shot_Task4)

    # Read the reply text directly instead of slicing the message repr, and map
    # "walking upstairs" / "WALKING UPSTAIRS." onto the dataset's WALKING_UPSTAIRS
    # form; splitting the repr on spaces used to truncate such answers to "WALKING".
    prediction = "_".join(str(result.content).strip(" \t\n.'\"").upper().split())

    print(activity, prediction)
    if activity.upper() == prediction:
        i += 1
        print(i)
    results.append({'Subject': subject, 'Activity': activity, 'Prediction': prediction})

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save results to CSV if needed
results_df.to_csv('model_predictions_fewshot_4.1.csv', index=False)

print("Classification complete. Results have been saved to 'model_predictions_fewshot_4.1.csv'.")

# Divide by the number of groups actually evaluated rather than a hard-coded count
print("accuracy", i / len(results) * 100 if results else 0.0)

LAYING LAYING
1
SITTING STANDING
STANDING STANDING
2
WALKING WALKING
3
WALKING_DOWNSTAIRS WALKING_UPSTAIRS
WALKING_UPSTAIRS WALKING
LAYING LAYING
4
SITTING STANDING
STANDING STANDING
5
WALKING WALKING_UPSTAIRS
WALKING_DOWNSTAIRS WALKING
WALKING_UPSTAIRS WALKING_UPSTAIRS
6
LAYING LAYING
7
SITTING STANDING
STANDING STANDING
8
WALKING WALKING
9
WALKING_DOWNSTAIRS WALKING
WALKING_UPSTAIRS WALKING
Classification complete. Results have been saved to 'model_predictions.csv'.
accuracy 50.0


Few-shot accuracy is 50% (9 of 18 groups correct) when the examples come from the UCI-HAR dataset and the test data is the collected dataset.

> Few-shot with both the examples and the test data taken from the collected dataset

In [16]:
import pandas as pd

# All collected recordings stored in 'combined_taken_data.csv'
whole_df = pd.read_csv('combined_taken_data.csv')

# One dict per (subject, activity) test group: ground truth plus the LLM prediction
results = []

# Define the activities
activities = ['LAYING', 'STANDING', 'WALKING', 'SITTING', 'WALKING UPSTAIRS', 'WALKING DOWNSTAIRS']

def sample_data(data, sample_rate=10):
    """Return a contiguous window of ``500 // sample_rate`` readings starting at index 100.

    Note that, despite the parameter name, this does not keep every
    ``sample_rate``-th reading; it slices ``data[100:100 + 500 // sample_rate]``.
    """
    n = 500 // sample_rate
    return data[100:n + 100]

# Number of correct predictions
i = 0

# Subject LAKSH supplies the few-shot examples; every other subject is the test set
train_data_df = whole_df[whole_df["Subject"] == "LAKSH"]
test_data_df = whole_df[whole_df["Subject"] != "LAKSH"]

# The instructions and the few-shot examples are the same for every test group,
# so build them once instead of once per group. The examples header is emitted a
# single time rather than before every example.
prompt_prefix = """
    * You are a highly accurate activity classification model.
    * Your task is to classify human activities based on the given accelerometer data.
    * The accelerometer data is provided as mean acceleration values in the x, y, and z directions.
    * You are given data corresponding to six different activities.
    * The possible activities to classify are: LAYING, STANDING, WALKING, SITTING, WALKING UPSTAIRS, and WALKING DOWNSTAIRS.
    * Here are some examples of accelerometer data and their corresponding activities:"""

for (train_subject, train_activity), grp in train_data_df.groupby(['Subject', 'Activity']):
    prompt_prefix += f"""
        * Activity: {train_activity}
          accx = {sample_data(grp['gFx'].tolist(), 50)}
          accy = {sample_data(grp['gFy'].tolist(), 50)}
          accz = {sample_data(grp['gFz'].tolist(), 50)}
        """

# Iterate through each group by subject and activity
for (subject, activity), group in test_data_df.groupby(['Subject', 'Activity']):
    # Sample data
    accx = sample_data(group['gFx'].tolist())
    accy = sample_data(group['gFy'].tolist())
    accz = sample_data(group['gFz'].tolist())

    # Shared prefix plus the readings of the current test group
    query_few_shot_Task4 = prompt_prefix + f"""
    * Analyze the accelerometer data and provide the most likely activity label for each case.
    * PRINT ONLY A WORD WHICH IS THE PREDICTED ACTIVITY AND NOTHING ELSE NO CONTENT NO REASON JUST A PREDICTION

    accx = {accx}
    accy = {accy}
    accz = {accz}
    """

    result = llm.invoke(query_few_shot_Task4)

    # Read the reply text directly instead of slicing the message repr, and map
    # "walking upstairs" / "WALKING UPSTAIRS." onto the dataset's WALKING_UPSTAIRS
    # form; splitting the repr on spaces used to truncate such answers to "WALKING".
    prediction = "_".join(str(result.content).strip(" \t\n.'\"").upper().split())

    print(activity, prediction)
    if activity.upper() == prediction:
        i += 1
        print(i)
    results.append({'Subject': subject, 'Activity': activity, 'Prediction': prediction})

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save results to CSV if needed
results_df.to_csv('model_predictions_fewshot_4.2.csv', index=False)

print("Classification complete. Results have been saved to 'model_predictions_fewshot_4.2.csv'.")

# Divide by the number of groups actually evaluated rather than a hard-coded count
print("accuracy", i / len(results) * 100 if results else 0.0)

LAYING LAYING
1
SITTING STANDING
STANDING STANDING
2
WALKING WALKING_UPSTAIRS
WALKING_DOWNSTAIRS WALKING_UPSTAIRS
WALKING_UPSTAIRS WALKING_UPSTAIRS
3
LAYING LAYING
4
SITTING STANDING
STANDING STANDING
5
WALKING WALKING_UPSTAIRS
WALKING_DOWNSTAIRS WALKING_UPSTAIRS
WALKING_UPSTAIRS WALKING_UPSTAIRS
6
Classification complete. Results have been saved to 'model_predictions.csv'.
accuracy 50.0


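Few-shot accuracy is 50% (6 of 12 groups correct) when both the examples (subject LAKSH) and the test data (the remaining subjects) come from the collected dataset.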