# Problem Statement:

This project focuses on classifying eight distinct hand gestures using time series data from a three-axis accelerometer. The dataset includes over 4,000 samples collected from eight users, capturing variations in gesture patterns. The goal is to develop a machine learning model that accurately recognizes these gestures, enabling gesture-based interactions for improved human-computer interaction (HCI) and intuitive device control.


In [None]:
%pip install liac-arff



In [None]:
%pip install pycaret



# DATA COLLECTION and EDA



In [None]:
import pandas as pd
import numpy as np

# Load the ARFF file, handle the relational attribute
def load_arff_file(filepath):
    with open(filepath, 'r') as file:
        # Read the ARFF file content
        content = file.readlines()

    # Find the start of the data section
    data_start_index = content.index('@data\n') + 1

    # Extract attribute names (excluding relational attribute)
    attribute_lines = [line for line in content[:data_start_index - 1] if line.startswith('@attribute')]
    attribute_names = [line.split()[1] for line in attribute_lines if not line.startswith('@attribute relationalAtt')]  # Exclude relational attribute

    # Extract the data
    data_lines = content[data_start_index:]

    # Parse the data lines into a list of lists
    data = []
    max_cols = 0  # Initialize a variable to track the maximum number of columns
    for line in data_lines:
        # Assuming data is comma-separated
        values = [v.strip() for v in line.strip().split(',')]
        if values and values[0] != '':
            data.append(values)
            max_cols = max(max_cols, len(values))  # Update max_cols if current row has more columns

    # Adjust attribute_names based on the actual number of columns if needed
    if len(attribute_names) != max_cols:
        # If a mismatch is detected, create generic column names
        attribute_names = [f'col_{i}' for i in range(max_cols)]

    # Create DataFrame without the relational attribute
    df = pd.DataFrame(data, columns=attribute_names)

    # Convert numerical columns to appropriate types
    for col in attribute_names:
        try:
            df[col] = pd.to_numeric(df[col])
        except ValueError:
            pass  # Ignore columns that can't be converted

    return df

# Load the ARFF file
train_df = load_arff_file('C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestureLibrary_TRAIN.arff')


# Display the DataFrame
print(train_df.head())

# Convert DataFrame to CSV and save it
csv_file_path = 'C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestureLibrary_TRAIN.csv'
train_df.to_csv(csv_file_path, index=False)

print(f"DataFrame has been converted and saved to {csv_file_path}")

SyntaxError: invalid syntax (1262715475.py, line 4)

In [14]:
import pandas as pd
import numpy as np

# Load the ARFF file, handle the relational attribute
def load_arff_file(filepath):
    with open(filepath, 'r') as file:
        # Read the ARFF file content
        content = file.readlines()

    # Find the start of the data section
    data_start_index = content.index('@data\n') + 1

    # Extract attribute names (excluding relational attribute)
    attribute_lines = [line for line in content[:data_start_index - 1] if line.startswith('@attribute')]
    attribute_names = [line.split()[1] for line in attribute_lines if not line.startswith('@attribute relationalAtt')]  # Exclude relational attribute

    # Extract the data
    data_lines = content[data_start_index:]

    # Parse the data lines into a list of lists
    data = []
    max_cols = 0  # Initialize a variable to track the maximum number of columns
    for line in data_lines:
        # Assuming data is comma-separated
        values = [v.strip() for v in line.strip().split(',')]
        if values and values[0] != '':
            data.append(values)
            max_cols = max(max_cols, len(values))  # Update max_cols if current row has more columns

    # Adjust attribute_names based on the actual number of columns if needed
    if len(attribute_names) != max_cols:
        # If a mismatch is detected, create generic column names
        attribute_names = [f'col_{i}' for i in range(max_cols)]

    # Create DataFrame without the relational attribute
    df = pd.DataFrame(data, columns=attribute_names)

    # Convert numerical columns to appropriate types
    for col in attribute_names:
        try:
            df[col] = pd.to_numeric(df[col])
        except ValueError:
            pass  # Ignore columns that can't be converted

    return df

# Load the ARFF file
train_df = load_arff_file('C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestrueLibrary_TEST.arff')


# Display the DataFrame
print(train_df.head())

# Convert DataFrame to CSV and save it
csv_file_path = 'C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestureLibrary_TEST.csv'
train_df.to_csv(csv_file_path, index=False)

print(f"DataFrame has been converted and saved to {csv_file_path}")

        col_0     col_1     col_2     col_3     col_4     col_5     col_6  \
0   '-0.48569 -0.485690 -0.485690 -0.485690 -0.485690 -0.485690 -0.485690   
1   '-0.29459 -0.294590 -0.294590 -0.294590 -0.294590 -0.294590 -0.294590   
2  '-0.080913 -0.080913 -0.080913 -0.080913 -0.080913 -0.080913 -0.080913   
3    '0.45066  0.450660  0.450660  0.450660  0.450660  0.450660  0.450660   
4  '-0.006052 -0.006052 -0.006052 -0.006052 -0.006052 -0.006052 -0.006052   

      col_7     col_8     col_9  ...   col_934   col_935   col_936   col_937  \
0 -0.485690 -0.485690 -0.485690  ...  0.124300  0.117170  0.121190  0.134590   
1 -0.294590 -0.294590 -0.294590  ... -0.089313 -0.089313 -0.089313 -0.089313   
2 -0.080913 -0.080913 -0.080913  ...  0.596620  0.553660  0.510700  0.474050   
3  0.450660  0.450660  0.450660  ... -0.332700 -0.344260 -0.322460 -0.292150   
4 -0.006052 -0.006052 -0.006052  ...  0.211710  0.211710  0.211710  0.211710   

    col_938   col_939   col_940   col_941     col_942  c

In [16]:
import matplotlib.pyplot as plt 
import seaborn as sns

csv_file_path = 'C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestureLibrary_TRAIN.csv'
train_df = pd.read_csv(csv_file_path)

print (train_df.head())

        col_0     col_1     col_2     col_3     col_4     col_5     col_6  \
0    '0.31745  0.317450  0.317450  0.317450  0.317450  0.317450  0.317450   
1   '0.013633  0.013633  0.013633  0.013633  0.013633  0.013633  0.013633   
2    '-1.4075 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500   
3  '-0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749   
4   '-0.40006 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060   

      col_7     col_8     col_9  ...  col_934  col_935  col_936  col_937  \
0  0.317450  0.317450  0.317450  ...  0.84246  0.82818  0.81390  0.79962   
1  0.013633  0.013633  0.013633  ...  0.25528  0.25528  0.25528  0.25528   
2 -1.407500 -1.407500 -1.407500  ...  0.19882  0.24220  0.28516  0.32812   
3 -0.044749 -0.044749 -0.044749  ...  0.53957  0.57796  0.61364  0.64932   
4 -0.400060 -0.400060 -0.400060  ...  0.29726  0.26346  0.22009  0.16862   

   col_938   col_939   col_940   col_941     col_942  col_943  
0  0.78477  0.76

In [17]:
csv_file_path = 'C:\\Users\\HP\\Desktop\\UWAVE_GESTURE_RECOGNITION\\notebook\\data\\UWaveGestureLibrary_TEST.csv'
test_df = pd.read_csv(csv_file_path)

print (train_df.head())

        col_0     col_1     col_2     col_3     col_4     col_5     col_6  \
0    '0.31745  0.317450  0.317450  0.317450  0.317450  0.317450  0.317450   
1   '0.013633  0.013633  0.013633  0.013633  0.013633  0.013633  0.013633   
2    '-1.4075 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500   
3  '-0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749   
4   '-0.40006 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060   

      col_7     col_8     col_9  ...  col_934  col_935  col_936  col_937  \
0  0.317450  0.317450  0.317450  ...  0.84246  0.82818  0.81390  0.79962   
1  0.013633  0.013633  0.013633  ...  0.25528  0.25528  0.25528  0.25528   
2 -1.407500 -1.407500 -1.407500  ...  0.19882  0.24220  0.28516  0.32812   
3 -0.044749 -0.044749 -0.044749  ...  0.53957  0.57796  0.61364  0.64932   
4 -0.400060 -0.400060 -0.400060  ...  0.29726  0.26346  0.22009  0.16862   

   col_938   col_939   col_940   col_941     col_942  col_943  
0  0.78477  0.76

**Converted .arff files to .csv files for easy analysis**


In [18]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Columns: 944 entries, col_0 to col_943
dtypes: float64(940), object(4)
memory usage: 885.1+ KB


In [19]:
train_df.duplicated().sum()

0

In [20]:
print(train_df.isnull().sum())

col_0      0
col_1      0
col_2      0
col_3      0
col_4      0
          ..
col_939    0
col_940    0
col_941    0
col_942    0
col_943    0
Length: 944, dtype: int64


In [21]:
# Identify unique classes
unique_classes = train_df['col_943'].unique()
print("Unique Classes:", unique_classes)

Unique Classes: [1. 2. 3. 4. 5. 6. 7. 8.]


In [22]:
# Create a dictionary to hold segregated data
segregated_data = {class_label: train_df[train_df['col_943'] == class_label] for class_label in unique_classes}

# Display the size of each segregated DataFrame
for class_label, data in segregated_data.items():
    print(f"Class {class_label} has {len(data)} samples.")

Class 1.0 has 15 samples.
Class 2.0 has 15 samples.
Class 3.0 has 15 samples.
Class 4.0 has 15 samples.
Class 5.0 has 15 samples.
Class 6.0 has 15 samples.
Class 7.0 has 15 samples.
Class 8.0 has 15 samples.


# DATA CLEANING:

**Dataset contains 4000 data points with 8 gestures (classes) each with 15 samples, contains no empty points and duplicates.**


In [23]:
# Check for non-numeric values in the DataFrame
non_numeric_train = train_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())
non_numeric_test = test_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())

print("Non-numeric values in training set:")
print(train_df[non_numeric_train.any(axis=1)])

print("Non-numeric values in test set:")
print(test_df[non_numeric_test.any(axis=1)])


Non-numeric values in training set:
          col_0     col_1     col_2     col_3     col_4     col_5     col_6  \
0      '0.31745  0.317450  0.317450  0.317450  0.317450  0.317450  0.317450   
1     '0.013633  0.013633  0.013633  0.013633  0.013633  0.013633  0.013633   
2      '-1.4075 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500 -1.407500   
3    '-0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749 -0.044749   
4     '-0.40006 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060 -0.400060   
..          ...       ...       ...       ...       ...       ...       ...   
115   '-0.85196 -0.851960 -0.851960 -0.851960 -0.851960 -0.851960 -0.851960   
116     '-0.173 -0.173000 -0.173000 -0.173000 -0.173000 -0.173000 -0.173000   
117     '1.1111  1.111100  1.111100  1.111100  1.111100  1.111100  1.111100   
118   '-0.98835 -0.988350 -0.988350 -0.988350 -0.988350 -0.988350 -0.988350   
119   '-0.14079 -0.140790 -0.140790 -0.140790 -0.140790 -0.140790 -0.140790   

        col_7  

  non_numeric_train = train_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())
  non_numeric_test = test_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())


**The dataset includes time series data points that are non-numeric in their current format, as they are enclosed in single quotes (e.g., '0.31745') and contain newline characters (\n)**

In [24]:
# Clean and convert strings with additional handling for newlines and other anomalies
def clean_and_convert(x):
    if isinstance(x, str):
        x = x.replace("\n", "")  # Remove newline characters
        x = x.strip("'")  # Remove any single quotes
        try:
            return float(x)
        except ValueError:
            return None  # or handle as needed (e.g., return np.nan)
    return x

train_df = train_df.applymap(clean_and_convert)
test_df = test_df.applymap(clean_and_convert)


  train_df = train_df.applymap(clean_and_convert)
  test_df = test_df.applymap(clean_and_convert)


In [25]:
# Check for non-numeric values in the DataFrame
non_numeric_train = train_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())
non_numeric_test = test_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())

print("Non-numeric values in training set:")
print(train_df[non_numeric_train.any(axis=1)])

print("Non-numeric values in test set:")
print(test_df[non_numeric_test.any(axis=1)])


Non-numeric values in training set:
Empty DataFrame
Columns: [col_0, col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10, col_11, col_12, col_13, col_14, col_15, col_16, col_17, col_18, col_19, col_20, col_21, col_22, col_23, col_24, col_25, col_26, col_27, col_28, col_29, col_30, col_31, col_32, col_33, col_34, col_35, col_36, col_37, col_38, col_39, col_40, col_41, col_42, col_43, col_44, col_45, col_46, col_47, col_48, col_49, col_50, col_51, col_52, col_53, col_54, col_55, col_56, col_57, col_58, col_59, col_60, col_61, col_62, col_63, col_64, col_65, col_66, col_67, col_68, col_69, col_70, col_71, col_72, col_73, col_74, col_75, col_76, col_77, col_78, col_79, col_80, col_81, col_82, col_83, col_84, col_85, col_86, col_87, col_88, col_89, col_90, col_91, col_92, col_93, col_94, col_95, col_96, col_97, col_98, col_99, ...]
Index: []

[0 rows x 944 columns]
Non-numeric values in test set:
Empty DataFrame
Columns: [col_0, col_1, col_2, col_3, col_4, col_5, col_6, co

  non_numeric_train = train_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())
  non_numeric_test = test_df.applymap(lambda x: isinstance(x, str) and not x.replace('.', '', 1).isdigit())


In [26]:
train_df.head()

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,...,col_934,col_935,col_936,col_937,col_938,col_939,col_940,col_941,col_942,col_943
0,0.31745,0.31745,0.31745,0.31745,0.31745,0.31745,0.31745,0.31745,0.31745,0.31745,...,0.84246,0.82818,0.8139,0.79962,0.78477,0.76709,0.74094,0.71238,0.68382,1.0
1,0.013633,0.013633,0.013633,0.013633,0.013633,0.013633,0.013633,0.013633,0.013633,0.013633,...,0.25528,0.25528,0.25528,0.25528,0.25528,0.25528,0.25528,0.25528,0.25528,1.0
2,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,-1.4075,...,0.19882,0.2422,0.28516,0.32812,0.36145,0.39038,0.41931,0.44824,0.47718,1.0
3,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,-0.044749,...,0.53957,0.57796,0.61364,0.64932,0.685,0.65112,0.61545,0.57977,0.54409,1.0
4,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,-0.40006,...,0.29726,0.26346,0.22009,0.16862,0.10254,0.041917,5.8e-05,-0.036678,-0.070475,1.0
