In [1]:
import pandas as pd
import os
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split


In [2]:
# Configuration: column names and train/test split parameters
target_feature = 'Label'

# Numeric predictor columns, in the order they are kept from the raw CSV
continuous_features = [
    'perimeter',
    'solidity',
    'circularity',
    'eccentricity',
    'major_axis_length',
    'minor_axis_length',
]

# Every column retained from the raw CSV: the predictors followed by the target
features = [*continuous_features, target_feature]

# Fraction of rows held out for testing, and the seed that makes the split repeatable
TEST_SIZE = 0.25
RANDOM_STATE = 42

In [3]:
# Read the exported feature table, keeping only the columns named in `features`
data = pd.read_csv('data/exported_features.csv')[features]

# Working DataFrame used by the cells that follow
df = pd.DataFrame(data)

In [4]:
# Separate the predictors (X) from the target column (y).
# The column name is taken from `target_feature` (set in the variables cell)
# rather than repeated as a literal, so this cell cannot drift out of sync
# with the train/test split cell, which also uses `target_feature`.
X = df.drop(columns=[target_feature])
y = df[target_feature]

In [5]:
# Define the preprocessing pipeline: a single RobustScaler step applied to
# every column of X.
# NOTE(review): the scaler is fitted on all rows of X, while the train/test
# split only happens in the export cell further down, so the held-out rows
# contribute to the scaling statistics. Consider fitting on the training rows
# only -- confirm whether this is intended.
pipeline = Pipeline(steps=[
    ('scaler', RobustScaler()),
])

# Fit the pipeline and transform X in one pass
normalized_data = pipeline.fit_transform(X)

# Put the results in a DataFrame.
# X's index is reused (instead of a fresh 0..n-1 RangeIndex) because the label
# assignment below aligns on index labels: with a mismatched index pandas
# would silently produce NaN or wrongly paired labels.
normalized_df = pd.DataFrame(normalized_data, columns=X.columns, index=X.index)

# Add the target feature back (row-aligned through the shared index)
normalized_df[target_feature] = y

normalized_df

Unnamed: 0,perimeter,solidity,circularity,eccentricity,major_axis_length,minor_axis_length,Label
0,-0.848487,-0.352934,0.896727,-1.837866,-1.333466,0.522725,r
1,-0.878911,-0.477691,0.852023,-1.990000,-1.418188,0.525102,r
2,-0.788467,-0.589404,0.761257,-1.764309,-1.167861,0.675744,r
3,-0.571321,-0.258747,0.754082,-0.268220,-0.281101,0.151155,r
4,-0.632256,-0.296963,0.858130,-1.909126,-1.233599,0.703428,r
...,...,...,...,...,...,...,...
128,0.666150,0.983861,-0.550939,0.728091,0.540697,-0.838676,s
129,1.093614,0.805906,-0.742775,0.962849,1.882337,-0.551481,s
130,0.491875,1.078796,-0.385757,0.792468,0.571487,-0.962210,s
131,0.385037,0.858401,-0.692802,0.658811,0.073702,-1.030649,s


In [6]:
# Split the normalized data into training and testing sets and export each one
# to its own CSV file (hand_gesture_train.csv / hand_gesture_test.csv).


def _export_split(frame: pd.DataFrame, file_path: str) -> None:
    """Write `frame` to `file_path` as CSV, appending when the file exists.

    The parent directory is created if it is missing, so the export does not
    fail on a fresh checkout. A newly created file gets a header row; an
    existing file receives the data rows only (no header).

    Args:
        frame: Rows to write; the index is not written.
        file_path: Destination CSV path.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    if not os.path.exists(file_path):
        frame.to_csv(file_path, index=False)
        print(f"Created and saved new file: {file_path}")
    else:
        # NOTE(review): appending means that re-running this cell adds the
        # same rows to the file again. Delete the file first (or switch to
        # overwriting) if duplicates are not wanted -- confirm intent.
        frame.to_csv(file_path, mode='a', header=False, index=False)
        print(f"Appended data to existing file: {file_path}")


# Split the dataset into training and testing sets (seeded for repeatability)
train_X, test_X, train_y, test_y = train_test_split(
    normalized_df.drop(columns=[target_feature]),
    normalized_df[target_feature],
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Merge predictors and target back together for each split
train = pd.concat([train_X, train_y], axis=1)
test = pd.concat([test_X, test_y], axis=1)

# Define file paths
train_fp = '../models/data/hand_gesture_train.csv'
test_fp = '../models/data/hand_gesture_test.csv'

_export_split(train, train_fp)
_export_split(test, test_fp)

Created and saved new file: ../models/data/hand_gesture_train.csv
Created and saved new file: ../models/data/hand_gesture_test.csv
