# FeatureExtraction

In [1]:
from scipy.stats import skew
import os
import csv
import pandas as pd
from tqdm import tqdm

In [2]:
def extractFeatures(data):
	"""Summarise one data column as six statistics.

	Args:
		data: object providing mean(), max(), min(), var() and std() and
			accepted by scipy.stats.skew; the notebook passes one DataFrame
			column per call.

	Returns:
		list: [mean, max, min, variance, standard deviation, skewness], in
		that order. Variance and standard deviation are whatever data.var()
		and data.std() return with their default arguments; skewness comes
		from scipy.stats.skew with its default arguments.
	"""
	return [
		data.mean(),
		data.max(),
		data.min(),
		data.var(),
		data.std(),
		skew(data),
	]

def makeFeatureHeader(features, colNames):
	"""Build the header row for the feature table.

	Args:
		features: names of the statistics computed for every column.
		colNames: names of the data columns.

	Returns:
		list: one "<column>_<feature>" label per (column, feature) pair,
		grouped by column in the order given, followed by a final
		"activity" label.
	"""
	labels = [col + "_" + feature for col in colNames for feature in features]
	return labels + ["activity"]

![dsa](dsa.png)

In [3]:
# Names of the per-column statistics, used to build the header labels.
features = ["mean", "max", "min", "var", "std", "skew"]

# Activity folder code -> label written to the "activity" column.
activites = {
	'a01': 'sitting',
	'a02': 'standing',
	'a03': 'lyingBack',
	'a04': 'lyingRigh',
	'a05': 'ascendingStairs',
	'a06': 'decendingStairs',
	'a07': 'standingInElevatorStill',
	'a08': 'movingInElevator',
	'a09': 'walkingLot',
	'a10': 'walkingTreadmillFlat',
	'a11': 'walkingTreadmillIncline',
	'a12': 'runningTreadmill',
	'a13': 'stepper',
	'a14': 'crossTrainer',
	'a15': 'cyclingHorizontal',
	'a16': 'cyclingVertical',
	'a17': 'rowing',
	'a18': 'jumping',
	'a19': 'basketBall',
}

# Per-person folder names: p1 .. p8.
people = ['p' + str(number) for number in range(1, 9)]

# 45 column names of the form "<unit>_<axis><sensor>", e.g. "T_xacc".
# Order: unit, then sensor (acc, gyro, mag), then axis (x, y, z).
# NOTE(review): the unit prefixes presumably stand for torso, right/left arm
# and right/left leg -- confirm against the dataset description.
collumNames = [
	unit + "_" + axis + sensor
	for unit in ("T", "RA", "LA", "RL", "LL")
	for sensor in ("acc", "gyro", "mag")
	for axis in ("x", "y", "z")
]


In [4]:
# Root folder of the input data, laid out as <activity>/<person>/<segment file>
# (that is how the extraction loop walks it).
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
mainDir = "F:/HAR/DSADS/data/"

In [None]:
# Build the feature table: a header row followed by one row per segment file,
# then write it to features.csv. Each data row holds the statistics of every
# column in collumNames (in order) followed by the activity label.
features_df = []
header = makeFeatureHeader(features, collumNames)
features_df.append(header)
for activity in tqdm(activites, desc = "activities"):
	# leave = False clears the inner bar when it finishes so the nested bars
	# do not pile up in the cell output.
	for person in tqdm(people, desc = activity, leave = False):
		personDir = os.path.join(mainDir, activity, person)
		# Sorted for a deterministic row order; ".DS_Store" is a folder
		# metadata file, not a segment, so it is skipped.
		segments = sorted(name for name in os.listdir(personDir) if name != ".DS_Store")
		for segment in segments:
			fpath = os.path.join(personDir, segment)
			# names = ... supplies the column labels, so pandas treats every
			# line of the file as data.
			df = pd.read_csv(fpath, names = collumNames)
			row = []
			for col in collumNames:
				# Deliberately not assigned to `features`: that global holds
				# the statistic names used for the header above, and
				# overwriting it with numbers breaks a re-run of this cell.
				row.extend(extractFeatures(df[col]))
			row.append(activites[activity])
			features_df.append(row)

# newline = "" lets the csv module control line endings itself.
with open("features.csv", "w", newline = "") as f:
	writer = csv.writer(f)
	writer.writerows(features_df)


  0%|                                                   | 0/19 [00:00<?, ?it/s]
  0%|                                                    | 0/8 [00:00<?, ?it/s][A
 12%|█████▌                                      | 1/8 [00:02<00:16,  2.37s/it][A
 25%|███████████                                 | 2/8 [00:04<00:13,  2.24s/it][A
 38%|████████████████▌                           | 3/8 [00:06<00:11,  2.21s/it][A
 50%|██████████████████████                      | 4/8 [00:08<00:08,  2.19s/it][A
 62%|███████████████████████████▌                | 5/8 [00:11<00:06,  2.18s/it][A
 75%|█████████████████████████████████           | 6/8 [00:13<00:04,  2.19s/it][A
 88%|██████████████████████████████████████▌     | 7/8 [00:15<00:02,  2.21s/it][A
100%|████████████████████████████████████████████| 8/8 [00:17<00:00,  2.20s/it][A
  5%|██▎                                        | 1/19 [00:17<05:16, 17.60s/it]
  0%|                                                    | 0/8 [00:00<?, ?it/s][A
 12%|█████

100%|████████████████████████████████████████████| 8/8 [00:16<00:00,  2.10s/it][A
 53%|██████████████████████                    | 10/19 [02:57<02:35, 17.26s/it]
  0%|                                                    | 0/8 [00:00<?, ?it/s][A
 12%|█████▌                                      | 1/8 [00:02<00:14,  2.12s/it][A
 25%|███████████                                 | 2/8 [00:04<00:12,  2.10s/it][A
 38%|████████████████▌                           | 3/8 [00:06<00:10,  2.11s/it][A
 50%|██████████████████████                      | 4/8 [00:08<00:08,  2.11s/it][A
 62%|███████████████████████████▌                | 5/8 [00:10<00:06,  2.13s/it][A
 75%|█████████████████████████████████           | 6/8 [00:12<00:04,  2.18s/it][A
 88%|██████████████████████████████████████▌     | 7/8 [00:15<00:02,  2.26s/it][A
100%|████████████████████████████████████████████| 8/8 [00:17<00:00,  2.19s/it][A
 58%|████████████████████████▎                 | 11/19 [03:14<02:18, 17.34s/it]
  0%|     

# SplitData

In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit

In [None]:
# Load the feature table (one row per segment, label in the "activity" column).
features = pd.read_csv("features.csv")

# One stratified split: 20% of the rows go to the test set, stratified on the
# activity label; random_state fixes the shuffle so the split is reproducible.
split = StratifiedShuffleSplit(n_splits = 1, test_size = 0.2, random_state = 42)

# split() yields positional row indices, so rows are selected with .iloc.
# Label-based .loc only gives the same rows while the index is the default
# 0..n-1 range.
train_index, test_index = next(split.split(features, features['activity']))
train_set = features.iloc[train_index]
test_set = features.iloc[test_index]

# NOTE: a second split of train_set into training/validation parts was drafted
# here but left disabled. If it is revived, select with .iloc as above:
# train_set keeps the original row labels, so positional indices do not match
# them.

# to_csv also writes the row index as the first, unnamed column.
test_set.to_csv("F:/HAR/DSADS/test_set.csv")
train_set.to_csv("F:/HAR/DSADS/train_set.csv")



In [None]:
# Show how many rows of the feature table carry each activity label.
features['activity'].value_counts()