-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_analysis.R
73 lines (53 loc) · 2.36 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# 1. Merges the training and the test sets to create one data set.
# 2. Extracts only the measurements on the mean and standard deviation for
#    each measurement.
# 3. Uses descriptive activity names to name the activities in the data set.
# 4. Appropriately labels the data set with descriptive variable names.
#
# Input files are read relative to the current working directory:
#   features.txt, activity_labels.txt,
#   test/subject_test.txt,   test/X_test.txt,   test/y_test.txt,
#   train/subject_train.txt, train/X_train.txt, train/y_train.txt
# Result: `dataset`, with columns subjectID, activityCode,
# activityDescription, followed by the selected measurement columns.

# Get column names (the second column holds the feature names)
columnNames <- read.table("features.txt")
# Extract only mean and standard deviation measurement columns.
# The pattern keeps every feature whose name contains "mean" or "std".
columnMeanAndStd <- grep("mean|std", columnNames[, 2])
featureNames <- as.character(columnNames[columnMeanAndStd, 2])

# Get activity labels (first column: code, second column: description)
activityLabels <- read.table("activity_labels.txt")

# Attach the activity description to a one-column data frame of activity
# codes WITHOUT changing the row order. merge() must not be used for this:
# it sorts its result on the key column, which would break the row-by-row
# correspondence with the subject and X files.
labelActivities <- function(codes) {
  codeValues <- codes[[1]]
  labelIndex <- match(codeValues, activityLabels[[1]])
  if (anyNA(labelIndex)) {
    stop("Activity code(s) without an entry in activity_labels.txt: ",
         paste(unique(codeValues[is.na(labelIndex)]), collapse = ", "),
         call. = FALSE)
  }
  data.frame(
    activityCode = codeValues,
    activityDescription = activityLabels[[2]][labelIndex]
  )
}

# Get Subject files
subjectTest <- read.table("test/subject_test.txt")
subjectTrain <- read.table("train/subject_train.txt")
# Set labels for Subject datasets
names(subjectTest) <- "subjectID"
names(subjectTrain) <- "subjectID"

# Get X files
xTest <- read.table("test/X_test.txt")
xTrain <- read.table("train/X_train.txt")
# Get only columns with mean and standard deviation measurements
xTest <- xTest[, columnMeanAndStd]
xTrain <- xTrain[, columnMeanAndStd]
# Set labels for X datasets
names(xTest) <- featureNames
names(xTrain) <- featureNames

# Get Y files (one activity code per observation)
yTest <- read.table("test/y_test.txt")
yTrain <- read.table("train/y_train.txt")
# Add the activity description to each code, keeping the original row order
yTest <- labelActivities(yTest)
yTrain <- labelActivities(yTrain)

# Create data frame from test data.
# data.frame() keeps its default check.names = TRUE, so the feature names
# are converted to syntactically valid column names here.
testdf <- data.frame(subjectTest, yTest, xTest)
# Create data frame from train data
traindf <- data.frame(subjectTrain, yTrain, xTrain)
# Combine test and train data frames
dataset <- rbind(testdf, traindf)
# 5. From the data set in step 4, creates a second, independent tidy data set
#    with the average of each variable for each activity and each subject.

# Identifier columns of `dataset`; every other column is a measurement to be
# averaged. Selecting by name avoids depending on a fixed column range.
idColumns <- c("subjectID", "activityCode", "activityDescription")
measurementColumns <- setdiff(names(dataset), idColumns)

# Split dataset by subject and activity; drop = TRUE discards
# subject/activity combinations that have no rows.
splitdatasets <- split(dataset,
                       list(dataset$subjectID, dataset$activityCode),
                       drop = TRUE)

# Calculate the column means of the measurement columns for every group and
# stack them so that each row is one subject/activity group.
groupMeans <- lapply(splitdatasets, function(x) {
  colMeans(x[, measurementColumns, drop = FALSE], na.rm = TRUE)
})
colmeans <- data.frame(do.call(rbind, groupMeans))

# Get row.names (the group keys, "<subjectID>.<activityCode>")
subject.activity <- row.names(colmeans)
# Delete row.names so the keys live only in an ordinary column
row.names(colmeans) <- NULL
# Create new subject.activity column as the first column
colmeans <- cbind(subject.activity, colmeans)

# Save the tidy dataset of averages.
# write.csv() keeps its default row.names = TRUE, so the file also contains a
# leading row-number column.
write.csv(colmeans, file = "tidy.csv")