# Mount Google Drive

In [35]:
# Mount Google Drive to access dataset
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Unzip the dataset

In [None]:
# Unzip the dataset from the Google Drive location
!unzip "/content/drive/My Drive/UCI HAR Dataset.zip"

Archive:  /content/drive/My Drive/UCI HAR Dataset.zip
replace UCI HAR Dataset/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

## Importing libraries

In [37]:
# Import necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Read the training dataset with 561 features

In [38]:
# Read the training dataset with 561 features
train_df = pd.read_csv("UCI HAR Dataset/train/X_train.txt", delim_whitespace=True, names=list(range(0, 561)))

# Read and add the 'Subject' column to the training dataset
subject_train = pd.read_csv("UCI HAR Dataset/train/subject_train.txt", header=None)
train_df['Subject'] = subject_train.squeeze()

# Read and add the 'activity' column to the training dataset
activity_train = pd.read_csv("UCI HAR Dataset/train/y_train.txt", header=None)
train_df['activity'] = activity_train.squeeze()

# Display the first few rows of the training dataframe to check the result
print(train_df.head())


          0         1         2         3         4         5         6  \
0  0.288585 -0.020294 -0.132905 -0.995279 -0.983111 -0.913526 -0.995112   
1  0.278419 -0.016411 -0.123520 -0.998245 -0.975300 -0.960322 -0.998807   
2  0.279653 -0.019467 -0.113462 -0.995380 -0.967187 -0.978944 -0.996520   
3  0.279174 -0.026201 -0.123283 -0.996091 -0.983403 -0.990675 -0.997099   
4  0.276629 -0.016570 -0.115362 -0.998139 -0.980817 -0.990482 -0.998321   

          7         8         9  ...       553       554       555       556  \
0 -0.983185 -0.923527 -0.934724  ... -0.710304 -0.112754  0.030400 -0.464761   
1 -0.974914 -0.957686 -0.943068  ... -0.861499  0.053477 -0.007435 -0.732626   
2 -0.963668 -0.977469 -0.938692  ... -0.760104 -0.118559  0.177899  0.100699   
3 -0.982750 -0.989302 -0.938692  ... -0.482845 -0.036788 -0.012892  0.640011   
4 -0.979672 -0.990441 -0.942469  ... -0.699205  0.123320  0.122542  0.693578   

        557       558       559       560  Subject  activity  
0 -0.

# Read feature names

In [39]:
# Read feature names from the features.txt file
features = list()
with open("UCI HAR Dataset/features.txt") as f:
    for line in f:
        features.append(line.split()[1])

In [40]:
#display features
features

['tBodyAcc-mean()-X',
 'tBodyAcc-mean()-Y',
 'tBodyAcc-mean()-Z',
 'tBodyAcc-std()-X',
 'tBodyAcc-std()-Y',
 'tBodyAcc-std()-Z',
 'tBodyAcc-mad()-X',
 'tBodyAcc-mad()-Y',
 'tBodyAcc-mad()-Z',
 'tBodyAcc-max()-X',
 'tBodyAcc-max()-Y',
 'tBodyAcc-max()-Z',
 'tBodyAcc-min()-X',
 'tBodyAcc-min()-Y',
 'tBodyAcc-min()-Z',
 'tBodyAcc-sma()',
 'tBodyAcc-energy()-X',
 'tBodyAcc-energy()-Y',
 'tBodyAcc-energy()-Z',
 'tBodyAcc-iqr()-X',
 'tBodyAcc-iqr()-Y',
 'tBodyAcc-iqr()-Z',
 'tBodyAcc-entropy()-X',
 'tBodyAcc-entropy()-Y',
 'tBodyAcc-entropy()-Z',
 'tBodyAcc-arCoeff()-X,1',
 'tBodyAcc-arCoeff()-X,2',
 'tBodyAcc-arCoeff()-X,3',
 'tBodyAcc-arCoeff()-X,4',
 'tBodyAcc-arCoeff()-Y,1',
 'tBodyAcc-arCoeff()-Y,2',
 'tBodyAcc-arCoeff()-Y,3',
 'tBodyAcc-arCoeff()-Y,4',
 'tBodyAcc-arCoeff()-Z,1',
 'tBodyAcc-arCoeff()-Z,2',
 'tBodyAcc-arCoeff()-Z,3',
 'tBodyAcc-arCoeff()-Z,4',
 'tBodyAcc-correlation()-X,Y',
 'tBodyAcc-correlation()-X,Z',
 'tBodyAcc-correlation()-Y,Z',
 'tGravityAcc-mean()-X',
 'tGravityA

# Read the test dataset

In [41]:
# Read the main test dataset
test_df = pd.read_csv("UCI HAR Dataset/test/X_test.txt", delim_whitespace=True, names=list(range(0, 561)))

# Read and add the 'Subject' column to the test datase
subject_test = pd.read_csv("UCI HAR Dataset/test/subject_test.txt", header=None)
test_df['Subject'] = subject_test.squeeze()

# Read and add the 'activity' column to the test dataset
activity_test = pd.read_csv("UCI HAR Dataset/test/y_test.txt", header=None)
test_df['activity'] = activity_test.squeeze()
print(test_df.head())

          0         1         2         3         4         5         6  \
0  0.257178 -0.023285 -0.014654 -0.938404 -0.920091 -0.667683 -0.952501   
1  0.286027 -0.013163 -0.119083 -0.975415 -0.967458 -0.944958 -0.986799   
2  0.275485 -0.026050 -0.118152 -0.993819 -0.969926 -0.962748 -0.994403   
3  0.270298 -0.032614 -0.117520 -0.994743 -0.973268 -0.967091 -0.995274   
4  0.274833 -0.027848 -0.129527 -0.993852 -0.967445 -0.978295 -0.994111   

          7         8         9  ...       553       554       555       556  \
0 -0.925249 -0.674302 -0.894088  ... -0.705974  0.006462  0.162920 -0.825886   
1 -0.968401 -0.945823 -0.894088  ... -0.594944 -0.083495  0.017500 -0.434375   
2 -0.970735 -0.963483 -0.939260  ... -0.640736 -0.034956  0.202302  0.064103   
3 -0.974471 -0.968897 -0.938610  ... -0.736124 -0.017067  0.154438  0.340134   
4 -0.965953 -0.977346 -0.938610  ... -0.846595 -0.002223 -0.040046  0.736715   

        557       558       559       560  Subject  activity  
0  0.

In [42]:
# Save the processed training and test dataframes to CSV files
train_df.to_csv('train.csv',index_label=False)
test_df.to_csv('test.csv',index_label=False)

In [43]:
# Read the saved CSV files into dataframes
train_df= pd.read_csv('/content/train.csv')

In [44]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,553,554,555,556,557,558,559,560,Subject,activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,5
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,5
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,5
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,5
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,5


In [45]:
test_df= pd.read_csv('/content/test.csv')

In [46]:
test_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,553,554,555,556,557,558,559,560,Subject,activity
0,0.257178,-0.023285,-0.014654,-0.938404,-0.920091,-0.667683,-0.952501,-0.925249,-0.674302,-0.894088,...,-0.705974,0.006462,0.16292,-0.825886,0.271151,-0.720009,0.276801,-0.057978,2,5
1,0.286027,-0.013163,-0.119083,-0.975415,-0.967458,-0.944958,-0.986799,-0.968401,-0.945823,-0.894088,...,-0.594944,-0.083495,0.0175,-0.434375,0.920593,-0.698091,0.281343,-0.083898,2,5
2,0.275485,-0.02605,-0.118152,-0.993819,-0.969926,-0.962748,-0.994403,-0.970735,-0.963483,-0.93926,...,-0.640736,-0.034956,0.202302,0.064103,0.145068,-0.702771,0.280083,-0.079346,2,5
3,0.270298,-0.032614,-0.11752,-0.994743,-0.973268,-0.967091,-0.995274,-0.974471,-0.968897,-0.93861,...,-0.736124,-0.017067,0.154438,0.340134,0.296407,-0.698954,0.284114,-0.077108,2,5
4,0.274833,-0.027848,-0.129527,-0.993852,-0.967445,-0.978295,-0.994111,-0.965953,-0.977346,-0.93861,...,-0.846595,-0.002223,-0.040046,0.736715,-0.118545,-0.692245,0.290722,-0.073857,2,5


#Split the training data into features (X) and target (y)

In [47]:
# Split the training data into features (X) and target (y)
from sklearn.model_selection import train_test_split
x=train_df.drop('activity',axis=1)
y=train_df['activity']
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size= 0.30)


In [48]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,552,553,554,555,556,557,558,559,560,Subject
3477,0.274825,-0.013703,-0.093314,-0.997171,-0.993024,-0.996281,-0.997856,-0.993256,-0.996132,-0.943153,...,-0.235553,-0.715445,0.046544,0.763700,0.591247,0.162288,-0.895538,0.022382,0.011689,17
1696,0.284713,0.014880,-0.144542,-0.282404,0.219024,-0.450050,-0.333625,0.122858,-0.464443,-0.164490,...,-0.531205,-0.845684,0.087610,-0.612984,-0.728600,0.710930,-0.744510,0.274354,0.028908,8
4530,0.226022,-0.022994,-0.064793,0.143290,0.320146,-0.069543,0.147774,0.324122,-0.094944,0.207570,...,-0.052839,-0.359929,0.355533,-0.781268,-0.911118,-0.590709,-0.693165,0.309937,0.027002,22
6126,0.278136,-0.018466,-0.106218,-0.995414,-0.989730,-0.975947,-0.995872,-0.988527,-0.973561,-0.941164,...,-0.585900,-0.836982,-0.049677,0.054797,0.224556,0.124440,-0.879279,0.023476,-0.028330,27
4890,0.270622,-0.018371,-0.128083,-0.993389,-0.976670,-0.956375,-0.994076,-0.980837,-0.964258,-0.936420,...,-0.492321,-0.855431,0.003227,-0.164720,0.095984,-0.571969,-0.770441,0.156520,0.172082,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5428,0.299745,-0.024899,-0.127244,-0.221862,-0.155008,-0.411945,-0.250316,-0.136134,-0.352752,0.164439,...,-0.090364,-0.429137,-0.313533,-0.654112,0.975158,0.913561,-0.880335,0.143462,-0.055110,25
347,0.236013,-0.026913,0.186395,-0.854112,-0.795111,-0.605794,-0.909125,-0.790751,-0.563723,-0.682518,...,-0.302844,-0.673707,0.076072,-0.489161,-0.254101,0.084231,-0.750788,0.268367,0.044542,3
4590,0.250420,-0.002558,-0.018073,-0.960868,-0.886785,-0.920221,-0.971927,-0.887937,-0.917446,-0.837707,...,-0.607591,-0.899692,0.034445,-0.178498,-0.361697,-0.162110,-0.858034,0.043177,-0.070119,22
6052,0.281629,-0.021067,-0.132481,-0.986899,-0.885189,-0.950771,-0.989463,-0.894589,-0.948049,-0.923481,...,0.407152,0.198499,-0.087705,-0.388333,-0.005925,0.587947,-0.828205,0.216873,0.020983,27


In [49]:
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,552,553,554,555,556,557,558,559,560,Subject
3263,0.276574,-0.017099,-0.115345,-0.998784,-0.993346,-0.997396,-0.998874,-0.991839,-0.997391,-0.943527,...,-0.724132,-0.927046,-0.112592,-0.164960,0.484236,0.559707,-0.670573,0.218540,0.217533,17
1304,0.232420,-0.068451,-0.185742,-0.000671,0.223806,-0.126049,-0.095162,0.136644,-0.164698,0.411151,...,-0.160197,-0.542678,-0.008765,0.241652,-0.871194,-0.007761,-0.653884,0.336610,0.035325,6
4032,0.279649,-0.018096,-0.104727,-0.997583,-0.990626,-0.988209,-0.997784,-0.989296,-0.989395,-0.941369,...,-0.438712,-0.765080,-0.056047,-0.160507,-0.260349,-0.241048,-0.751471,0.105418,0.198016,21
847,0.247202,0.019419,0.267377,-0.864615,-0.864397,-0.495026,-0.897712,-0.857959,-0.447768,-0.757333,...,0.089640,-0.307401,0.006119,0.458182,0.490962,-0.881515,-0.940234,0.115975,-0.022238,5
6815,0.269932,-0.029598,-0.137233,-0.995061,-0.968045,-0.981231,-0.995962,-0.963640,-0.981232,-0.939587,...,-0.548231,-0.883777,0.056740,-0.115985,0.078402,-0.584397,-0.905673,0.163409,0.019775,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1010,0.273333,-0.043515,-0.138510,-0.982442,-0.848692,-0.915546,-0.985643,-0.850710,-0.911897,-0.922079,...,0.010018,-0.333869,-0.046867,0.222761,0.337556,-0.893036,-0.731109,0.267118,-0.061286,6
3825,0.265666,-0.020111,-0.172401,-0.929808,-0.857688,-0.910385,-0.935453,-0.844306,-0.901192,-0.898163,...,0.293602,-0.038087,0.559107,-0.033764,-0.000176,0.465328,0.106242,-0.339549,-0.517103,19
529,0.273254,-0.026321,-0.129500,-0.997514,-0.978270,-0.989602,-0.998221,-0.975785,-0.990276,-0.941013,...,-0.537599,-0.755762,-0.033249,-0.089349,0.459226,0.040911,-0.744320,0.273851,0.036708,3
1711,0.336732,-0.008510,-0.107980,-0.178925,0.288646,-0.387462,-0.204957,0.184991,-0.413729,0.036552,...,-0.599617,-0.865415,-0.614391,0.783422,0.828917,0.953421,-0.751183,0.269666,0.030041,8


In [50]:
y_train

3477    4
1696    1
4530    2
6126    4
4890    5
       ..
5428    3
347     5
4590    4
6052    5
5083    5
Name: activity, Length: 5146, dtype: int64

In [51]:
y_test

3263    5
1304    2
4032    4
847     5
6815    5
       ..
1010    5
3825    4
529     5
1711    1
1409    1
Name: activity, Length: 2206, dtype: int64

# Display the shapes of the split data

In [52]:
X_train.shape, X_test.shape

((5146, 562), (2206, 562))

In [53]:
y_train.shape, y_test.shape

((5146,), (2206,))

# Train a Logistic Regression model

In [54]:
from sklearn.linear_model import LogisticRegression
model= LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

# Model's accuracy on the training data

In [55]:
# Evaluate the model's accuracy on the training data
model.score(X_train,y_train)


0.9939759036144579

# Predictions on the test data

In [56]:
# Make predictions on the test data
pred=model.predict(X_test)

# Model's accuracy on the test data

In [57]:
# Evaluate the model's accuracy on the test data
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred)

0.9845874886672711

# Classification report

In [58]:
# Print a detailed classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       368
           2       0.99      1.00      1.00       326
           3       1.00      1.00      1.00       262
           4       0.96      0.96      0.96       377
           5       0.97      0.96      0.96       438
           6       1.00      1.00      1.00       435

    accuracy                           0.98      2206
   macro avg       0.99      0.99      0.99      2206
weighted avg       0.98      0.98      0.98      2206



#Confusion matrix

In [59]:
# Compute and display the confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, pred)

array([[367,   1,   0,   0,   0,   0],
       [  1, 325,   0,   0,   0,   0],
       [  0,   0, 262,   0,   0,   0],
       [  0,   1,   0, 361,  15,   0],
       [  0,   0,   0,  16, 422,   0],
       [  0,   0,   0,   0,   0, 435]])

#  F1 score

In [60]:
# Compute and display the F1 score
from sklearn.metrics import f1_score
f1_score(y_test, pred, average='macro')

0.9858032563767088