## Predictive Analysis of Human Activities

In [2]:
# Convert all the .txt recording files into a single concatenated .csv file
import os
import pandas as pd

# Read every recording file of each participant and concatenate them per participant
def read_and_concat_data(activity_dir, num_participants=8):
    """Load one activity's recordings as a list of per-participant DataFrames.

    For each participant sub-directory ``p1`` ... ``p<num_participants>`` of
    ``activity_dir``, every regular, non-hidden file is parsed as a
    header-less comma-separated file, and the resulting frames are
    concatenated in sorted file-name order.

    Parameters
    ----------
    activity_dir : str
        Directory that holds the participant sub-directories.
    num_participants : int, default 8
        Number of participant directories to read, numbered from 1.

    Returns
    -------
    list of pandas.DataFrame
        One frame per participant, in participant order. The frames are
        concatenated without ``ignore_index``, so row labels restart at 0
        for every source file.

    Raises
    ------
    FileNotFoundError
        If a participant directory does not exist.
    ValueError
        If a participant directory contains no readable files (raised by
        ``pd.concat`` on an empty list).
    """
    data = []
    for i in range(1, num_participants + 1):
        directory = os.path.join(activity_dir, f"p{i}")
        # os.listdir gives names in arbitrary order, so sort them to make the
        # row order reproducible. Hidden entries (e.g. .DS_Store) and
        # sub-directories are skipped because they are not recordings.
        filenames = sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(directory, name))
        )
        participant_data = [
            pd.read_csv(os.path.join(directory, name), sep=",", header=None)
            for name in filenames
        ]
        data.append(pd.concat(participant_data))
    return data

# Label each participant's frame with sensor column names, subject and activity
def preprocess_data(data, activity):
    """Name the sensor columns and tag every frame with subject and activity.

    Each DataFrame in ``data`` is modified in place: its columns are renamed
    to the 45 channel names listed below, then a ``Subject`` column (the
    frame's 1-based position in ``data``) and an ``Activity`` column (the
    ``activity`` value) are inserted at positions 45 and 46. Nothing is
    returned.
    """
    sensor_columns = [
        "T_xacc", "T_yacc", "T_zacc", "T_xgyro", "T_ygyro", "T_zgyro", "T_xmag", "T_ymag", "T_zmag",
        "RA_xacc", "RA_yacc", "RA_zacc", "RA_xgyro", "RA_ygyro", "RA_zgyro", "RA_xmag", "RA_ymag", "RA_zmag",
        "LA_xacc", "LA_yacc", "LA_zacc", "LA_xgyro", "LA_ygyro", "LA_zgyro", "LA_xmag", "LA_ymag", "LA_zmag",
        "RL_xacc", "RL_yacc", "RL_zacc", "RL_xgyro", "RL_ygyro", "RL_zgyro", "RL_xmag", "RL_ymag", "RL_zmag",
        "LL_xacc", "LL_yacc", "LL_zacc", "LL_xgyro", "LL_ygyro", "LL_zgyro", "LL_xmag", "LL_ymag", "LL_zmag",
    ]
    for subject, frame in enumerate(data, start=1):
        frame.columns = sensor_columns
        frame.insert(45, 'Subject', subject)
        frame.insert(46, 'Activity', activity)

# Combine the labelled per-participant frames of every activity into one dataset

# Root directory that contains the activity folders a01, a02, ...
# Set the HAR_DATA_DIR environment variable to run on another machine; the
# default is the path this notebook was originally run with.
DATA_DIR = os.environ.get(
    'HAR_DATA_DIR',
    '/Users/omkarvilasnarkar/Downloads/Project/Predictions_of_Human_Activities/data_txt',
)
NUM_ACTIVITIES = 19  # activity folders are numbered a01 ... a19

activities_data = []
for activity_id in range(1, NUM_ACTIVITIES + 1):
    activity_dir = os.path.join(DATA_DIR, f'a{activity_id:02d}')
    activity_data = read_and_concat_data(activity_dir)
    # preprocess_data modifies the frames in place and returns nothing.
    preprocess_data(activity_data, activity_id)
    activities_data.extend(activity_data)

# ignore_index=True gives the combined frame one continuous 0..n-1 row index.
dataset = pd.concat(activities_data, ignore_index=True)
dataset.head()


Unnamed: 0,T_xacc,T_yacc,T_zacc,T_xgyro,T_ygyro,T_zgyro,T_xmag,T_ymag,T_zmag,RA_xacc,...,LL_yacc,LL_zacc,LL_xgyro,LL_ygyro,LL_zgyro,LL_xmag,LL_ymag,LL_zmag,Subject,Activity
0,8.0411,1.153,5.5141,-0.0241,-0.002951,0.007333,-0.78981,-0.072298,0.14805,0.61037,...,-9.0633,2.6393,-0.004407,-0.000894,-0.000569,0.74138,0.30115,-0.056264,1,1
1,8.1009,1.1382,5.3951,-0.002307,-0.005099,-0.00722,-0.78982,-0.071518,0.14791,0.62262,...,-9.0633,2.6615,-0.00811,-0.005275,0.004845,0.74128,0.30287,-0.057228,1,1
2,8.0859,1.1159,5.4321,0.001318,0.000328,-0.001767,-0.79104,-0.072855,0.15002,0.59345,...,-9.0635,2.6326,-0.014279,0.007983,-0.002314,0.74225,0.30325,-0.054814,1,1
3,8.071,1.1382,5.4545,0.001293,-0.006885,-0.005384,-0.79078,-0.070271,0.15001,0.57373,...,-9.0935,2.6328,-0.018723,0.006059,-0.007749,0.7423,0.30325,-0.054149,1,1
4,8.0708,1.1231,5.5141,-0.021358,0.000598,0.0055,-0.79211,-0.0722,0.15011,0.54932,...,-9.0935,2.618,0.007612,0.009518,-0.018648,0.73976,0.30298,-0.056217,1,1


In [4]:
# Write the concatenated DataFrame to 'dataset.csv'; index=False leaves the
# row index out of the file.
dataset.to_csv('dataset.csv', index=False)
# `df` is a second name for the same DataFrame object (no copy is made), so
# an in-place change made through either name is visible through both.
df = dataset


In [46]:
# Write the in-memory DataFrame `df` to the local file 'dataset.parquet'.
# NOTE(review): this cell only creates a local file; any upload (e.g. to
# Azure) is not performed here.
df.to_parquet('dataset.parquet')

In [5]:
# Report how many rows and columns the dataset has

print(f' Rows : {len(df)},\n Columns: {len(df.columns)}')

 Rows : 1140000,
 Columns: 47


In [6]:
# Show the column labels of the dataset

df.columns

Index(['T_xacc', 'T_yacc', 'T_zacc', 'T_xgyro', 'T_ygyro', 'T_zgyro', 'T_xmag',
       'T_ymag', 'T_zmag', 'RA_xacc', 'RA_yacc', 'RA_zacc', 'RA_xgyro',
       'RA_ygyro', 'RA_zgyro', 'RA_xmag', 'RA_ymag', 'RA_zmag', 'LA_xacc',
       'LA_yacc', 'LA_zacc', 'LA_xgyro', 'LA_ygyro', 'LA_zgyro', 'LA_xmag',
       'LA_ymag', 'LA_zmag', 'RL_xacc', 'RL_yacc', 'RL_zacc', 'RL_xgyro',
       'RL_ygyro', 'RL_zgyro', 'RL_xmag', 'RL_ymag', 'RL_zmag', 'LL_xacc',
       'LL_yacc', 'LL_zacc', 'LL_xgyro', 'LL_ygyro', 'LL_zgyro', 'LL_xmag',
       'LL_ymag', 'LL_zmag', 'Subject', 'Activity'],
      dtype='object')

In [7]:
# Are there any missing (null) values anywhere in the dataset?

df.isna().to_numpy().any()

False