In [1]:
import pandas as pd

# Load the raw per-device exports; each file becomes its own DataFrame.
# Paths are relative to the notebook's working directory.
fitbit, apple_watch = (
    pd.read_csv(csv_path) for csv_path in ('Fitbit.csv', 'AppleWatch.csv')
)

In [2]:
# Preview the first five Fitbit rows to check the columns and value ranges.
fitbit.head(n=5)

Unnamed: 0,Heart,Calories,Steps,Distance,Age,Gender,Weight,Height,Activity
0,55,2.70432,8.0,0.003666,35,M,179.0,5.6,1.Sedentary
1,54,2.92968,13.0,0.006027,35,M,179.0,5.6,1.Sedentary
2,59,2.70432,9.0,0.004163,35,M,179.0,5.6,1.Sedentary
3,58,2.70432,11.0,0.005095,35,M,179.0,5.6,1.Sedentary
4,58,1.1268,0.0,0.0,35,M,179.0,5.6,1.Sedentary


In [3]:
# Preview the first five Apple Watch rows to compare them with the Fitbit frame.
apple_watch.head(n=5)

Unnamed: 0,Heart,Calories,Steps,Distance,Age,Gender,Weight,Height,Activity
0,60,0.925449,16.789221,14.380363,35,M,179.0,5.6,1.Sedentary
1,72,0.369344,67.091526,56.945111,35,M,179.0,5.6,1.Sedentary
2,84,0.313061,67.091526,56.945111,35,M,179.0,5.6,1.Sedentary
3,96,0.431495,67.091526,56.945111,35,M,179.0,5.6,1.Sedentary
4,108,0.943021,67.091526,56.945111,35,M,179.0,5.6,1.Sedentary


In [4]:
# Convert the Apple Watch step counts to whole numbers.
# astype(int) drops the fractional part rather than rounding
# (e.g. 16.789221 becomes 16), and it raises if the column holds NaN/inf.
apple_watch = apple_watch.assign(Steps=apple_watch['Steps'].astype(int))
apple_watch.head(n=5)

Unnamed: 0,Heart,Calories,Steps,Distance,Age,Gender,Weight,Height,Activity
0,60,0.925449,16,14.380363,35,M,179.0,5.6,1.Sedentary
1,72,0.369344,67,56.945111,35,M,179.0,5.6,1.Sedentary
2,84,0.313061,67,56.945111,35,M,179.0,5.6,1.Sedentary
3,96,0.431495,67,56.945111,35,M,179.0,5.6,1.Sedentary
4,108,0.943021,67,56.945111,35,M,179.0,5.6,1.Sedentary


In [5]:
# Label every row with the device it came from so the source stays
# identifiable once the two frames are combined.
fitbit = fitbit.assign(Device='Fitbit')
apple_watch = apple_watch.assign(Device='Apple Watch')

In [6]:
# Stack the Fitbit rows on top of the Apple Watch rows.
# ignore_index=True discards the per-file row labels and renumbers the
# combined frame 0..n-1, so labels are unique across both devices.
devices = pd.concat([fitbit, apple_watch], ignore_index=True)

# Show a random preview instead of head() so rows from both devices appear.
# random_state pins the draw so the displayed rows are identical on every
# Restart & Run All; without it the output changes on each execution.
devices.sample(n=10, random_state=42)

Unnamed: 0,Heart,Calories,Steps,Distance,Age,Gender,Weight,Height,Activity,Device
72439,76,0.197492,9.0,10.235705,33,M,215.0,6.0,2.Light,Apple Watch
59740,60,0.43791,0.0,0.0,22,M,170.0,5.1,1.Sedentary,Apple Watch
58397,61,0.0,0.0,0.0,22,M,170.0,5.1,1.Sedentary,Apple Watch
74053,60,0.0,0.0,0.0,22,M,193.0,5.7,0.Sleep,Apple Watch
51418,69,3.174266,38.0,33.149503,36,M,223.0,6.0,0.Sleep,Apple Watch
4221,92,5.18328,53.0,0.024544,24,F,115.0,5.4,1.Sedentary,Fitbit
24588,84,0.196742,0.0,0.0,23,F,170.0,5.1,1.Sedentary,Apple Watch
14529,68,0.538835,24.234378,17.173872,31,M,190.0,5.8,3.Moderate,Fitbit
67349,81,0.089735,41.0,33.264444,40,F,238.0,5.3,1.Sedentary,Apple Watch
39761,55,0.117229,0.0,0.0,31,M,167.0,5.7,1.Sedentary,Apple Watch


In [7]:
# Persist the combined frame next to the notebook.
# index=True (the pandas default) also writes the row index as the first,
# unnamed column; readers of this file need index_col=0 to restore it.
devices.to_csv('Devices.csv', index=True)

In [8]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Feature groups, split by how each group is encoded below:
# numeric columns are standardised, categorical columns are one-hot encoded.
numeric_col = ['Heart', 'Calories', 'Steps', 'Distance', 'Age', 'Weight', 'Height']
categorical_col = ['Gender', 'Activity', 'Device']

# sparse_threshold=0 forces a dense ndarray result. OneHotEncoder emits a
# sparse matrix by default and ColumnTransformer keeps the stacked output
# sparse whenever its overall density drops below the threshold (0.3 by
# default). The DataFrame construction further down needs a dense array, so
# without this the cell would only work while the data happens to be dense.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_col),
        ('cat', OneHotEncoder(), categorical_col),
    ],
    sparse_threshold=0,
)

# Single-step pipeline wrapping the preprocessor.
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# NOTE(review): the scaler and encoder are fitted on the full dataset here.
# If a train/test split happens later, fit on the training part only - confirm.
devices_transformed = pipeline.fit_transform(devices)

# Output columns follow the transformer order declared above: the numeric
# columns first (names unchanged), then the one-hot columns generated by the
# fitted encoder (e.g. 'Gender_F', 'Device_Fitbit').
column_names = (numeric_col +
                list(pipeline.named_steps['preprocessor'].named_transformers_['cat'].get_feature_names_out(categorical_col)))

# Reuse the index of `devices` so each transformed row stays aligned with the
# row it came from, even if `devices` does not carry a default 0..n-1 index.
# Note the two similar names: `devices_transformed` is the raw array,
# `devices_transform` is the labelled DataFrame.
devices_transform = pd.DataFrame(
    devices_transformed,
    columns=column_names,
    index=devices.index,
)

devices_transform.head()

Unnamed: 0,Heart,Calories,Steps,Distance,Age,Weight,Height,Gender_F,Gender_M,Activity_0.Sleep,Activity_1.Sedentary,Activity_2.Light,Activity_3.Moderate,Activity_4.Vigorous,Device_Apple Watch,Device_Fitbit
0,-0.91746,0.875994,-0.207565,-0.433049,0.568882,-0.017819,-0.016751,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,-0.972806,0.996713,-0.011635,-0.432894,0.568882,-0.017819,-0.016751,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,-0.696074,0.875994,-0.168379,-0.433016,0.568882,-0.017819,-0.016751,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,-0.75142,0.875994,-0.090007,-0.432955,0.568882,-0.017819,-0.016751,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,-0.75142,0.030964,-0.521052,-0.43329,0.568882,-0.017819,-0.016751,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [9]:
# Persist the scaled and one-hot encoded feature table next to the notebook.
# index=True (the pandas default) also writes the row index as the first,
# unnamed column; readers of this file need index_col=0 to restore it.
devices_transform.to_csv('devices_transform.csv', index=True)