In [1]:
# Do the necessary imports

import time
import numpy    as np
import pandas   as pd
import seaborn  as sb
import matplotlib.pyplot as plt

from sklearn import pipeline      # Pipeline
from sklearn import preprocessing # OrdinalEncoder, LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn import impute
from sklearn import compose
from sklearn import model_selection # train_test_split
from sklearn import metrics         # accuracy_score, balanced_accuracy_score, plot_confusion_matrix
from sklearn import set_config

set_config(display='diagram') # Useful for display the pipeline

print('setup complete')

setup complete


In [2]:
df = pd.read_csv('Datasets/feature_eng.csv')

In [3]:
df = df.drop(['speed_mean'], axis = 1)

In [4]:
df.head()

Unnamed: 0,id,user,android_sensor_gyroscope_mean,android_sensor_accelerometer_std,android_sensor_gyroscope_std,android_sensor_gyroscope_uncalibrated_mean,android_sensor_accelerometer_max,android_sensor_linear_acceleration_mean,android_sensor_rotation_vector_mean,android_sensor_rotation_vector_max,android_sensor_accelerometer_min,android_sensor_magnetic_field_uncalibrated_min,sound_min,target,acc_gyro,Lin_speed
0,16170,Luca,0.001651,0.014626,0.000737,0.016221,9.849411,0.020978,0.050413,0.056351,9.758895,51.199707,,Still,8.3e-05,0.003784
1,15871,Luca,0.036326,1.775944,0.02029,0.039023,17.146631,0.87922,0.999981,0.999999,7.707437,82.40989,89.065143,Car,0.036325,12.906844
2,16811,Luca,0.001525,0.011199,0.000713,0.016302,9.849262,0.03411,0.610456,0.610456,9.804817,55.501802,,Still,0.000931,0.006153
3,15831,Luca,0.03644,0.862553,0.010553,0.050759,12.304298,1.488361,0.998112,0.998112,7.659674,95.664309,87.470377,Car,0.036372,21.848949
4,876,andrea,0.183202,0.504117,0.098819,0.265652,10.891645,1.658308,0.322242,0.378193,8.965621,156.795909,89.770732,Car,0.059035,24.343749


In [5]:
print(len(df))
df.nunique()

5893


id                                                5893
user                                                13
android_sensor_gyroscope_mean                     5018
android_sensor_accelerometer_std                  5861
android_sensor_gyroscope_std                      4926
android_sensor_gyroscope_uncalibrated_mean        5000
android_sensor_accelerometer_max                  5838
android_sensor_linear_acceleration_mean           4972
android_sensor_rotation_vector_mean               5002
android_sensor_rotation_vector_max                4991
android_sensor_accelerometer_min                  5833
android_sensor_magnetic_field_uncalibrated_min    4707
sound_min                                         2306
target                                               5
acc_gyro                                          5124
Lin_speed                                         4972
dtype: int64

In [6]:
df['user'].unique().tolist()

['Luca',
 'andrea',
 'Federica',
 'michelangelo',
 'Damiano',
 'Claudio',
 'Vincenzo',
 'Serena',
 'Pierpaolo',
 'IvanHeibi',
 'AndreaCarpineti',
 'Elena',
 'Riccardo']

In [7]:
#TRAIN DATA

#drop these users from train data so that the model will never see them in test

luca_index = df[df['user'] == 'Luca'].index
Ivan_index = df[df['user'] == 'IvanHeibi'].index
Pier_index = df[df['user'] == 'Pierpaolo'].index
Riccardo_index = df[df['user'] == 'Riccardo'].index

# Delete these row indexes from dataFrame
train_df = df.drop(luca_index)
train_df = train_df.drop(Ivan_index)
train_df = train_df.drop(Pier_index)
train_df = train_df.drop(Riccardo_index)

print(len(train_df))
train_df.head()

4454


Unnamed: 0,id,user,android_sensor_gyroscope_mean,android_sensor_accelerometer_std,android_sensor_gyroscope_std,android_sensor_gyroscope_uncalibrated_mean,android_sensor_accelerometer_max,android_sensor_linear_acceleration_mean,android_sensor_rotation_vector_mean,android_sensor_rotation_vector_max,android_sensor_accelerometer_min,android_sensor_magnetic_field_uncalibrated_min,sound_min,target,acc_gyro,Lin_speed
4,876,andrea,0.183202,0.504117,0.098819,0.265652,10.891645,1.658308,0.322242,0.378193,8.965621,156.795909,89.770732,Car,0.059035,24.343749
5,13480,Federica,,0.103857,,,9.520044,,,,9.149029,,57.361127,Car,,
6,18786,michelangelo,0.0041,0.013489,0.001801,0.042991,9.816197,0.027441,0.201198,0.201318,9.758751,170.521377,65.158369,Train,0.000825,0.952804
8,343,andrea,0.041554,0.255053,0.030074,0.065754,10.027802,0.278997,0.780795,0.786845,8.996134,134.611517,89.808441,Bus,0.032445,1.49781
9,4650,andrea,0.037451,0.267791,0.021391,0.030491,10.068528,0.255172,0.968082,0.968806,9.014786,140.174223,89.815738,Train,0.036255,8.860111


In [9]:
#TEST DATA

andrea_index = df[df['user'] == 'andrea'].index
federica_index = df[df['user'] == 'Federica'].index
michel_index = df[df['user'] == 'michelangelo'].index
damiano_index = df[df['user'] == 'Damiano'].index
claudio_index = df[df['user'] == 'Claudio'].index
vincenzo_index = df[df['user'] == 'Vincenzo'].index
serena_index = df[df['user'] == 'Serena'].index
andreacarpi_index = df[df['user'] == 'AndreaCarpineti'].index
elena_index = df[df['user'] == 'Elena'].index


# Delete these row indexes from dataFrame
test_df = df.drop(andrea_index)
test_df = test_df.drop(federica_index)
test_df = test_df.drop(michel_index)
test_df = test_df.drop(damiano_index)
test_df = test_df.drop(claudio_index)
test_df = test_df.drop(vincenzo_index)
test_df = test_df.drop(serena_index)
test_df = test_df.drop(andreacarpi_index)
test_df = test_df.drop(elena_index)


print(len(test_df))
test_df.head()

1439


Unnamed: 0,id,user,android_sensor_gyroscope_mean,android_sensor_accelerometer_std,android_sensor_gyroscope_std,android_sensor_gyroscope_uncalibrated_mean,android_sensor_accelerometer_max,android_sensor_linear_acceleration_mean,android_sensor_rotation_vector_mean,android_sensor_rotation_vector_max,android_sensor_accelerometer_min,android_sensor_magnetic_field_uncalibrated_min,sound_min,target,acc_gyro,Lin_speed
0,16170,Luca,0.001651,0.014626,0.000737,0.016221,9.849411,0.020978,0.050413,0.056351,9.758895,51.199707,,Still,8.3e-05,0.003784
1,15871,Luca,0.036326,1.775944,0.02029,0.039023,17.146631,0.87922,0.999981,0.999999,7.707437,82.40989,89.065143,Car,0.036325,12.906844
2,16811,Luca,0.001525,0.011199,0.000713,0.016302,9.849262,0.03411,0.610456,0.610456,9.804817,55.501802,,Still,0.000931,0.006153
3,15831,Luca,0.03644,0.862553,0.010553,0.050759,12.304298,1.488361,0.998112,0.998112,7.659674,95.664309,87.470377,Car,0.036372,21.848949
7,15268,Luca,0.140902,1.044684,0.119667,0.142145,14.202603,1.00352,0.929187,0.941615,6.431646,58.646911,,Bus,0.130924,5.38745


In [10]:
#split the features and target in train data
print(train_df.user.unique())
print(test_df.user.unique())

X = train_df.drop(['target', 'id','user'], axis = 1)
y = train_df.target

print(len(X))
print(len(y))

['andrea' 'Federica' 'michelangelo' 'Damiano' 'Claudio' 'Vincenzo'
 'Serena' 'AndreaCarpineti' 'Elena']
['Luca' 'Pierpaolo' 'IvanHeibi' 'Riccardo']
4454
4454


In [11]:
#split the features and target in test data


X_test = test_df.drop(['target', 'id','user'], axis = 1)
y_test = test_df.target

print(len(X_test))
print(len(y_test))

1439
1439


In [12]:
X_test.head()

Unnamed: 0,android_sensor_gyroscope_mean,android_sensor_accelerometer_std,android_sensor_gyroscope_std,android_sensor_gyroscope_uncalibrated_mean,android_sensor_accelerometer_max,android_sensor_linear_acceleration_mean,android_sensor_rotation_vector_mean,android_sensor_rotation_vector_max,android_sensor_accelerometer_min,android_sensor_magnetic_field_uncalibrated_min,sound_min,acc_gyro,Lin_speed
0,0.001651,0.014626,0.000737,0.016221,9.849411,0.020978,0.050413,0.056351,9.758895,51.199707,,8.3e-05,0.003784
1,0.036326,1.775944,0.02029,0.039023,17.146631,0.87922,0.999981,0.999999,7.707437,82.40989,89.065143,0.036325,12.906844
2,0.001525,0.011199,0.000713,0.016302,9.849262,0.03411,0.610456,0.610456,9.804817,55.501802,,0.000931,0.006153
3,0.03644,0.862553,0.010553,0.050759,12.304298,1.488361,0.998112,0.998112,7.659674,95.664309,87.470377,0.036372,21.848949
7,0.140902,1.044684,0.119667,0.142145,14.202603,1.00352,0.929187,0.941615,6.431646,58.646911,,0.130924,5.38745


In [13]:
print(X.nunique())
print(y.nunique())

android_sensor_gyroscope_mean                     3586
android_sensor_accelerometer_std                  4428
android_sensor_gyroscope_std                      3499
android_sensor_gyroscope_uncalibrated_mean        3567
android_sensor_accelerometer_max                  4405
android_sensor_linear_acceleration_mean           3613
android_sensor_rotation_vector_mean               3596
android_sensor_rotation_vector_max                3593
android_sensor_accelerometer_min                  4400
android_sensor_magnetic_field_uncalibrated_min    3366
sound_min                                         1942
acc_gyro                                          3692
Lin_speed                                         3613
dtype: int64
5


In [14]:
#numerical columns in the train data

num_vars  = list(X.columns)
num_vars

['android_sensor_gyroscope_mean',
 'android_sensor_accelerometer_std',
 'android_sensor_gyroscope_std',
 'android_sensor_gyroscope_uncalibrated_mean',
 'android_sensor_accelerometer_max',
 'android_sensor_linear_acceleration_mean',
 'android_sensor_rotation_vector_mean',
 'android_sensor_rotation_vector_max',
 'android_sensor_accelerometer_min',
 'android_sensor_magnetic_field_uncalibrated_min',
 'sound_min',
 'acc_gyro',
 'Lin_speed']

In [15]:
# Pipeline for Tree models

num_4_treeModels = pipeline.Pipeline(steps=[
  ('imputer', impute.SimpleImputer(missing_values=np.nan ,strategy='mean')),
  ('Normalizer',RobustScaler())
  
])


tree_prepro = compose.ColumnTransformer(transformers=[
    ('num', num_4_treeModels, num_vars),
], remainder='drop') # Drop other vars not specified in num_vars or cat_vars

tree_prepro

In [16]:
#import the tress models

from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.experimental  import enable_hist_gradient_boosting # Necesary for HistGradientBoostingClassifier
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier

In [18]:
#create a dict with the models that we want to train on

tree_classifiers = {
  "Decision Tree": DecisionTreeClassifier(),
  "Extra Trees":ExtraTreesClassifier(),
  "Random Forest":RandomForestClassifier(),
  "AdaBoost":AdaBoostClassifier(),
  "Skl GBM":GradientBoostingClassifier(),
  "Skl HistGBM":GradientBoostingClassifier(),
  "XGBoost":XGBClassifier(),
  "LightGBM":LGBMClassifier(),
  "CatBoost":CatBoostClassifier()
}

In [19]:
#pass the pipeline to everymodel

tree_classifiers = {name: pipeline.make_pipeline(tree_prepro, model) for name, model in tree_classifiers.items()}
#tree_classifiers

In [20]:
#fit the data to the model

results = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})

x_train, x_val, y_train, y_val = model_selection.train_test_split(
    X, y,
    test_size=0.2,
    stratify = y,   
    random_state=37
)

for model_name, model in tree_classifiers.items():

    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time
    
    pred = model.predict(X_test)  
    
    results = results.append({"Model":    model_name,
                              "Accuracy": metrics.accuracy_score(y_test, pred)*100,
                              "Bal Acc.": metrics.balanced_accuracy_score(y_test, pred)*100,
                              "Time":     total_time},
                              ignore_index=True)

    
results_ord = results.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_ord.index += 1 
results_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')



Learning rate set to 0.084424
0:	learn: 1.4469197	total: 79.4ms	remaining: 1m 19s
1:	learn: 1.3252822	total: 99.4ms	remaining: 49.6s
2:	learn: 1.2295192	total: 125ms	remaining: 41.6s
3:	learn: 1.1477350	total: 153ms	remaining: 38s
4:	learn: 1.0797950	total: 171ms	remaining: 34s
5:	learn: 1.0196314	total: 190ms	remaining: 31.4s
6:	learn: 0.9657427	total: 216ms	remaining: 30.7s
7:	learn: 0.9103092	total: 244ms	remaining: 30.3s
8:	learn: 0.8710024	total: 277ms	remaining: 30.5s
9:	learn: 0.8377962	total: 298ms	remaining: 29.5s
10:	learn: 0.8049416	total: 313ms	remaining: 28.2s
11:	learn: 0.7727099	total: 338ms	remaining: 27.8s
12:	learn: 0.7439675	total: 360ms	remaining: 27.4s
13:	learn: 0.7116088	total: 378ms	remaining: 26.7s
14:	learn: 0.6859418	total: 407ms	remaining: 26.8s
15:	learn: 0.6654295	total: 433ms	remaining: 26.6s
16:	learn: 0.6427393	total: 457ms	remaining: 26.4s
17:	learn: 0.6182669	total: 476ms	remaining: 26s
18:	learn: 0.5988327	total: 501ms	remaining: 25.9s
19:	learn: 0.5

156:	learn: 0.1399335	total: 3.67s	remaining: 19.7s
157:	learn: 0.1394521	total: 3.69s	remaining: 19.7s
158:	learn: 0.1389159	total: 3.71s	remaining: 19.6s
159:	learn: 0.1385899	total: 3.73s	remaining: 19.6s
160:	learn: 0.1381912	total: 3.75s	remaining: 19.5s
161:	learn: 0.1376117	total: 3.77s	remaining: 19.5s
162:	learn: 0.1372511	total: 3.79s	remaining: 19.4s
163:	learn: 0.1368513	total: 3.81s	remaining: 19.4s
164:	learn: 0.1353599	total: 3.83s	remaining: 19.4s
165:	learn: 0.1342105	total: 3.85s	remaining: 19.3s
166:	learn: 0.1336870	total: 3.86s	remaining: 19.3s
167:	learn: 0.1333718	total: 3.89s	remaining: 19.3s
168:	learn: 0.1329040	total: 3.91s	remaining: 19.2s
169:	learn: 0.1327954	total: 3.93s	remaining: 19.2s
170:	learn: 0.1322797	total: 3.96s	remaining: 19.2s
171:	learn: 0.1319402	total: 3.99s	remaining: 19.2s
172:	learn: 0.1313632	total: 4.02s	remaining: 19.2s
173:	learn: 0.1308417	total: 4.04s	remaining: 19.2s
174:	learn: 0.1304225	total: 4.07s	remaining: 19.2s
175:	learn: 

322:	learn: 0.0854067	total: 7.35s	remaining: 15.4s
323:	learn: 0.0851937	total: 7.37s	remaining: 15.4s
324:	learn: 0.0850800	total: 7.39s	remaining: 15.3s
325:	learn: 0.0848587	total: 7.41s	remaining: 15.3s
326:	learn: 0.0846231	total: 7.43s	remaining: 15.3s
327:	learn: 0.0843731	total: 7.45s	remaining: 15.3s
328:	learn: 0.0841864	total: 7.47s	remaining: 15.2s
329:	learn: 0.0840855	total: 7.5s	remaining: 15.2s
330:	learn: 0.0838897	total: 7.51s	remaining: 15.2s
331:	learn: 0.0838219	total: 7.54s	remaining: 15.2s
332:	learn: 0.0835791	total: 7.56s	remaining: 15.2s
333:	learn: 0.0833465	total: 7.58s	remaining: 15.1s
334:	learn: 0.0832511	total: 7.6s	remaining: 15.1s
335:	learn: 0.0831525	total: 7.62s	remaining: 15.1s
336:	learn: 0.0829548	total: 7.64s	remaining: 15s
337:	learn: 0.0828722	total: 7.66s	remaining: 15s
338:	learn: 0.0827322	total: 7.68s	remaining: 15s
339:	learn: 0.0826042	total: 7.7s	remaining: 14.9s
340:	learn: 0.0822733	total: 7.72s	remaining: 14.9s
341:	learn: 0.0820248

489:	learn: 0.0610799	total: 11s	remaining: 11.4s
490:	learn: 0.0610208	total: 11s	remaining: 11.4s
491:	learn: 0.0608922	total: 11s	remaining: 11.4s
492:	learn: 0.0608204	total: 11s	remaining: 11.3s
493:	learn: 0.0606541	total: 11s	remaining: 11.3s
494:	learn: 0.0605438	total: 11.1s	remaining: 11.3s
495:	learn: 0.0603517	total: 11.1s	remaining: 11.3s
496:	learn: 0.0602123	total: 11.1s	remaining: 11.2s
497:	learn: 0.0601346	total: 11.1s	remaining: 11.2s
498:	learn: 0.0599994	total: 11.1s	remaining: 11.2s
499:	learn: 0.0599285	total: 11.2s	remaining: 11.2s
500:	learn: 0.0598520	total: 11.2s	remaining: 11.1s
501:	learn: 0.0597817	total: 11.2s	remaining: 11.1s
502:	learn: 0.0595920	total: 11.2s	remaining: 11.1s
503:	learn: 0.0594192	total: 11.3s	remaining: 11.1s
504:	learn: 0.0593412	total: 11.3s	remaining: 11.1s
505:	learn: 0.0591482	total: 11.3s	remaining: 11s
506:	learn: 0.0590153	total: 11.3s	remaining: 11s
507:	learn: 0.0589230	total: 11.3s	remaining: 11s
508:	learn: 0.0587866	total:

656:	learn: 0.0469154	total: 14.6s	remaining: 7.6s
657:	learn: 0.0468474	total: 14.6s	remaining: 7.58s
658:	learn: 0.0467831	total: 14.6s	remaining: 7.56s
659:	learn: 0.0466878	total: 14.6s	remaining: 7.54s
660:	learn: 0.0466361	total: 14.6s	remaining: 7.51s
661:	learn: 0.0465906	total: 14.7s	remaining: 7.5s
662:	learn: 0.0465390	total: 14.7s	remaining: 7.47s
663:	learn: 0.0464656	total: 14.7s	remaining: 7.45s
664:	learn: 0.0464115	total: 14.7s	remaining: 7.43s
665:	learn: 0.0463427	total: 14.8s	remaining: 7.41s
666:	learn: 0.0462685	total: 14.8s	remaining: 7.39s
667:	learn: 0.0461886	total: 14.8s	remaining: 7.37s
668:	learn: 0.0461310	total: 14.8s	remaining: 7.34s
669:	learn: 0.0460976	total: 14.9s	remaining: 7.32s
670:	learn: 0.0460508	total: 14.9s	remaining: 7.3s
671:	learn: 0.0459909	total: 14.9s	remaining: 7.27s
672:	learn: 0.0459433	total: 14.9s	remaining: 7.25s
673:	learn: 0.0458832	total: 14.9s	remaining: 7.23s
674:	learn: 0.0458455	total: 15s	remaining: 7.21s
675:	learn: 0.045

824:	learn: 0.0373930	total: 18.2s	remaining: 3.87s
825:	learn: 0.0372931	total: 18.2s	remaining: 3.84s
826:	learn: 0.0372742	total: 18.3s	remaining: 3.82s
827:	learn: 0.0372475	total: 18.3s	remaining: 3.8s
828:	learn: 0.0372200	total: 18.3s	remaining: 3.78s
829:	learn: 0.0371995	total: 18.3s	remaining: 3.75s
830:	learn: 0.0371612	total: 18.3s	remaining: 3.73s
831:	learn: 0.0371179	total: 18.4s	remaining: 3.71s
832:	learn: 0.0370794	total: 18.4s	remaining: 3.69s
833:	learn: 0.0370411	total: 18.4s	remaining: 3.66s
834:	learn: 0.0370013	total: 18.4s	remaining: 3.64s
835:	learn: 0.0369397	total: 18.5s	remaining: 3.62s
836:	learn: 0.0369146	total: 18.5s	remaining: 3.6s
837:	learn: 0.0368943	total: 18.5s	remaining: 3.57s
838:	learn: 0.0368569	total: 18.5s	remaining: 3.55s
839:	learn: 0.0366914	total: 18.5s	remaining: 3.53s
840:	learn: 0.0366442	total: 18.6s	remaining: 3.51s
841:	learn: 0.0366019	total: 18.6s	remaining: 3.49s
842:	learn: 0.0365464	total: 18.6s	remaining: 3.46s
843:	learn: 0.

983:	learn: 0.0310777	total: 21.6s	remaining: 352ms
984:	learn: 0.0310453	total: 21.6s	remaining: 330ms
985:	learn: 0.0309819	total: 21.7s	remaining: 308ms
986:	learn: 0.0309497	total: 21.7s	remaining: 286ms
987:	learn: 0.0309353	total: 21.7s	remaining: 264ms
988:	learn: 0.0308669	total: 21.7s	remaining: 242ms
989:	learn: 0.0308019	total: 21.8s	remaining: 220ms
990:	learn: 0.0307664	total: 21.8s	remaining: 198ms
991:	learn: 0.0307407	total: 21.8s	remaining: 176ms
992:	learn: 0.0307303	total: 21.8s	remaining: 154ms
993:	learn: 0.0306302	total: 21.9s	remaining: 132ms
994:	learn: 0.0306070	total: 21.9s	remaining: 110ms
995:	learn: 0.0305724	total: 21.9s	remaining: 87.9ms
996:	learn: 0.0305360	total: 21.9s	remaining: 65.9ms
997:	learn: 0.0305000	total: 21.9s	remaining: 44ms
998:	learn: 0.0304735	total: 22s	remaining: 22ms
999:	learn: 0.0304393	total: 22s	remaining: 0us


Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,CatBoost,80.681028,79.908306,22.606813
2,LightGBM,76.997915,80.11974,1.793438
3,Random Forest,70.882557,68.26179,2.05422
4,AdaBoost,69.701181,56.314709,1.367148
5,Decision Tree,68.589298,67.922786,0.10147
6,XGBoost,66.643502,69.931559,4.579894
7,Extra Trees,54.621265,54.044808,0.704743
8,Skl HistGBM,54.343294,58.158814,10.287772
9,Skl GBM,54.134816,58.023114,10.787876


In [22]:
from sklearn.metrics import classification_report,confusion_matrix

print(confusion_matrix(y_test,pred))
print(classification_report(y_test, pred))

[[160 160   0   4   8]
 [  1 140   0   4   0]
 [  0   0 304   0   4]
 [  0  74   0 116   0]
 [  4   1  18   0 441]]
              precision    recall  f1-score   support

         Bus       0.97      0.48      0.64       332
         Car       0.37      0.97      0.54       145
       Still       0.94      0.99      0.97       308
       Train       0.94      0.61      0.74       190
     Walking       0.97      0.95      0.96       464

    accuracy                           0.81      1439
   macro avg       0.84      0.80      0.77      1439
weighted avg       0.90      0.81      0.82      1439



In [None]:
# Checking outliers through boxplots
xdata=data.drop(data.columns[-1],axis=1)
ydata=data[data.columns[-1]]

# Interquartile range
Q1 = xdata.quantile(0.25)
Q3 = xdata.quantile(0.75)
IQR = Q3 - Q1
v=1.5 # Pondering of IQR

# Remove outliers 
df_no_out_x=xdata[~((xdata < (Q1 - v * IQR)) | (xdata > (Q3 + v * IQR))).any(axis=1)]
df_no_out_y=ydata[~((xdata < (Q1 - v * IQR)) | (xdata > (Q3 + v * IQR))).any(axis=1)]

# Reset index in order to concat dfs
new_idxs=pd.RangeIndex(len(df_no_out_y.index))
df_no_out_x.index=new_idxs
df_no_out_y.index=new_idxs

df_no_out=pd.concat([df_no_out_x,df_no_out_y], axis=1)

df_no_out.shape