In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support

In [2]:
def split_dataframe_to_windows(df, window_length, p1, p2):
    windows = []
    start = 0
    end = 0
    windows_label = []
    window_times = []
    while end < len(df) - 1:
        end = start
        while end < len(df) and df.iloc[end]['time'] - df.iloc[start]['time'] < window_length:
            end += 1
        window = df.iloc[start:end]
        if end != len(df) or df.iloc[end-1]['time'] - df.iloc[start]['time'] >= window_length:
            windows.append(window)
            window_times.append([df.iloc[start]['time'], df.iloc[end-1]['time']])
            if df.iloc[start]['time'] < float(p1):
                windows_label.append(0)
            elif df.iloc[end]['time'] > float(p2):
                windows_label.append(0)
            else:
                windows_label.append(1)
        start = end

    return windows, windows_label, window_times

In [3]:
def calculate_rise_fall_times(windows):
    rise_times_list = []
    fall_times_list = []
    for window in windows:
        rise_times = {'r_x': 0, 'r_y': 0, 'r_z': 0, 'r_mag': 0}
        fall_times = {'f_x': 0, 'f_y': 0, 'f_z': 0, 'f_mag': 0}
        for column in ['x', 'y', 'z', 'mag']:
            for i in range(1, len(window)):
                if window.iloc[i][column] > window.iloc[i-1][column]:
                    rise_times["r_"+column] += abs(window.iloc[i]['time'] - window.iloc[i-1]['time'])
                elif window.iloc[i][column] < window.iloc[i-1][column]:
                    fall_times["f_"+column] += abs(window.iloc[i]['time'] - window.iloc[i-1]['time'])
        rise_times_list.append(rise_times)
        fall_times_list.append(fall_times)

    return rise_times_list, fall_times_list

In [4]:
def calculate_rise_fall_times_optimized(windows):
    rise_times_list = []
    fall_times_list = []
    time_step = 0.01
    for window in windows:
        rise_times = {'r_x': 0, 'r_y': 0, 'r_z': 0, 'r_mag': 0}
        fall_times = {'f_x': 0, 'f_y': 0, 'f_z': 0, 'f_mag': 0}

        for i in range(1, len(window)):
            prev_row = window.iloc[i - 1]
            curr_row = window.iloc[i]

            for column in ['x', 'y', 'z', 'mag']:
                if curr_row[column] > prev_row[column]:
                    rise_times["r_"+column] += time_step
                elif curr_row[column] < prev_row[column]:
                    fall_times["f_"+column] += time_step
        rise_times_list.append(rise_times)
        fall_times_list.append(fall_times)

    return rise_times_list, fall_times_list


In [23]:
import scipy.stats as stat
def calculate_statistics(windows):
    statistics_list = []

    for window in windows:
        stats = {}
        for column in ['x', 'y', 'z', 'mag']:
            series = window[column]
            stats[column + '_mean'] = np.mean(series)
            stats[column + '_median'] = np.median(series)
            stats[column + '_std'] = np.std(series)
            stats[column + '_var'] = np.var(series)
            stats[column + '_min'] = np.min(series)
            stats[column + '_max'] = np.max(series)
            stats[column + '_range'] = np.ptp(series)
            stats[column + '_skew'] = stat.skew(series)
            stats[column + '_kurtosis'] = stat.kurtosis(series)
            stats[column + '_q1'] = np.percentile(series, 25)
            stats[column + '_q3'] = np.percentile(series, 75)
            stats[column + '_iqr'] = stat.iqr(series)
        statistics_list.append(stats)

    return statistics_list

In [27]:
def calculate_statistics_selected(windows):
    statistics_list = []

    for window in windows:
        stats = {}
        for column in ['x', 'y', 'z']:
            series = window[column]
            stats[column + '_mean'] = np.mean(series)
            stats[column + '_median'] = np.median(series)
            stats[column + '_std'] = np.std(series)
            stats[column + '_var'] = np.var(series)
            stats[column + '_min'] = np.min(series)
            stats[column + '_max'] = np.max(series)
            stats[column + '_range'] = np.ptp(series)
            stats[column + '_q1'] = np.percentile(series, 25)
            stats[column + '_q3'] = np.percentile(series, 75)
            stats[column + '_iqr'] = stat.iqr(series)
        statistics_list.append(stats)

    return statistics_list

In [6]:
def duration_above_below_ma(windows):
    above_times_list = []
    below_times_list = []
    time_step = 0.01
    ma_window = 5
    for window in windows:
        above_times = {'a_x': 0, 'a_y': 0, 'a_z': 0, 'a_mag': 0}
        below_times = {'b_x': 0, 'b_y': 0, 'b_z': 0, 'b_mag': 0}

        for column in ['x', 'y', 'z', 'mag']:
            ma = window[column].rolling(window=ma_window).mean()
            for i in range(ma_window, len(window)):
                if window.iloc[i][column] > ma.iloc[i]:
                    above_times["a_"+column] += time_step
                elif window.iloc[i][column] < ma.iloc[i]:
                    below_times["b_"+column] += time_step
        above_times_list.append(above_times)
        below_times_list.append(below_times)

    return above_times_list, below_times_list

In [7]:
import numpy as np
def calculate_rise_fall_times_optimized_v3(windows):

    time_step = 0.01
    rise_times_list = []
    fall_times_list = []
    time_step = 0.01
    for window in windows:
        window_np = window.to_numpy()
        derivatives = np.diff(window_np, axis=0)
        rise_times = {'r_x': 0, 'r_y': 0, 'r_z': 0, 'r_mag': 0}
        fall_times = {'f_x': 0, 'f_y': 0, 'f_z': 0, 'f_mag': 0}
        for i in range(derivatives.shape[0]):
            for idx, column in enumerate(['x', 'y', 'z']): # moze add mag
                if derivatives[i, idx] > 0:
                    rise_times["r_"+column] += time_step
                elif derivatives[i, idx] < 0:
                    fall_times["f_"+column] += time_step
        rise_times_list.append(rise_times)
        fall_times_list.append(fall_times)

    return rise_times_list, fall_times_list


In [8]:
# function for setting train data
def get_indexes(window_times, labels, safe_length_side, safe_length_middle):
    indexes = []
    middle_part = round(len(window_times)/2)
    middle_sec = (window_times[middle_part][0] + window_times[middle_part][1])/2
    for i, row in enumerate(window_times):
        if (row[1] <= safe_length_side) and labels[i]==0:
            indexes.append(i)
        elif (row[0] >= (window_times[-1][0] - safe_length_side)) and labels[i]==0:
            indexes.append(i)
        elif (row[0] >= (middle_sec - safe_length_middle)) and (row[1] <= (middle_sec + safe_length_middle)) and labels[i]==1:
            indexes.append(i)
    return indexes

In [9]:
def modify_list(input_list):
    first_one = input_list.index(1)
    last_one = len(input_list) - 1 - input_list[::-1].index(1)
    modified_list = input_list.copy()
    for i in range(first_one, last_one + 1):
        modified_list[i] = 1
    return modified_list

In [10]:
def modify_list2(input_list):
    first_one = input_list.index(1)
    last_one = len(input_list) - 1 - input_list[::-1].index(1)
    modified_list = input_list.copy()
    modified_list[first_one] = 0
    modified_list[last_one] = 0
    return modified_list

In [11]:
list1 = pd.read_csv('corrupted_signals.csv', sep=';', decimal=",").iloc[:,0].values.tolist()
list2 = pd.read_csv('inside_startend_points.csv', sep=';', decimal=",").drop(["p1","p2"], axis=1).iloc[:,0].values.tolist()
files_drop = list1 + list2
print(files_drop)

['InterpolationAccelerometerMichalWalking17.csv', 'InterpolationAccelerometerMichalWalking16.csv', 'InterpolationAccelerometerMichalWalking15.csv', 'InterpolationAccelerometerMichalWalking14.csv', 'InterpolationAccelerometerMichalWalking13.csv', 'InterpolationAccelerometerMichalWalking9.csv', 'InterpolationAccelerometerMichalWalking.csv', 'InterpolationAccelerometerMichalWalking20.csv', 'InterpolationAccelerometerMichalWalking19.csv', 'InterpolationAccelerometerMichalWalking18.csv', 'InterpolationAccelerometerMichalWalking12.csv', 'InterpolationAccelerometerMichalWalking11.csv', 'InterpolationAccelerometerMichalWalking10.csv', 'InterpolationAccelerometerMichalWalking8.csv', 'InterpolationAccelerometerMichalWalking7.csv', 'InterpolationAccelerometerMichalWalking6.csv', 'InterpolationAccelerometerMichalWalking6.csv', 'InterpolationAccelerometerMichalWalking4.csv', 'InterpolationAccelerometerMichalWalking2.csv']


In [12]:
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
lista_modeli =[
    [GaussianNB(), "NB_default"],
    [LogisticRegression(), "Logistic_Regression"],
    [DecisionTreeClassifier(criterion='gini'), "Decision_Tree_Gini"],
    [DecisionTreeClassifier(criterion='entropy'), "Decision_Tree_entropy"],
    # [LinearSVC(random_state=42, tol=1e-5, class_weight={0:0.9, 1:0.1}), "LinearSVC_default"],
    [SVC(kernel='poly', degree=5, probability=False, class_weight={0:0.9, 1:0.1}), "SVC_p5"],
    [SVC(kernel='poly', degree=4, probability=False, class_weight={0:0.9, 1:0.1}), "SVC_p4"],
    [SVC(kernel='poly', degree=3, probability=False, class_weight={0:0.9, 1:0.1}), "SVC_p3"],
    [SVC(C=0.00001, kernel='poly', degree=5, probability=False, class_weight={0:0.9, 1:0.1}), "SVC_optimized"],
    [RandomForestClassifier(class_weight={0:0.9, 1:0.1}), "RF_default"],
    [KNeighborsClassifier(n_neighbors=1), "knn_1"],
    [KNeighborsClassifier(n_neighbors=1), "knn_2"],
    [KNeighborsClassifier(n_neighbors=1), "knn_3"],
    [KNeighborsClassifier(n_neighbors=1), "knn_4"],
]

In [28]:
for clf, model_name in lista_modeli:
    print(model_name)
    foldername = "fourth-run"
    input_data = pd.read_csv('startend_points.csv', sep=';', decimal=",")
    results = pd.DataFrame()
    for idx, row in input_data.iterrows():
        if row['filename'] in files_drop:
            continue
        print(f"filename: {row['filename']} p1: {row['p1']} p2: {row['p2']}")
        temp = pd.read_csv(os.path.join(foldername, row['filename']))
        activity = temp['activity'].unique()[0]
        temp = temp[['time','x','y','z','mag']]
        temp['time'] = (temp['time'] - temp['time'].iloc[0])/1000000000
        windows, labels, window_times = split_dataframe_to_windows(temp, window_length=0.5, p1=row['p1'], p2=row['p2'])
        # windows1 = []
        # for i, window in enumerate(windows):
        #     if i <= len(windows)/2:
        #         windows1.append(window)
        #     else:
        #         windows1.append(window.iloc[::-1])


        # rise_times, fall_times = calculate_rise_fall_times_optimized(windows)
        # # rise_times1, fall_times1 = duration_above_below_ma(windows1)
        # p1 = pd.DataFrame.from_dict(rise_times).reset_index().drop("r_mag", axis=1)
        # p2 = pd.DataFrame.from_dict(fall_times).reset_index().drop("f_mag", axis=1)
        # # p11 = pd.DataFrame.from_dict(rise_times1).reset_index().drop("a_mag", axis=1)
        # # p22 = pd.DataFrame.from_dict(fall_times1).reset_index().drop("b_mag", axis=1)
        # X = p1.merge(p2).drop("index", axis=1)
        # # X = p1.merge(p2).merge(p11).merge(p22).drop("index", axis=1)
        statistics = calculate_statistics_selected(windows)
        p1 = pd.DataFrame.from_dict(statistics).reset_index()
        X = p1.drop("index", axis=1)

        train_idx = get_indexes(window_times, labels, 3.0, 3.0)
        clf.fit(X.iloc[train_idx], [labels[i] for i in train_idx])
        y_pred = clf.predict(X)
        try:
            raw_results = precision_recall_fscore_support(labels, y_pred, zero_division=0)
            y_pred1 = modify_list(y_pred.tolist())
            mod_results = precision_recall_fscore_support(labels, y_pred1, zero_division=0)
            y_pred2 = modify_list2(y_pred1)
            mod2_results = precision_recall_fscore_support(labels, y_pred2, zero_division=0)
            row_result_dict = {'name': row['filename'], 'activity': activity,
                          'raw_p0': raw_results[0][0], 'raw_r0': raw_results[0][1], 'raw_p1': raw_results[1][0], 'raw_r1': raw_results[1][1],
                          'mod1_p0': mod_results[0][0], 'mod1_r0': mod_results[0][1], 'mod1_p1': mod_results[1][0], 'mod1_r1': mod_results[1][1],
                          'mod2_p0': mod2_results[0][0], 'mod2_r0': mod2_results[0][1], 'mod2_p1': mod2_results[1][0], 'mod2_r1': mod2_results[1][1],
                          }
        except ValueError:
            row_result_dict = {'name': row['filename'], 'activity': activity,
                          'raw_p0': 0.0, 'raw_r0': 0.0, 'raw_p1': 0.0, 'raw_r1': 0.0,
                          'mod1_p0': 0.0, 'mod1_r0': 0.0, 'mod1_p1': 0.0, 'mod1_r1': 0.0,
                          'mod2_p0': 0.0, 'mod2_r0': 0.0, 'mod2_p1': 0.0, 'mod2_r1': 0.0,
                          }
        temp_df = pd.DataFrame(row_result_dict, index=[0])
        results = pd.concat([results,temp_df]).reset_index(drop=True)
    results.head()
    results.to_csv('comparison/'+model_name+'_sel_features.csv', index=False)

NB_default
filename: InterpolationAccelerometerKubaDownstairs3Apple.csv p1: 3.4 p2: 75.0
filename: InterpolationAccelerometerKubaDownstairs2Apple.csv p1: 4.0 p2: 69.0
filename: InterpolationAccelerometerKubaSquatsApple.csv p1: 3.8 p2: 124.0
filename: InterpolationAccelerometerKubaSittingApple.csv p1: 4.8 p2: 332.0
filename: InterpolationAccelerometerKubaSquats2Apple.csv p1: 7.6 p2: 132.3
filename: InterpolationAccelerometerKubaSquats3Apple.csv p1: 3.9 p2: 126.1
filename: InterpolationAccelerometerKubaStanding2Apple.csv p1: 3.2 p2: 127.6
filename: InterpolationAccelerometerKubaStanding3Apple.csv p1: 5.0 p2: 125.0
filename: InterpolationAccelerometerKubaWalking2Apple.csv p1: 4.6 p2: 579.6
filename: InterpolationAccelerometerKubaDownstairsApple.csv p1: 10.4 p2: 71.5
filename: InterpolationAccelerometerKubaSitting2Apple.csv p1: 4.7 p2: 295.3
filename: InterpolationAccelerometerKubaUpstairsApple.csv p1: 3.3 p2: 67.0
filename: InterpolationAccelerometerKubaWalkingApple.csv p1: 3.5 p2: 340.2




filename: InterpolationAccelerometerKarolJogging2.csv p1: 2.1 p2: 133.3
filename: InterpolationAccelerometerKarolWalking.csv p1: 2.0 p2: 128.5
filename: InterpolationAccelerometerKarolWalking2.csv p1: 2.7 p2: 130.3
filename: InterpolationAccelerometerKarolSquats.csv p1: 2.7 p2: 128.3
filename: InterpolationAccelerometerKarolUpstairs.csv p1: 2.6 p2: 111.7
filename: InterpolationAccelerometerKarolUpstairs2.csv p1: 2.4 p2: 113.8
filename: InterpolationAccelerometerKarolDownstairs.csv p1: 3.8 p2: 99.8
filename: InterpolationAccelerometerKarolDownstairs2.csv p1: 2.3 p2: 99.0
filename: InterpolationAccelerometerMichalWalking5.csv p1: 4.0 p2: 119.0
filename: InterpolationAccelerometerMichalWalking3.csv p1: 6.0 p2: 142.0
filename: InterpolationAccelerometerMichalUpstairs3.csv p1: 7.0 p2: 42.5
filename: InterpolationAccelerometerMichalUpstairs2.csv p1: 7.0 p2: 28.0
filename: InterpolationAccelerometerMichalUpstairs.csv p1: 4.1 p2: 35.0
filename: InterpolationAccelerometerMichalSquats4.csv p1: 4

In [29]:
import os

summary = []

for filename in os.listdir("comparison"):
    if filename.endswith(".csv"):

        df = pd.read_csv(os.path.join("comparison", filename))
        df = df.drop(['name', 'activity'], axis=1)

        zero_rows = (df == 0).all(axis=1).sum()

        # Usuń wiersze, które zawierają zera
        df_no_zeros = df[(df != 0).all(axis=1)]

        # Wylicz średnią z każdej kolumny
        means = df_no_zeros.mean()

        # Stwórz słownik zawierający nazwę pliku, liczbę wierszy z zerami oraz średnie wartości z każdej kolumny
        file_summary = {'filename': filename, 'zero_rows': zero_rows}
        file_summary.update(means.to_dict())

        # Dodaj wyniki do listy podsumowań
        summary.append(file_summary)

# Stwórz DataFrame z podsumowaniem
summary_df = pd.DataFrame(summary)

# Wyświetl podsumowanie
summary_df.head(100)

Unnamed: 0,filename,zero_rows,raw_p0,raw_r0,raw_p1,raw_r1,mod1_p0,mod1_r0,mod1_p1,mod1_r1,mod2_p0,mod2_r0,mod2_p1,mod2_r1
0,Decision_Tree_entropy.csv,0,0.176918,0.973016,0.886175,0.5839,0.926618,0.973659,0.803506,0.992164,0.855095,0.977833,0.846461,0.984274
1,Decision_Tree_entropy_all_features.csv,0,0.54245,0.982454,0.893054,0.872137,0.948903,0.975445,0.837495,0.988735,0.87629,0.980281,0.884936,0.981615
2,Decision_Tree_entropy_sel_features.csv,0,0.511376,0.98084,0.888021,0.867031,0.936674,0.976511,0.840142,0.989411,0.863341,0.981068,0.883757,0.981933
3,Decision_Tree_Gini.csv,0,0.182177,0.973989,0.889082,0.582451,0.915151,0.973722,0.805392,0.992257,0.851477,0.978109,0.851099,0.984616
4,Decision_Tree_Gini_all_features.csv,0,0.504531,0.983843,0.90033,0.866921,0.94773,0.977384,0.851639,0.991006,0.868146,0.981511,0.890933,0.983102
5,Decision_Tree_Gini_sel_features.csv,0,0.525803,0.981845,0.890814,0.878555,0.932758,0.976316,0.842687,0.986026,0.860144,0.980789,0.885237,0.978526
6,knn_1.csv,0,0.18216,0.971576,0.86938,0.615503,0.923263,0.969091,0.756201,0.993442,0.862373,0.973972,0.807208,0.986559
7,knn_1_all_features.csv,0,0.542756,0.978249,0.851606,0.898061,0.961093,0.972246,0.795074,0.996731,0.891318,0.977327,0.846312,0.989914
8,knn_1_r.csv,0,0.174356,0.970237,0.869284,0.599386,0.927301,0.969236,0.769737,0.993214,0.859137,0.973728,0.815149,0.985905
9,knn_1_sel_features.csv,0,0.727896,0.979492,0.860052,0.942719,0.949038,0.975156,0.828675,0.987797,0.880597,0.980268,0.880289,0.980953


In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support

clf = RandomForestClassifier()

foldername = "fourth-run"
input_data = pd.read_csv('startend_points.csv', sep=';', decimal=",")
results = pd.DataFrame()

feature_importances_df = pd.DataFrame()

for idx, row in input_data.iterrows():
    if row['filename'] in files_drop:
        continue
    print(f"filename: {row['filename']} p1: {row['p1']} p2: {row['p2']}")
    temp = pd.read_csv(os.path.join(foldername, row['filename']))
    activity = temp['activity'].unique()[0]
    temp = temp[['time','x','y','z','mag']]
    temp['time'] = (temp['time'] - temp['time'].iloc[0])/1000000000
    windows, labels, window_times = split_dataframe_to_windows(temp, window_length=0.5, p1=row['p1'], p2=row['p2'])

    statistics = calculate_statistics(windows)
    p1 = pd.DataFrame.from_dict(statistics).reset_index()
    X = p1.drop("index", axis=1)

    train_idx = get_indexes(window_times, labels, 3.0, 3.0)
    clf.fit(X.iloc[train_idx], [labels[i] for i in train_idx])

    feature_importances = pd.DataFrame(clf.feature_importances_,
                                       index = X.columns,
                                       columns=['importance']).sort_values('importance', ascending=False)
    feature_importances_df = pd.concat([feature_importances_df, feature_importances], axis=0)


average_importances = feature_importances_df.groupby(feature_importances_df.index).mean().sort_values('importance', ascending=False)

print(average_importances)


filename: InterpolationAccelerometerKubaDownstairs3Apple.csv p1: 3.4 p2: 75.0
filename: InterpolationAccelerometerKubaDownstairs2Apple.csv p1: 4.0 p2: 69.0
filename: InterpolationAccelerometerKubaSquatsApple.csv p1: 3.8 p2: 124.0
filename: InterpolationAccelerometerKubaSittingApple.csv p1: 4.8 p2: 332.0
filename: InterpolationAccelerometerKubaSquats2Apple.csv p1: 7.6 p2: 132.3
filename: InterpolationAccelerometerKubaSquats3Apple.csv p1: 3.9 p2: 126.1
filename: InterpolationAccelerometerKubaStanding2Apple.csv p1: 3.2 p2: 127.6
filename: InterpolationAccelerometerKubaStanding3Apple.csv p1: 5.0 p2: 125.0
filename: InterpolationAccelerometerKubaWalking2Apple.csv p1: 4.6 p2: 579.6
filename: InterpolationAccelerometerKubaDownstairsApple.csv p1: 10.4 p2: 71.5
filename: InterpolationAccelerometerKubaSitting2Apple.csv p1: 4.7 p2: 295.3
filename: InterpolationAccelerometerKubaUpstairsApple.csv p1: 3.3 p2: 67.0
filename: InterpolationAccelerometerKubaWalkingApple.csv p1: 3.5 p2: 340.2
filename: I

In [37]:
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
foldername = "fourth-run"
input_data = pd.read_csv('startend_points.csv', sep=';', decimal=",")
# input_data = input_data.iloc[:2]
results = pd.DataFrame()
for idx, row in input_data.iterrows():
    if row['filename'] in files_drop:
        continue

    print(f"filename: {row['filename']} p1: {row['p1']} p2: {row['p2']}")
    temp = pd.read_csv(os.path.join(foldername, row['filename']))
    activity = temp['activity'].unique()[0]
    temp = temp[['time','x','y','z','mag']]
    temp['time'] = (temp['time'] - temp['time'].iloc[0])/1000000000

    windows, labels, window_times = split_dataframe_to_windows(temp, window_length=0.5, p1=row['p1'], p2=row['p2'])

    windows1 = []
    for i, window in enumerate(windows):
        if i <= len(windows)/2:
            windows1.append(window)
        else:
            windows1.append(window.iloc[::-1])
    rise_times, fall_times = calculate_rise_fall_times_optimized(windows1)
    # rise_times1, fall_times1 = duration_above_below_ma(windows1)

    p1 = pd.DataFrame.from_dict(rise_times).reset_index().drop("r_mag", axis=1)
    p2 = pd.DataFrame.from_dict(fall_times).reset_index().drop("f_mag", axis=1)
    # p11 = pd.DataFrame.from_dict(rise_times1).reset_index().drop("a_mag", axis=1)
    # p22 = pd.DataFrame.from_dict(fall_times1).reset_index().drop("b_mag", axis=1)
    X = p1.merge(p2).drop("index", axis=1)
    # X = p1.merge(p2).merge(p11).merge(p22).drop("index", axis=1)

    train_idx = get_indexes(window_times, labels, 3.0, 3.0)
    # clf = RandomForestClassifier(class_weight={0:0.9, 1:0.1})
    # clf = SVC(C=0.00001, kernel='poly', degree=5, probability=False, class_weight={0:0.9, 1:0.1})
    clf = SVC(probability=False, class_weight={0:0.9, 1:0.1})
    # clf = AdaBoostClassifier(n_estimators=100)
    clf.fit(X.iloc[train_idx], [labels[i] for i in train_idx])
    y_pred = clf.predict(X)
    try:
        raw_results = precision_recall_fscore_support(labels, y_pred)
        y_pred1 = modify_list(y_pred.tolist())
        mod_results = precision_recall_fscore_support(labels, y_pred1)
        y_pred2 = modify_list2(y_pred1)
        mod2_results = precision_recall_fscore_support(labels, y_pred2)
        row_result_dict = {'name': row['filename'], 'activity': activity,
                      'raw_p0': raw_results[0][0], 'raw_r0': raw_results[0][1], 'raw_p1': raw_results[1][0], 'raw_r1': raw_results[1][1],
                      'mod1_p0': mod_results[0][0], 'mod1_r0': mod_results[0][1], 'mod1_p1': mod_results[1][0], 'mod1_r1': mod_results[1][1],
                      'mod2_p0': mod2_results[0][0], 'mod2_r0': mod2_results[0][1], 'mod2_p1': mod2_results[1][0], 'mod2_r1': mod2_results[1][1],
                      }
    except ValueError:
        row_result_dict = {'name': row['filename'], 'activity': activity,
                      'raw_p0': 0.0, 'raw_r0': 0.0, 'raw_p1': 0.0, 'raw_r1': 0.0,
                      'mod1_p0': 0.0, 'mod1_r0': 0.0, 'mod1_p1': 0.0, 'mod1_r1': 0.0,
                      'mod2_p0': 0.0, 'mod2_r0': 0.0, 'mod2_p1': 0.0, 'mod2_r1': 0.0,
                      }
    temp_df = pd.DataFrame(row_result_dict, index=[0])
    results = pd.concat([results,temp_df]).reset_index(drop=True)
results.head()
results.to_csv('a_SVC_normal.csv', index=False)

filename: InterpolationAccelerometerKubaDownstairs3Apple.csv p1: 3.4 p2: 75.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaDownstairs2Apple.csv p1: 4.0 p2: 69.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaSquatsApple.csv p1: 3.8 p2: 124.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaSittingApple.csv p1: 4.8 p2: 332.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaSquats2Apple.csv p1: 7.6 p2: 132.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaSquats3Apple.csv p1: 3.9 p2: 126.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaStanding2Apple.csv p1: 3.2 p2: 127.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaStanding3Apple.csv p1: 5.0 p2: 125.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaWalking2Apple.csv p1: 4.6 p2: 579.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaDownstairsApple.csv p1: 10.4 p2: 71.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaSitting2Apple.csv p1: 4.7 p2: 295.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaUpstairsApple.csv p1: 3.3 p2: 67.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaWalkingApple.csv p1: 3.5 p2: 340.2


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaStandingApple.csv p1: 3.3 p2: 124.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaUpstairs2Apple.csv p1: 3.3 p2: 81.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaLying3Apple.csv p1: 8.6 p2: 233.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaLying2Apple.csv p1: 4.0 p2: 96.2


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKubaLyingApple.csv p1: 5.0 p2: 164.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKasperDownstairsApple.csv p1: 0.5 p2: 85.8


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKasperSittingApple.csv p1: 15.2 p2: 227.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKasperStandingApple.csv p1: 6.0 p2: 190.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKasperUpstairsApple.csv p1: 3.3 p2: 95.9


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKasperWalkingApple.csv p1: 3.4 p2: 219.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationDownstairs2Apple.csv p1: 1.6 p2: 40.4


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationDownstairs3Apple.csv p1: 0.3 p2: 38.8


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationJogging2Apple.csv p1: 3.8 p2: 56.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationJogging3Apple.csv p1: 3.0 p2: 38.35


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationLying2Apple.csv p1: 3.4 p2: 109.4


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationLying3Apple.csv p1: 5.0 p2: 185.9


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationLyingApple.csv p1: 7.0 p2: 122.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationSitting2Apple.csv p1: 5.9 p2: 130.9


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationSitting3Apple.csv p1: 16.7 p2: 241.2


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationSitting4Apple.csv p1: 9.4 p2: 155.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationSquats2Apple.csv p1: 1.7 p2: 21.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationSquatsApple.csv p1: 2.0 p2: 21.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationStanding2Apple.csv p1: 3.0 p2: 44.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationUpstairs2Apple.csv p1: 2.3 p2: 67.2


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationUpstairs3Apple.csv p1: 1.7 p2: 36.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationUpstairsApple.csv p1: 3.1 p2: 38.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationWalking2Apple.csv p1: 7.7 p2: 817.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerUserOrientationWalkingApple.csv p1: 6.9 p2: 698.9


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorDownstairs2Apple.csv p1: 6.2 p2: 65.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorDownstairs3Apple.csv p1: 2.4 p2: 56.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorDownstairsApple.csv p1: 2.1 p2: 58.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorJoggingApple.csv p1: 3.4 p2: 135.4


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorLyingApple.csv p1: 7.3 p2: 161.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorSitting2Apple.csv p1: 9.6 p2: 123.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorSitting3Apple.csv p1: 4.2 p2: 113.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorSitting4Apple.csv p1: 1.8 p2: 127.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorSittingApple.csv p1: 5.2 p2: 121.2


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorStanding2Apple.csv p1: 3.0 p2: 138.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorStandingApple.csv p1: 5.0 p2: 184.1


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorUpstairsApple.csv p1: 2.5 p2: 64.4


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorWalking2Apple.csv p1: 1.4 p2: 90.2
filename: InterpolationAccelerometerWiktorWalking3Apple.csv p1: 1.4 p2: 118.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerWiktorWalkingApple.csv p1: 2.2 p2: 127.6


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolJogging.csv p1: 2.9 p2: 130.4


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolJogging2.csv p1: 2.1 p2: 133.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolWalking.csv p1: 2.0 p2: 128.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolWalking2.csv p1: 2.7 p2: 130.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolSquats.csv p1: 2.7 p2: 128.3


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolUpstairs.csv p1: 2.6 p2: 111.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolUpstairs2.csv p1: 2.4 p2: 113.8


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolDownstairs.csv p1: 3.8 p2: 99.8


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerKarolDownstairs2.csv p1: 2.3 p2: 99.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalWalking5.csv p1: 4.0 p2: 119.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalWalking3.csv p1: 6.0 p2: 142.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalUpstairs3.csv p1: 7.0 p2: 42.5


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalUpstairs2.csv p1: 7.0 p2: 28.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalUpstairs.csv p1: 4.1 p2: 35.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSquats4.csv p1: 4.0 p2: 93.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSquats3.csv p1: 5.5 p2: 129.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSquats2.csv p1: 4.5 p2: 124.7


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSquats.csv p1: 4.0 p2: 118.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting6.csv p1: 4.0 p2: 170.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting5.csv p1: 5.0 p2: 95.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting4.csv p1: 5.0 p2: 170.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting3.csv p1: 4.0 p2: 135.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting2.csv p1: 4.0 p2: 128.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalSitting.csv p1: 4.0 p2: 236.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalDownstairs2.csv p1: 4.5 p2: 49.0


  _warn_prf(average, modifier, msg_start, len(result))


filename: InterpolationAccelerometerMichalDownstairs.csv p1: 2.2 p2: 45.7


  _warn_prf(average, modifier, msg_start, len(result))


In [36]:
results.head(19)

Unnamed: 0,name,activity,raw_p0,raw_r0,raw_p1,raw_r1,mod1_p0,mod1_r0,mod1_p1,mod1_r1,mod2_p0,mod2_r0,mod2_p1,mod2_r1
0,InterpolationAccelerometerKubaDownstairs3Apple...,downstairs,0.086667,1.0,1.0,0.041958,0.13,1.0,1.0,0.391608,0.127451,1.0,1.0,0.377622
1,InterpolationAccelerometerKubaDownstairs2Apple...,downstairs,0.113821,1.0,1.0,0.161538,0.608696,1.0,1.0,0.930769,0.56,1.0,1.0,0.915385
2,InterpolationAccelerometerKubaSquatsApple.csv,squats,0.067568,1.0,1.0,0.1375,0.75,1.0,1.0,0.979167,0.681818,1.0,1.0,0.970833
3,InterpolationAccelerometerKubaSittingApple.csv,sitting,0.032872,1.0,1.0,0.14526,0.95,1.0,1.0,0.998471,0.863636,1.0,1.0,0.995413
4,InterpolationAccelerometerKubaSquats2Apple.csv,squats,0.092025,0.99,0.9375,0.40081,0.764706,0.987805,0.8125,0.983806,0.736842,0.991803,0.875,0.979757
5,InterpolationAccelerometerKubaSquats3Apple.csv,squats,0.094595,1.0,1.0,0.17623,0.65625,1.0,1.0,0.954918,0.617647,1.0,1.0,0.946721
6,InterpolationAccelerometerKubaStanding2Apple.csv,standing,0.063492,0.923077,0.941176,0.048387,0.888889,0.995951,0.941176,0.991935,0.85,1.0,1.0,0.987903
7,InterpolationAccelerometerKubaStanding3Apple.csv,standing,0.079365,0.8,0.909091,0.033333,0.76,0.987342,0.863636,0.975,0.740741,0.991489,0.909091,0.970833
8,InterpolationAccelerometerKubaWalking2Apple.csv,walking,0.019512,1.0,1.0,0.300261,0.64,1.0,1.0,0.992167,0.592593,1.0,1.0,0.990426
9,InterpolationAccelerometerKubaDownstairsApple.csv,downstairs,0.262295,0.941176,0.941176,0.262295,0.862069,0.929134,0.735294,0.967213,0.83871,0.936,0.764706,0.959016


In [367]:
rev_data_nomag_ada.drop(9).mean(axis=0)

  rev_data_nomag_ada.drop(9).mean(axis=0)


raw_p0     0.121466
raw_r0     0.982180
raw_p1     0.840276
raw_r1     0.574264
mod1_p0    0.967484
mod1_r0    0.985640
mod1_p1    0.778013
mod1_r1    0.997889
mod2_p0    0.921062
mod2_r0    0.990134
mod2_p1    0.848955
mod2_r1    0.994299
dtype: float64

In [161]:
SVC_low = pd.read_csv('a_SVC_test.csv')
SVC_low = SVC_low[SVC_low["raw_p0"]>0]
df_result = pd.concat([SVC_low.mean(axis=0), ], axis=1)
df_result.columns = ['SVC_low', 'normal', 'normal_r_nomag', 'normal_nomag', 'normal_r_nomag_ada', 'normal_nomag_ada', 'normal_r_nomag_c0d1', 'normal_r_nomag_c0d05', 'normal_r_nomag_d5']
df_result.head(len(df_result))

  df_result = pd.concat([rev_data.mean(axis=0), notrev_data.mean(axis=0), rev_data_nomag.mean(axis=0), data_nomag.mean(axis=0), rev_data_nomag_ada.mean(axis=0), data_nomag_ada.mean(axis=0), rev_data_nomag_c0d1.mean(axis=0),  rev_data_nomag_c0d05.mean(axis=0), rev_data_nomag_c5.mean(axis=0)], axis=1)


Unnamed: 0,normal_r,normal,normal_r_nomag,normal_nomag,normal_r_nomag_ada,normal_nomag_ada,normal_r_nomag_c0d1,normal_r_nomag_c0d05,normal_r_nomag_d5
raw_p0,0.121358,0.127569,0.102795,0.110507,0.137287,0.139015,0.088489,0.082257,0.083208
raw_r0,0.981184,0.982804,0.981096,0.978786,0.978339,0.979202,0.962091,0.955505,0.9725
raw_p1,0.879364,0.896445,0.907354,0.882446,0.836078,0.856087,0.963252,0.973961,0.951011
raw_r1,0.482859,0.54005,0.3757,0.460028,0.580612,0.601222,0.223077,0.161224,0.193833
mod1_p0,0.939783,0.936971,0.917396,0.928169,0.96929,0.967435,0.793512,0.673767,0.752152
mod1_r0,0.984076,0.984603,0.984697,0.983385,0.978613,0.981412,0.991255,0.992371,0.990455
mod1_p1,0.818476,0.837794,0.84352,0.800361,0.757666,0.795482,0.911206,0.931835,0.900206
mod1_r1,0.995994,0.995756,0.993808,0.996658,0.998006,0.997877,0.976899,0.891682,0.944651
mod2_p0,0.888699,0.881841,0.868868,0.888199,0.925447,0.919165,0.744046,0.633033,0.7137
mod2_r0,0.987873,0.988376,0.988508,0.988348,0.983539,0.986074,0.993503,0.938766,0.992963
