In [60]:
from typing import Union
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [62]:
''' Helper functions '''


def append_from_csv(file: Path, sep: str, train_data: bool) -> None:
    try:
        df: pd.DataFrame = pd.read_csv(file, sep=sep, dtype=str)
    except Exception as ex:
#         print(f'{file} :')
#         print(ex)
        pass
    else:
        if df.shape[1] == 5:
            try:
                df = df.applymap(lambda x: str(x).replace(',', '.'))
                df['gFx'] = df['gFx'].astype(float)
                df['gFy'] = df['gFy'].astype(float)
                df['gFz'] = df['gFz'].astype(float)
            except Exception as ex:
#                 print(f'{file} :')
#                 print(ex)
                pass
            else:
                DataFrames.append(df)
            
                if train_data:
                    Types.append(correct_type)
                

def collect(path: Path, train_data: bool):

    type_patterns = {
        ('cтояние', 'тояни', 'месте'): {'name': 'стояние', 'id': 0},
        ('ходьба', 'одьб', 'аг'): {'name': 'ходьба', 'id': 1},
        ('бег'): {'name': 'бег', 'id': 2},
        ('велосипед', 'елоси'): {'name': 'велосипед', 'id': 3},
        ('лестница', 'естн', 'одъ'): {'name': 'лестница', 'id': 4},
        ('автомобиль', 'втомо', 'ашин'): {'name': 'автомобиль', 'id': 5},
        ('метро'): {'name': 'метро', 'id': 6},
        ('автобус', 'автоб'): {'name': 'автобус', 'id': 7},
        ('самокат', 'амок'): {'name': 'самокат', 'id': 8}
    }

    DataFrames: list[pd.DataFrame] = []
    
    if train_data:
        Types: list[int] = []

    for sub_path in public_data_path.iterdir():

        if sub_path.is_file():
            
            if train_data:

                try:
                    type_str = str(sub_path.name).split('_')[-2]
                except IndexError:
                    continue
                else:
                    correct_type = int()

                    for key, value in type_patterns.items():
                        if type_str.lower() in key:
                            correct_type = value['id']

                    if correct_type not in range(9):
                        raise ValueError(f'Ошибка при определении типа движения. {type_str = }')

            append_from_csv(sub_path, ',', train_data)
            append_from_csv(sub_path, ';', train_data)
            
    if train_data:
        return DataFrames, Types
    else:
        return DataFrames
            
                
def process_frame(df: pd.DataFrame, label: int) -> pd.Series:
    start = int(0.2 * len(df))
    end = -int(0.2 * len(df))

#     x_mean = np.mean(df.gFx[start:end])
#     y_mean = np.mean(df.gFy[start:end])
#     z_mean = np.mean(df.gFz[start:end])
    
#     return pd.Series(
#         [x_mean, y_mean, z_mean, label],
#         index=['x_mean', 'y_mean', 'z_mean', 'label']
#     )
    
    accel = np.log(np.std((df.gFx[start:end] ** 2 + df.gFy[start:end] ** 2 + df.gFz[start:end] ** 2) ** 0.5))
    
    return pd.Series(
        [accel, label],
        index=['accel', 'label']
    )

In [63]:
''' Сбор данных из файлов c разметкой типов '''


train_data_path = Path('./public_data')
test_data_path = Path('./kaggle_data_open')


train_data, train_labels = collect(train_data_path, True)
test_data = collect(test_data_path, False)

In [55]:
train_simple = pd.DataFrame(columns=['accel', 'label'])
train_advanced = pd.DataFrame(columns=['accel', 'label'])


for frame, label in zip(DataFrames, Types):
    
    if label in (0, 1, 2, 3, 4):
        train_simple = train_simple.append(process_frame(frame, label), ignore_index=True)
        
    if label in (5, 6, 7, 8):
        train_advanced = train_advanced.append(process_frame(frame, label), ignore_index=True)

In [57]:
X_simple = np.array(train_simple.accel).reshape(-1, 1)
y_simple = train_simple.label

X_advanced = np.array(train_advanced.accel).reshape(-1, 1)
y_advanced = train_advanced.label

model_simple = LogisticRegression().fit(X_simple, y_simple)
model_advanced = LogisticRegression().fit(X_advanced, y_advanced)

In [58]:
model_simple.score(X_simple, y_simple)

0.616519174041298

In [59]:
model_advanced.score(X_advanced, y_advanced)

0.5