In [9]:
import pandas as pd
import os 
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


def calculate_time_domain_features(df):
    """Summarise each accelerometer axis of *df* with four statistics.

    For every axis in X, Y, Z the column ``accelerometer_<axis>`` is looked
    up and its ``mean()``, ``std()``, ``max()`` and ``min()`` are stored
    under the keys ``Mean_<axis>``, ``Std_<axis>``, ``Max_<axis>`` and
    ``Min_<axis>``.

    Args:
        df: Table indexable by the column names ``accelerometer_X``,
            ``accelerometer_Y`` and ``accelerometer_Z``.

    Returns:
        A dict with twelve entries, grouped by axis (X, then Y, then Z) and
        ordered Mean, Std, Max, Min within each axis.

    Raises:
        KeyError: Propagated from the column lookup when an expected
            column is missing.
    """
    features = {}
    for axis in ('X', 'Y', 'Z'):
        signal = df[f'accelerometer_{axis}']
        # The feature prefix is the capitalised name of the method that
        # computes it, so the method is resolved from the prefix.
        for stat in ('Mean', 'Std', 'Max', 'Min'):
            features[f'{stat}_{axis}'] = getattr(signal, stat.lower())()
    return features


def load_data(zip_filename):
    """Build a feature table and a label list from the CSVs in a zip archive.

    Every archive member whose name ends with ``.csv`` is parsed with
    ``pd.read_csv`` and reduced to one row of features by
    ``calculate_time_domain_features``. The label of a member is the part
    of its base name that precedes the first ``-``.

    Args:
        zip_filename: Path of the zip archive to read.

    Returns:
        A ``(features, labels)`` tuple: a ``pd.DataFrame`` with one row per
        CSV member, and a list of label strings in the same order.
    """
    data = []
    labels = []

    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        for member in zip_ref.namelist():
            if not member.endswith('.csv'):
                continue

            label = os.path.basename(member).split('-')[0]

            # pandas does not close a handle it did not open itself, so the
            # member stream is closed explicitly once parsing is done.
            with zip_ref.open(member) as csv_file:
                df = pd.read_csv(csv_file)

            labels.append(label)
            data.append(calculate_time_domain_features(df))

    return pd.DataFrame(data), labels


def main():
    """Train an SVM and a random forest on 'homework.zip' and print reports.

    Loads features and labels with ``load_data``, integer-encodes the
    labels, holds out 20% of the samples for testing, fits a linear-kernel
    ``SVC`` and a 100-tree ``RandomForestClassifier`` on the remainder, and
    prints a ``classification_report`` for each model.
    """
    X, y = load_data('homework.zip')

    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

    svm_model = SVC(kernel='linear')
    # Seeded like the split above so that repeated runs print the same report.
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    svm_model.fit(X_train, y_train)
    rf_model.fit(X_train, y_train)

    svm_pred = svm_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)

    # LabelEncoder maps classes to 0..n_classes-1. Passing the full id list
    # keeps `target_names` aligned even when the test split (and the
    # predictions) happen to miss a class; without it the report fails
    # because the number of names and the labels found disagree.
    class_ids = list(range(len(label_encoder.classes_)))

    print("Wyniki SVM:")
    print(classification_report(
        y_test, svm_pred,
        labels=class_ids, target_names=label_encoder.classes_,
    ))

    print("Wyniki lasu losowego:")
    print(classification_report(
        y_test, rf_pred,
        labels=class_ids, target_names=label_encoder.classes_,
    ))


# Run the training/evaluation pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Label: idle
Time domain features: {'Mean_X': 0.17844786666666668, 'Std_X': 1.0363608474565926, 'Max_X': 5.09965, 'Min_X': -0.909797, 'Mean_Y': 0.1674345333333333, 'Std_Y': 1.15760347498444, 'Max_Y': 4.616021, 'Min_Y': -0.282516, 'Mean_Z': 9.605696899999996, 'Std_Z': 0.4069033795748119, 'Max_Z': 9.80665, 'Min_Z': 8.418014}
Label: idle
Time domain features: {'Mean_X': -0.09864106666666662, 'Std_X': 0.125847653653667, 'Max_X': 0.407014, 'Min_X': -0.320823, 'Mean_Y': -0.13120213333333333, 'Std_Y': 0.04805878225438834, 'Max_Y': 0.0239419999999999, 'Min_Y': -0.244209, 'Mean_Z': 9.771216033333332, 'Std_Z': 0.025418793173125936, 'Max_Z': 9.80665, 'Min_Z': 9.667787}
Label: idle
Time domain features: {'Mean_X': -0.09991786666666662, 'Std_X': 0.015642221948906444, 'Max_X': -0.062249, 'Min_X': -0.124498, 'Mean_Y': 0.22090513333333334, 'Std_Y': 0.01638287305175393, 'Max_Y': 0.244209, 'Min_Y': 0.177171, 'Mean_Z': 9.768502666666668, 'Std_Z': 0.014528405542563568, 'Max_Z': 9.792285, 'Min_Z': 9.739613}