In [6]:
import pandas as pd
data = pd.read_csv('geophone-sensor-data.csv')

In [7]:
data

Unnamed: 0,timestamp,mean,top_3_mean,min,max,std_dev,median,q1,q3,skewness,dominant_freq,energy,activity,name
0,01:51:43,2046.726000,2057.666667,2035,2060,3.452186,2047.0,2043.0,2049.0,-0.152574,601.0,1.988682e+09,walking,Yusuf
1,01:51:46,2046.619333,2053.000000,2040,2053,3.580469,2047.0,2044.0,2050.0,-0.150615,300.0,1.988028e+09,walking,Yusuf
2,01:51:50,2046.598000,2056.000000,2030,2059,3.592826,2047.0,2044.0,2049.0,-0.370125,300.0,1.987897e+09,walking,Yusuf
3,01:51:53,2046.560000,2054.000000,2039,2055,3.414830,2047.0,2044.0,2049.0,-0.321296,300.0,1.987662e+09,walking,Yusuf
4,01:51:56,2046.656667,2055.000000,2034,2057,3.356802,2048.0,2044.0,2049.0,-0.379134,300.0,1.988255e+09,walking,Yusuf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,21:17:01,2037.587333,2040.000000,2034,2040,1.564089,2038.0,2037.0,2039.0,-0.883816,600.0,1.932680e+09,waiting,Emir
1796,21:17:04,2037.606000,2041.333333,2033,2042,2.160578,2038.0,2036.0,2040.0,-0.199801,300.0,1.932797e+09,waiting,Emir
1797,21:17:07,2037.984000,2041.000000,2034,2041,1.686736,2038.0,2036.0,2039.0,-0.339960,300.0,1.935105e+09,waiting,Emir
1798,21:17:10,2037.482000,2041.000000,2033,2041,2.123286,2037.0,2036.0,2040.0,-0.176030,300.0,1.932039e+09,waiting,Emir


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier

# Фильтрация данных по активности 'walking'
walking_data = data[data['activity'] == 'walking'].copy()

# Проверка дисбаланса классов
print(walking_data['name'].value_counts())

# Преобразование меток классов
le = LabelEncoder()
walking_data['name'] = le.fit_transform(walking_data['name'])

# Выбор признаков и целевой переменной
features = ['mean', 'top_3_mean', 'min', 'max', 'std_dev', 
           'median', 'q1', 'q3', 'skewness', 
           'dominant_freq', 'energy']
X = walking_data[features]
y = walking_data['name']

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Обучение модели CatBoost
model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    loss_function='MultiClass',
    verbose=100,
    random_state=42
)

model.fit(X_train, y_train, eval_set=(X_test, y_test))

# Предсказания
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)

# Оценка метрик
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")

# Важность признаков
feature_importance = pd.DataFrame({
    'feature': features,
    'importance': model.get_feature_importance()
}).sort_values('importance', ascending=False)

print("\nFeature Importance:")
print(feature_importance)


name
Yusuf     120
Enes      120
Furkan    120
Alihan    120
Emir      120
Name: count, dtype: int64
0:	learn: 1.4107658	test: 1.4089146	best: 1.4089146 (0)	total: 58.5ms	remaining: 58.4s
100:	learn: 0.1361619	test: 0.2691414	best: 0.2682559 (95)	total: 170ms	remaining: 1.51s
200:	learn: 0.0682284	test: 0.2616927	best: 0.2598982 (187)	total: 269ms	remaining: 1.07s
300:	learn: 0.0433049	test: 0.2657124	best: 0.2598982 (187)	total: 368ms	remaining: 854ms
400:	learn: 0.0307783	test: 0.2713887	best: 0.2598982 (187)	total: 466ms	remaining: 697ms
500:	learn: 0.0236290	test: 0.2809454	best: 0.2598982 (187)	total: 569ms	remaining: 567ms
600:	learn: 0.0188808	test: 0.2884019	best: 0.2598982 (187)	total: 668ms	remaining: 444ms
700:	learn: 0.0156327	test: 0.2942475	best: 0.2598982 (187)	total: 766ms	remaining: 327ms
800:	learn: 0.0133262	test: 0.2977078	best: 0.2598982 (187)	total: 865ms	remaining: 215ms
900:	learn: 0.0115443	test: 0.3021652	best: 0.2598982 (187)	total: 970ms	remaining: 107ms
999