In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import os
import pickle

path = "S1_Dataset"
dirs = os.listdir(path)
data = np.genfromtxt(f'S1_Dataset/{dirs[0]}', delimiter=',')
for i in range(1, len(dirs)):
    data = np.concatenate((data, np.genfromtxt(f'S1_Dataset/{dirs[i]}', delimiter=',')), axis=0)
    
data = data[:300,:]
X = data[:, :8]
y = data[:, 8]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

model = GaussianNB(var_smoothing=0.00007)
model.fit(X_train, y_train)

train_acc = model.score(X_train, y_train)
test_acc = model.score(X_test, y_test)

In [2]:
print('Общий вид выборки:')
pd.DataFrame(data=data, columns=["time", "frontal axis acc", "vertical axis acc", "lateral axis acc", "id of antenna", "signal strength", "phase", "frequency", "label of activity"])


Общий вид выборки:


Unnamed: 0,time,frontal axis acc,vertical axis acc,lateral axis acc,id of antenna,signal strength,phase,frequency,label of activity
0,0.00,0.27203,1.00820,-0.082102,1.0,-63.5,2.42520,924.25,1.0
1,0.50,0.27203,1.00820,-0.082102,1.0,-63.0,4.73690,921.75,1.0
2,1.50,0.44791,0.91636,-0.013684,1.0,-63.5,3.03110,923.75,1.0
3,1.75,0.44791,0.91636,-0.013684,1.0,-63.0,2.03710,921.25,1.0
4,2.50,0.34238,0.96229,-0.059296,1.0,-63.5,5.89200,920.25,1.0
...,...,...,...,...,...,...,...,...,...
295,204.25,1.09280,-0.28938,-0.059296,1.0,-60.5,4.99620,920.75,3.0
296,205.25,1.26870,-0.36976,0.009122,1.0,-64.5,4.34420,924.75,3.0
297,206.00,1.29210,-0.38124,0.020525,1.0,-61.5,6.18040,922.75,3.0
298,206.25,1.29210,-0.38124,0.020525,1.0,-60.5,4.52680,921.25,3.0


In [3]:
print(f'Точность обучающей выборки: {train_acc}')
print(f'Ошибка классификации: {1 - train_acc}')
print(f'Точность проверочной выборки: {test_acc}')

Точность обучающей выборки: 0.9751243781094527
Ошибка классификации: 0.02487562189054726
Точность проверочной выборки: 0.98989898989899


In [4]:
print(f'Правила классификации модели:')
pd.DataFrame(model.var_)

Правила классификации модели:


Unnamed: 0,0,1,2,3,4,5,6,7
0,1986.952646,0.241931,0.203,0.206084,2.10982,9.655102,4.36289,3.080722
1,240.265594,0.207768,0.201915,0.201982,0.200729,0.871669,5.147255,3.146817
2,1823.278577,0.219767,0.21378,0.206101,0.945885,59.943758,4.758223,2.751414
3,1487.028854,0.213446,0.202072,0.204825,0.450729,21.122604,4.553128,2.872604


In [5]:
pickle.dump(model,open('model.sav', 'wb'))
loaded_model = pickle.load(open('model.sav', 'rb'))
print(f'Правила классификации загруженной модели:')
pd.DataFrame(loaded_model.var_)

Правила классификации загруженной модели:


Unnamed: 0,0,1,2,3,4,5,6,7
0,1986.952646,0.241931,0.203,0.206084,2.10982,9.655102,4.36289,3.080722
1,240.265594,0.207768,0.201915,0.201982,0.200729,0.871669,5.147255,3.146817
2,1823.278577,0.219767,0.21378,0.206101,0.945885,59.943758,4.758223,2.751414
3,1487.028854,0.213446,0.202072,0.204825,0.450729,21.122604,4.553128,2.872604


In [7]:
print(f'Подбор гиперпараметра:')
i=0
result = pd.DataFrame([], columns=["var_smoothing", "train acc", "test acc"])
for smoothing in np.arange(0.007,0.000007,-0.000007):
    new_model = GaussianNB(var_smoothing=smoothing)
    new_model.fit(X_train, y_train)
    result.loc[i]=[round(smoothing, 10), new_model.score(X_train, y_train), new_model.score(X_test, y_test)]
    i+=1

print(result.to_markdown())
result

Подбор гиперпараметра:
|     |   var_smoothing |   train acc |   test acc |
|----:|----------------:|------------:|-----------:|
|   0 |        0.007    |    0.825871 |   0.757576 |
|   1 |        0.006993 |    0.825871 |   0.757576 |
|   2 |        0.006986 |    0.825871 |   0.757576 |
|   3 |        0.006979 |    0.825871 |   0.757576 |
|   4 |        0.006972 |    0.825871 |   0.757576 |
|   5 |        0.006965 |    0.825871 |   0.757576 |
|   6 |        0.006958 |    0.825871 |   0.757576 |
|   7 |        0.006951 |    0.825871 |   0.757576 |
|   8 |        0.006944 |    0.825871 |   0.757576 |
|   9 |        0.006937 |    0.825871 |   0.757576 |
|  10 |        0.00693  |    0.825871 |   0.757576 |
|  11 |        0.006923 |    0.825871 |   0.757576 |
|  12 |        0.006916 |    0.825871 |   0.757576 |
|  13 |        0.006909 |    0.825871 |   0.757576 |
|  14 |        0.006902 |    0.825871 |   0.757576 |
|  15 |        0.006895 |    0.825871 |   0.757576 |
|  16 |        0.006888

Unnamed: 0,var_smoothing,train acc,test acc
0,0.007000,0.825871,0.757576
1,0.006993,0.825871,0.757576
2,0.006986,0.825871,0.757576
3,0.006979,0.825871,0.757576
4,0.006972,0.825871,0.757576
...,...,...,...
994,0.000042,0.980100,0.989899
995,0.000035,0.985075,0.989899
996,0.000028,0.990050,0.989899
997,0.000021,0.990050,0.989899
