In [1]:
import sqlite3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Load every row of the `mockdata` table, encode the string labels as integers,
# and split the six feature columns into training and validation sets.

# Connect to the SQLite database
conn = sqlite3.connect('Cleaned_DB.db')
try:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM mockdata")
    rows = cursor.fetchall()
finally:
    # Close the handle even if the query raises (e.g. the table is missing),
    # so a failed run does not leave the database file open.
    conn.close()

# Load data into DataFrame
# SELECT * returns columns in table order and these names are applied
# positionally, so the list must match the table's column count and order.
columns = ['n', 'timestamp', 'ax', 'ay', 'az', 'gx', 'gy', 'gz', 'label']
df = pd.DataFrame(rows, columns=columns)

# Fail early with a clear message instead of a less obvious error further down.
if df.empty:
    raise ValueError("No rows returned from table 'mockdata' in Cleaned_DB.db")

# Preprocessing
# LabelEncoder replaces each distinct label with an integer code; the codes
# follow the sorted order of the labels (see label_encoder.classes_).
# Note: this overwrites the original string labels in df['label'].
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Split data into features (X) and labels (y)
# 'n' and 'timestamp' are deliberately left out of the feature matrix.
X = df[['ax', 'ay', 'az', 'gx', 'gy', 'gz']]
y = df['label']

# Split data into training and validation sets
# 80/20 split with a fixed seed so the split is reproducible.
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# consider passing stratify=y so both sets keep the same class ratio.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Train a random forest on the training split, report validation accuracy,
# persist the fitted model, and show the per-feature importances.

# Define and train the RandomForestClassifier
# random_state is fixed so repeated runs build the same forest.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = rf_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'Random Forest Accuracy: {accuracy:.2f}')

# Persist the fitted model. Later cells call joblib.load('rf_model.joblib');
# without this dump they fail on a fresh checkout or silently evaluate a
# stale model from an earlier run.
joblib.dump(rf_model, 'rf_model.joblib')

# One importance value per feature column, sorted from most to least important.
feature_importances = pd.DataFrame(rf_model.feature_importances_,
                                   index = X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
print("Feature Importances:")
print(feature_importances)

Random Forest Accuracy: 0.92
Feature Importances:
    importance
ay    0.210924
az    0.201480
ax    0.194342
gy    0.132461
gz    0.132218
gx    0.128575


In [3]:
import joblib

import pandas as pd

# Run the persisted model on two hand-written single-row samples and print
# the predicted class code for each.

# Sample printed under the "normal" heading below.
n_data = {
    'ax': [-0.304688],
    'ay': [0.424316],
    'az': [-0.908691],
    'gx': [-6.793893],
    'gy': [-0.725191],
    'gz': [0.618321]
}
# Sample printed under the "anomalie" heading below.
a_data = {
    'ax': [0.851563],
    'ay': [0.544434],
    'az': [0.144287],
    'gx': [-6.763359],
    'gy': [-1.083969],
    'gz': [-2.129771]
}

# Column order the model is fed; it mirrors the feature columns used for training.
feature_order = ['ax', 'ay', 'az', 'gx', 'gy', 'gz']

# NOTE: despite the "knn" in the variable name, this is whatever estimator is
# stored in rf_model.joblib (presumably the random forest trained earlier in
# this notebook). The name is kept so other cells referencing it keep working.
# joblib.load unpickles the file, so only load model files you trust.
loaded_knn_clf = joblib.load('rf_model.joblib')

n_df = pd.DataFrame(n_data, columns=feature_order)
a_df = pd.DataFrame(a_data, columns=feature_order)

# Predict using the loaded model
# Pass the DataFrames (not .values) so column names travel with the data;
# scikit-learn versions that record feature names at fit time can then check
# them instead of trusting positional order.
n_pred = loaded_knn_clf.predict(n_df)
a_pred = loaded_knn_clf.predict(a_df)

# The printed values are integer class codes, not label strings.
print("normal", n_pred)
print("anomalie", a_pred)

normal [1]
anomalie [0]




In [4]:
import joblib
import pandas as pd
import sqlite3

# Fetch up to 50 rows labelled 'anomalie' from the database and print the
# persisted model's predicted class code for each row.
# NOTE(review): these rows come from the same `mockdata` table the notebook
# trains on, so most were likely seen during training; treat the result as a
# sanity check, not as an independent evaluation.
conn = sqlite3.connect('Cleaned_DB.db')
# No ORDER BY is given, so which 50 rows are returned is up to SQLite.
query_anomalie = "SELECT ax, ay, az, gx, gy, gz FROM mockdata WHERE label = 'anomalie' LIMIT 50"
try:
    df_anomalie = pd.read_sql_query(query_anomalie, conn)
finally:
    # Close the handle even when the query fails.
    conn.close()

# NOTE: despite the "knn" in the name, this loads the estimator stored in
# rf_model.joblib. joblib.load unpickles the file, so only load trusted files.
loaded_knn_clf = joblib.load('rf_model.joblib')
# Pass the DataFrame (not .values) so the column names selected in the query
# travel with the data and can be checked against the fitted model.
anomalie_pred = loaded_knn_clf.predict(df_anomalie)
print(anomalie_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]


