In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from sklearn.metrics import classification_report


In [12]:
def replace_non_numbers(item):
    try:
        return int(item)
    except ValueError:
        return np.nan

def eval_list_str(x):
    # Remove square brackets and newline characters
    x = x.strip('[]\n')
    x = x.replace('...', '')
    x = x.replace("''", '')
    x = x.replace("'", '')

    # Split the string by spaces
    number_strings = x.split()

    # Convert number strings to integers
    number_list = [replace_non_numbers(num) for num in number_strings]
    return number_list


In [5]:
data = pd.read_csv("data_joined_2.csv", index_col=0)

# Baseline para classificar atividade muscular

* 0: sem atividade muscular
* 1: com atividade muscular

In [16]:
data["std"] = data["emg"].apply(lambda x: np.std(eval_list_str(x)))
data["mean"] = data["emg"].apply(lambda x: np.mean(eval_list_str(x)))
data["min"] = data["emg"].apply(lambda x: np.min(eval_list_str(x)))
data["max"] = data["emg"].apply(lambda x: np.max(eval_list_str(x)))
data = data.fillna(method="ffill")

In [17]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [18]:
X = data[["std", "mean", "min", "max"]]
y = data["label"]

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = 0.25, 
                                                    random_state=32)

In [20]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [21]:
ridge_cf = RidgeClassifier()
ridge_cf.fit(X_train, y_train)
print("Train Score: ", ridge_cf.score(X_train, y_train))
print("Test Score: ", ridge_cf.score(X_test, y_test))

ValueError: Input X contains NaN.
RidgeClassifier does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [25]:
cm = confusion_matrix(y_test, ridge_cf.predict(X_test))

fig = px.imshow(cm,
                labels=dict(x="Predicted", y="True"),
                x=['Classe 0', 'Classe 1'],
                y=['Classe 0', 'Classe 1'],
                color_continuous_scale='Blues')

fig.update_layout(title="Matriz de Confusão",
                  xaxis_title="Classe Predita",
                  yaxis_title="Classe Verdadeira")
fig.update_traces(showscale=True, colorbar=dict(title="Counts"))

fig.update_layout(font=dict(size=12))

fig.show()

In [29]:
class_report = classification_report(y_test, ridge_cf.predict(X_test), target_names=['Class 0', 'Class 1'])


In [30]:
print(class_report)

              precision    recall  f1-score   support

     Class 0       0.75      1.00      0.86         3
     Class 1       1.00      0.80      0.89         5

    accuracy                           0.88         8
   macro avg       0.88      0.90      0.87         8
weighted avg       0.91      0.88      0.88         8

