In [1]:
import numpy as np
from collections import Counter

'''
data: np.array
query: np.array
data - query: conjunto de punto sin los de la consulta
'''
def knn(data, labels, query, k=3):
    # Calcular distancias entre el query y todos los datos
    distances = np.linalg.norm(data - query, axis=1)
    # Obtener los índices de los k vecinos más cercanos
    k_indices = np.argsort(distances)[:k]
    # Obtener las etiquetas de los k vecinos más cercanos
    k_nearest_labels = labels[k_indices]
    # Devolver el valor más común entre los k vecinos más cercanos
    most_common = Counter(k_nearest_labels).most_common(1)
    return most_common[0][0]


def knn_predict(X, Y, X_new, k=3):
    predictions = []
    for query in X_new:
        prediction = knn(X, Y, query, k)
        predictions.append(prediction)
    return np.array(predictions)


class Nodo:
    def __init__(self, index=None, threshold=None, left=None, right=None, value=None):
        self.index = index
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value

    def IsTerminal(self, Y):
        return len(set(Y)) == 1

    def Entropy(self, Y):
        histogram = Counter(Y)
        probabilities = [freq / len(Y) for freq in histogram.values()]
        return -sum(p * np.log2(p) for p in probabilities if p > 0)

    def BestSplit(self, X, Y):
        best_gain = -1
        best_feature, best_threshold = None, None
        num_samples, num_features = X.shape

        current_entropy = self.Entropy(Y)

        idx = 0
        for feature_index in range(num_features):
            print(idx)
            idx += 1
            thresholds = set(X[:, feature_index])
            for threshold in thresholds:
                left_mask = X[:, feature_index] <= threshold
                right_mask = X[:, feature_index] > threshold
                left_y, right_y = Y[left_mask], Y[right_mask]
                if len(left_y) == 0 or len(right_y) == 0:
                    continue

                left_entropy = self.Entropy(left_y)
                right_entropy = self.Entropy(right_y)
                p_left = len(left_y) / len(Y)
                gain = current_entropy - (p_left * left_entropy + (1 - p_left) * right_entropy)

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature_index
                    best_threshold = threshold

        return best_feature, best_threshold

    def Gini(self, Y):
        # write your code here
        pass


class DT:
    def __init__(self):
        self.m_Root = None

    def create_DT(self, X, Y):
        self.m_Root = self._build_tree(X, Y)

    def _build_tree(self, X, Y):
        node = Nodo()
        if node.IsTerminal(Y):
            node.value = Counter(Y).most_common(1)[0][0]
            return node

        best_feature, best_threshold = self.Find_Best_Split(X, Y)
        if best_feature is None:
            node.value = Counter(Y).most_common(1)[0][0]
            return node

        left_mask = X[:, best_feature] <= best_threshold
        right_mask = X[:, best_feature] > best_threshold
        left_X, right_X = X[left_mask], X[right_mask]
        left_Y, right_Y = Y[left_mask], Y[right_mask]

        node.index = best_feature
        node.threshold = best_threshold
        node.left = self._build_tree(left_X, left_Y)
        node.right = self._build_tree(right_X, right_Y)

        return node

    def Find_Best_Split(self, X, Y):
        node = Nodo()
        return node.BestSplit(X, Y)

    def _predict_one(self, x):
        node = self.m_Root
        while node.value is None:
            if x[node.index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value


def dt_predict(X, Y, X_new):
    print('DT 1')
    tree_model = DT()

    # Entrenar el modelo
    print('DT 2')
    tree_model.create_DT(X, Y)

    print('DT 3')
    # Realizar las predicciones
    Y_pred = np.array([tree_model._predict_one(x) for x in X_new])
    print('DT 4')

    return Y_pred

In [2]:
import h5py
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split
from pyts.transformation import ROCKET

In [3]:
with h5py.File('train.h5', 'r') as Train:
    keys = list(Train.keys())
    data_train = []
    body_acc_x = np.array(Train[keys[0]])
    body_acc_y = np.array(Train[keys[1]])
    body_acc_z = np.array(Train[keys[2]])
    body_gyro_x = np.array(Train[keys[3]])
    body_gyro_y = np.array(Train[keys[4]])
    body_gyro_z = np.array(Train[keys[5]])
    total_acc_x = np.array(Train[keys[6]])
    total_acc_y = np.array(Train[keys[7]])
    total_acc_z = np.array(Train[keys[8]])
    y = np.array(Train[keys[9]])

    for i in range(len(body_acc_x)):
        data_train.append(
            np.concatenate(
                (
                    body_acc_x[i], body_acc_y[i], body_acc_z[i],
                    body_gyro_x[i], body_gyro_y[i], body_gyro_z[i],
                    total_acc_x[i], total_acc_y[i], total_acc_z[i]
                )
            )
        )

print(data_train)

[array([0.00018085, 0.01013856, 0.00927557, ..., 0.1003852 , 0.09987355,
       0.09498741]), array([0.00109375, 0.00455008, 0.00287917, ..., 0.0935352 , 0.08903516,
       0.09061235]), array([ 0.00353127,  0.00228506, -0.00041975, ...,  0.08301135,
        0.08233391,  0.08148748]), array([-0.00177235, -0.00131145,  0.00038768, ...,  0.08576054,
        0.08327454,  0.08140418]), array([ 8.747685e-05, -2.719175e-04,  1.022103e-03, ...,  8.132876e-02,
        8.539719e-02,  8.881566e-02]), array([ 0.00052516, -0.00186328, -0.00151023, ...,  0.08812156,
        0.08816615,  0.08748509]), array([-0.00365486, -0.00402574, -0.00125936, ...,  0.08043603,
        0.07778891,  0.07410929]), array([0.003776  , 0.00426277, 0.00489636, ..., 0.0740713 , 0.07404372,
       0.07339988]), array([0.0062    , 0.00263264, 0.00130771, ..., 0.07307697, 0.07599545,
       0.07555214]), array([-0.00164937, -0.00168442, -0.00107347, ...,  0.08064469,
        0.0818149 ,  0.07943561]), array([ 0.0053247 , -

In [6]:
# Entrenamiento ROCKET
rocket = ROCKET()
x_train_complete = rocket.fit_transform(data_train)

In [None]:
# Reducción de dimensionalidad con PCA
pca_train = PCA(n_components=10)
x_train_complete_tmp = pca_train.fit_transform(x_train_complete)
y_train_complete = y

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(x_train_complete_tmp, y_train_complete, test_size=0.3, random_state=42)
print('Prediction begins')
y_temp = dt_predict(X_train, y_train, X_test)
print('Prediction ends')
acc = accuracy_score(y_true=y_test, y_pred=y_temp)
print(acc)

In [5]:
# Reducción de dimensionalidad con PCA
pca_train = PCA(n_components=50)
x_train_complete_tmp = pca_train.fit_transform(x_train_complete)
y_train_complete = y

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(x_train_complete_tmp, y_train_complete, test_size=0.3, random_state=42)
print('Prediction begins')
y_temp = dt_predict(X_train, y_train, X_test)
print('Prediction ends')
acc = accuracy_score(y_true=y_test, y_pred=y_temp)
print(acc)


ValueError: n_components=50 must be between 0 and min(n_samples, n_features)=10 with svd_solver='covariance_eigh'

In [11]:
with h5py.File('test.h5', 'r') as Pred:
    data_pred = []
    body_acc_x_pred = np.array(Pred[keys[0]])
    body_acc_y_pred = np.array(Pred[keys[1]])
    body_acc_z_pred = np.array(Pred[keys[2]])
    body_gyro_x_pred = np.array(Pred[keys[3]])
    body_gyro_y_pred = np.array(Pred[keys[4]])
    body_gyro_z_pred = np.array(Pred[keys[5]])
    total_acc_x_pred = np.array(Pred[keys[6]])
    total_acc_y_pred = np.array(Pred[keys[7]])
    total_acc_z_pred = np.array(Pred[keys[8]])
    for i in range(len(body_acc_x_pred)):
        data_pred.append(
            np.concatenate(
                (
                    body_acc_x_pred[i], body_acc_y_pred[i], body_acc_z_pred[i],
                    body_gyro_x_pred[i], body_gyro_y_pred[i], body_gyro_z_pred[i],
                    total_acc_x_pred[i], total_acc_y_pred[i], total_acc_z_pred[i]
                )
            )
        )

rocket start


### KNN PREDICTION

In [None]:
pca_pred = PCA(n_components=100)
x_pred = pca_pred.fit_transform(data_pred)
y_pred = knn_predict(X_train, y_train, x_pred, 5) # K = 5
print(list(y_pred))

### DESITION TREE

In [None]:
pca_pred = PCA(n_components=100)
x_pred = pca_pred.fit_transform(data_pred)
y_pred = dt_predict(X_train, y_train, x_pred)
print(list(y_pred))