In [1]:
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [2]:
# Load the pickled dataset. The context manager guarantees the file handle is
# closed even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code — only open trusted files.
with open('./data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

# Convert every sample under the 'data' key to its own NumPy array.
data = [np.asarray(item) for item in data_dict['data']]

# Preview only the first two samples instead of dumping the whole dataset.
data[:2]

[array([0.43353432, 0.18350464, 0.40010762, 0.08055478, 0.32112801,
        0.03412658, 0.25304812, 0.07279909, 0.21381265, 0.12953073,
        0.29102457, 0.0037795 , 0.1649937 , 0.        , 0.09323794,
        0.00253367, 0.03661621, 0.00535434, 0.2806533 , 0.08837754,
        0.14741051, 0.07067269, 0.06230837, 0.05984047, 0.        ,
        0.0525004 , 0.27428138, 0.1745773 , 0.18972719, 0.16732275,
        0.2204451 , 0.16588768, 0.25363129, 0.1698153 , 0.27336931,
        0.25354725, 0.20691985, 0.22689825, 0.23482686, 0.22176075,
        0.26470351, 0.22460777]),
 array([0.41918981, 0.18451026, 0.39343476, 0.08345756, 0.31586075,
        0.0328787 , 0.24124825, 0.06636447, 0.19699186, 0.11853069,
        0.28948218, 0.00093511, 0.16289961, 0.        , 0.09066403,
        0.00228268, 0.03315592, 0.0058749 , 0.27953291, 0.08548501,
        0.14389265, 0.0686914 , 0.06034815, 0.06042525, 0.        ,
        0.05639416, 0.27412313, 0.17185023, 0.18248302, 0.17023262,
        0.2123

In [3]:
desired_length = 42  # Target number of values per sample; adjust to your feature count

# Bring every sample to exactly `desired_length` values.
data_normalized = []
for sample in data_dict['data']:
    shortfall = desired_length - len(sample)
    if shortfall > 0:
        # Too short: append `shortfall` trailing zeros.
        sample = np.pad(sample, (0, shortfall), mode='constant')
    elif shortfall < 0:
        # Too long: keep only the leading `desired_length` values.
        sample = sample[:desired_length]
    # Samples that already have the right length are stored unchanged.
    data_normalized.append(sample)




In [4]:
# Stack the length-normalized samples into one array and the labels into another.
data = np.array(data_normalized)
labels = np.array(data_dict['labels'])

# Fail loudly instead of printing and carrying on: if the conversion goes wrong,
# later cells must not run against a stale `data` or a missing `labels`.
# `ndim != 2` catches ragged input that did not stack into a (samples, features)
# matrix; the second test catches a sample/label count mismatch.
if data.ndim != 2 or data.shape[0] != labels.shape[0]:
    raise ValueError(
        f"Inconsistent dataset: data shape {data.shape}, labels shape {labels.shape}"
    )

print(f"Data shape: {data.shape}, Labels shape: {labels.shape}")  # Check shapes

Data shape: (1977, 42), Labels shape: (1977,)


In [5]:
# Hold out 20% of the samples for evaluation. `stratify=labels` keeps the class
# proportions the same in both splits; the fixed `random_state` makes the split
# identical on every Restart & Run All, so the reported accuracy is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42
)

In [6]:
# Random forest with default hyperparameters. The fixed `random_state` pins the
# bootstrap sampling and feature selection so retraining gives the same model.
model = RandomForestClassifier(random_state=42)

In [7]:
# Train the classifier on the training split (the fitted estimator is the cell output).
model.fit(X=x_train, y=y_train)

In [8]:
# Predict a label for every held-out sample.
y_predict = model.predict(X=x_test)

In [9]:
# Fraction of held-out samples whose predicted label matches the true label.
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy happens to be
# symmetric, but the documented order keeps this consistent with other metrics.
score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

98.73737373737373% of samples were classified correctly !


In [10]:
# Persist the trained classifier under the 'model' key. The context manager
# closes (and flushes) the file even if pickling raises part-way through.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)