# In [1]:
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Load the dataset.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so
# './data.pickle' must come from a trusted source (e.g. this project's own
# preprocessing step) and never from an untrusted download.
try:
    with open('./data.pickle', 'rb') as f:
        data_dict = pickle.load(f)
except Exception as e:
    print(f"Error loading data: {e}")
    raise

# Pull the samples and their labels out of the loaded object and convert them
# to NumPy arrays. Failures are reported in the same print-then-re-raise style
# as the file read above, so a malformed pickle is explained rather than
# surfacing only as a bare traceback; the original exception still propagates.
try:
    data = np.asarray(data_dict['data'])
    labels = np.asarray(data_dict['labels'])
except (KeyError, TypeError) as e:
    # The unpickled object is not subscriptable by key or lacks an entry.
    print(f"Error reading dataset: expected a mapping with 'data' and 'labels' entries ({e!r})")
    raise
except ValueError as e:
    # NOTE(review): recent NumPy releases raise ValueError when the samples
    # have differing lengths (ragged input) — confirm against the installed
    # NumPy version.
    print(f"Error reading dataset: samples could not be converted to an array ({e})")
    raise

# Ensure data consistency before any training work is done.
# 1) There must be at least one sample and one label.
if len(data) == 0 or len(labels) == 0:
    raise ValueError("Data or labels are empty. Please check your dataset.")

# 2) Every sample needs exactly one label.
if len(data) != len(labels):
    raise ValueError("Mismatch between number of data points and labels.")

# 3) The feature array must be two-dimensional (n_samples, n_features).
#    A 1-D result here typically means the samples did not all have the same
#    number of features; the estimator would reject such input later with a
#    ValueError, so fail now with a message that points at the dataset.
if data.ndim != 2:
    raise ValueError(
        f"Expected data of shape (n_samples, n_features), got shape {data.shape}. "
        "Check that every sample has the same number of features."
    )

# Train-test split.
# Hold out 20% of the samples for evaluation. `stratify=labels` keeps the class
# proportions the same in both splits. A fixed seed makes the split (and the
# forest trained below) reproducible, so the reported accuracy can be compared
# between runs.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=RANDOM_STATE,
)

# Initialize and train the model (default hyper-parameters, seeded).
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Make predictions on the held-out split and evaluate the model.
y_predict = model.predict(x_test)
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
score = accuracy_score(y_test, y_predict)
print(f'{score * 100:.2f}% of samples were classified correctly!!!!')

# Persist the fitted classifier to 'model.p', wrapped in a dict under the
# 'model' key. Any failure is reported on stdout and then re-raised.
try:
    with open('model.p', mode='wb') as model_file:
        payload = {'model': model}
        pickle.dump(payload, model_file)
except Exception as save_error:
    print(f"Error saving model: {save_error}")
    raise


# Output: 100.00% of samples were classified correctly!!!!
