In [9]:
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the pickled dataset, stopping with a readable message if it is unusable.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load './data.pickle' when it comes from a trusted source.
try:
    with open('./data.pickle', 'rb') as file:
        data_dict = pickle.load(file)
except FileNotFoundError:
    print("Error: The file 'data.pickle' was not found.")
    # Raise SystemExit instead of calling exit(): exit() is the interactive
    # helper injected by the `site` module (not meant for programs), and a
    # non-zero status signals the failure when this runs as a script.
    raise SystemExit(1)
except (pickle.UnpicklingError, EOFError):
    # EOFError is what pickle raises for an empty or truncated file.
    print("Error: Failed to unpickle the file. It might be corrupted.")
    raise SystemExit(1)

# Show which container types hold the samples and the labels
print("Type of data_dict['data']:", type(data_dict['data']))
print("Type of data_dict['labels']:", type(data_dict['labels']))

# Collect the distinct per-sample lengths present in the data
unique_lengths = set(map(len, data_dict['data']))
print("Unique lengths of elements in data:", unique_lengths)

# Uniform length that every sample is truncated or padded to below
target_length = 42  # or 84, based on what is more appropriate for your data

# Function to adjust the length of sequences
def adjust_length(sequence, target_length, pad_value=0):
    """Return the items of ``sequence`` as a list of exactly ``target_length`` entries.

    Longer inputs are truncated from the end; shorter inputs are padded at
    the end with ``pad_value``.

    Args:
        sequence: Any iterable of values (list, tuple, NumPy array, ...).
        target_length: Desired number of items; must be non-negative.
        pad_value: Value appended to short inputs. Defaults to 0.

    Returns:
        A new list of length ``target_length``. The input is never mutated
        and never returned as-is, so callers can modify the result safely.

    Raises:
        ValueError: If ``target_length`` is negative.
    """
    if target_length < 0:
        raise ValueError("target_length must be non-negative")

    # Materialise as a list first: ``sequence + [0] * k`` only works for
    # lists (tuples raise TypeError, NumPy arrays add element-wise instead
    # of concatenating).
    items = list(sequence)[:target_length]
    items.extend([pad_value] * (target_length - len(items)))
    return items

# Bring every sample to target_length so the data forms a rectangular array
data = np.array([adjust_length(seq, target_length) for seq in data_dict['data']])
labels = np.array(data_dict['labels'])

# One seed for both the split and the forest so the reported accuracy is
# reproducible across Restart & Run All.
RANDOM_STATE = 42

# Split the data: 80/20, stratified so both parts keep the label proportions
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=RANDOM_STATE,
)

# Initialize and train the RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Make predictions and calculate accuracy
y_predict = model.predict(x_test)
# accuracy_score is documented as (y_true, y_pred); keep that order
score = accuracy_score(y_test, y_predict)
print(f"{score * 100:.2f}% of samples were classified correctly!")

# Save the trained model using a context manager
# NOTE(review): the SVC cell later in this notebook writes to the same
# 'model.p' path and will overwrite this file when it runs.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


Type of data_dict['data']: <class 'list'>
Type of data_dict['labels']: <class 'list'>
Unique lengths of elements in data: {42, 84}
99.88% of samples were classified correctly!


In [15]:
import pickle
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Load the pickled dataset, stopping with a readable message if it is unusable.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load './data.pickle' when it comes from a trusted source.
try:
    with open('./data.pickle', 'rb') as file:
        data_dict = pickle.load(file)
except FileNotFoundError:
    print("Error: The file 'data.pickle' was not found.")
    # Raise SystemExit instead of calling exit(): exit() is the interactive
    # helper injected by the `site` module (not meant for programs), and a
    # non-zero status signals the failure when this runs as a script.
    raise SystemExit(1)
except (pickle.UnpicklingError, EOFError):
    # EOFError is what pickle raises for an empty or truncated file.
    print("Error: Failed to unpickle the file. It might be corrupted.")
    raise SystemExit(1)

# Gather the distinct per-sample lengths found in the data
unique_lengths = {len(element) for element in data_dict['data']}
print("Unique lengths of elements in data:", unique_lengths)

# Uniform length that every sample is truncated or padded to below
target_length = 42  # or 84, depending on your data characteristics

# Function to adjust the length of sequences
# NOTE(review): an identical helper is defined in the RandomForest cell
# earlier in this notebook; this definition shadows it when the cell runs.
def adjust_length(sequence, target_length, pad_value=0):
    """Return the items of ``sequence`` as a list of exactly ``target_length`` entries.

    Longer inputs are truncated from the end; shorter inputs are padded at
    the end with ``pad_value``.

    Args:
        sequence: Any iterable of values (list, tuple, NumPy array, ...).
        target_length: Desired number of items; must be non-negative.
        pad_value: Value appended to short inputs. Defaults to 0.

    Returns:
        A new list of length ``target_length``. The input is never mutated
        and never returned as-is, so callers can modify the result safely.

    Raises:
        ValueError: If ``target_length`` is negative.
    """
    if target_length < 0:
        raise ValueError("target_length must be non-negative")

    # Materialise as a list first: ``sequence + [0] * k`` only works for
    # lists (tuples raise TypeError, NumPy arrays add element-wise instead
    # of concatenating).
    items = list(sequence)[:target_length]
    items.extend([pad_value] * (target_length - len(items)))
    return items

# Bring every sample to target_length so the data forms a rectangular array
data = np.array([adjust_length(seq, target_length) for seq in data_dict['data']])
labels = np.array(data_dict['labels'])

# Fixed seed so the train/test split (and therefore the reported accuracy)
# is reproducible across Restart & Run All.
RANDOM_STATE = 42

# Split the data into training and testing sets: 80/20, stratified so both
# parts keep the label proportions
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=RANDOM_STATE,
)

# Initialize the SVM model with an RBF kernel
model = SVC(kernel='rbf')

# Train the model
model.fit(x_train, y_train)

# Make predictions
y_predict = model.predict(x_test)

# Calculate the accuracy score; accuracy_score is documented as
# (y_true, y_pred), so keep that order
score = accuracy_score(y_test, y_predict)

# Output the accuracy
print(f"{score * 100:.2f}% of samples were classified correctly!")

# Save the trained model using a context manager
# NOTE(review): this writes to the same 'model.p' path as the RandomForest
# cell earlier in this notebook, replacing that saved model.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


Unique lengths of elements in data: {42, 84}
99.07% of samples were classified correctly!
