In [1]:
# Use the necessary imports
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Function to load images and labels from folders
def load_data_from_folders(feature_folders):
    X = []
    y = []
    for label, folder in enumerate(feature_folders):
        for filename in os.listdir(folder):
            img_path = os.path.join(folder, filename)
            img = image.load_img(img_path, target_size=(224, 224))
            x = image.img_to_array(img)
            x = preprocess_input(x)
            X.append(x)
            y.append(label)
    return np.array(X), np.array(y)

# Define feature folder
feature_folders =['archive/train/0', 'archive/train/1', 'archive/train/2','archive/train/3', 'archive/train/4', 'archive/train/5', 'archive/train/6']

# Load images and labels
X, y = load_data_from_folders(feature_folders)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape X_train and X_test to 2D arrays
n_samples_train, height, width, channels = X_train.shape
n_samples_test = X_test.shape[0]
X_train_flattened = X_train.reshape(n_samples_train, height * width * channels)
X_test_flattened = X_test.reshape(n_samples_test, height * width * channels)

# Normalize the feature data
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train_flattened)
X_test_normalized = scaler.transform(X_test_flattened)

# Initialize and train a decision tree classifier with hyperparameter tuning
param_grid = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
decision_tree = DecisionTreeClassifier()
grid_search = GridSearchCV(decision_tree, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_normalized, y_train)

# Get the best estimator from grid search
best_decision_tree = grid_search.best_estimator_

# Predict emotions for the test set
y_pred = best_decision_tree.predict(X_test_normalized)

# Evaluate the performance of the model
accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy after Hyperparameter Tuning:", accuracy)

# Save the trained decision tree model
model_filename = 'best_decision_tree_model.joblib'
dump(best_decision_tree, model_filename)

# Return the trained decision tree classifier
best_decision_tree


MemoryError: Unable to allocate 591. MiB for an array with shape (1029, 150528) and data type float32