# In [14]:
import os
import numpy as np
from skimage.transform import resize
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

def _read_feature_file(file_path):
    """Parse one feature file into ``{feature_name: [[float, ...], ...]}``.

    A line starting with "Feature:" opens a new feature named by the text
    after the first colon. Every following line is parsed as one row of
    comma-separated floats; lines that are not fully numeric are ignored.
    """
    feature_data = {}
    current_feature = None
    with open(file_path, 'r') as f:
        for line in f:
            if line.startswith("Feature:"):
                # maxsplit=1 keeps feature names that themselves contain ":".
                current_feature = line.split(":", 1)[1].strip()
                feature_data[current_feature] = []
            elif current_feature is not None:
                # `is not None` (rather than truthiness) so that rows under an
                # unnamed "Feature:" header are still collected.
                try:
                    row = [float(value) for value in line.strip().split(',')]
                except ValueError:
                    # Blank or non-numeric line: not part of the data matrix.
                    continue
                feature_data[current_feature].append(row)
    return feature_data


def load_and_preprocess_features(main_directory, size=64):
    """Load ``*_features.txt`` files from class subfolders as stacked arrays.

    Each immediate subdirectory of ``main_directory`` is one class: its files
    get label 0 when the directory name contains "TD" and label 1 otherwise.
    Every feature matrix found in a file is resized to ``(size, size)`` and
    the features of that file are stacked along the last axis.

    Files that cannot be turned into a sample (no "Feature:" section, a
    feature without numeric rows, or a feature whose rows differ in length)
    are skipped with a warning instead of aborting the whole load.

    Args:
        main_directory: Directory whose subdirectories hold the feature files.
        size: Side length of the square each feature matrix is resized to.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features[i]`` has
        shape ``(size, size, n_features_in_file)`` and ``labels[i]`` is 0 or 1.
        Both arrays are empty when no usable file is found.
    """
    import warnings

    features = []
    labels = []

    # os.listdir order is arbitrary; sorting makes the sample order reproducible.
    for subfolder in sorted(os.listdir(main_directory)):
        subfolder_path = os.path.join(main_directory, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        # Label from the subfolder name (TD for normal, ASD for autism).
        label = 0 if "TD" in subfolder else 1  # 0 for normal, 1 for autism

        for file in sorted(os.listdir(subfolder_path)):
            if not file.endswith('_features.txt'):
                continue

            file_path = os.path.join(subfolder_path, file)
            feature_data = _read_feature_file(file_path)

            if not feature_data:
                # np.stack([]) would raise, so there is nothing to build here.
                warnings.warn(
                    f"Skipping {file_path}: no 'Feature:' sections found",
                    stacklevel=2,
                )
                continue

            channels = []
            problem = None
            for feature_name, rows in feature_data.items():
                if not rows:
                    problem = f"feature {feature_name!r} has no numeric rows"
                    break
                try:
                    feature_array = np.array(rows, dtype=float)
                except ValueError:
                    # Rows of different lengths cannot form a 2D matrix.
                    problem = f"feature {feature_name!r} has rows of unequal length"
                    break
                channels.append(resize(feature_array, (size, size)))

            if problem is not None:
                # Dropping only the bad feature would change this sample's
                # channel count, so the whole file is skipped.
                warnings.warn(f"Skipping {file_path}: {problem}", stacklevel=2)
                continue

            # Stack features into 3D (size, size, num_features).
            features.append(np.stack(channels, axis=-1))
            labels.append(label)

    return np.array(features), np.array(labels)

# Directories for train, validation, and test sets
main_directory = "data/Clean_splited"
train_dir = os.path.join(main_directory, "train")
val_dir = os.path.join(main_directory, "val")
test_dir = os.path.join(main_directory, "test")

# Side length every feature matrix is resized to; one constant keeps the
# three splits at the same resolution.
IMAGE_SIZE = 256

# Load and preprocess the data
X_train, y_train = load_and_preprocess_features(train_dir, size=IMAGE_SIZE)
X_val, y_val = load_and_preprocess_features(val_dir, size=IMAGE_SIZE)
X_test, y_test = load_and_preprocess_features(test_dir, size=IMAGE_SIZE)

# Normalize all splits with the TRAINING maximum. Dividing each split by its
# own maximum would put train/val/test on different scales and would use
# statistics of the held-out data. The scale is converted to a Python float so
# the division keeps the arrays in float32 regardless of NumPy's scalar
# promotion rules.
train_max = float(np.max(X_train))
if train_max == 0.0:
    # All-zero training data: avoid dividing by zero and leave values as-is.
    train_max = 1.0

X_train = X_train.astype('float32') / train_max
X_val = X_val.astype('float32') / train_max
X_test = X_test.astype('float32') / train_max

# One-hot encode the labels (2 classes: 0 = normal, 1 = autism)
y_train = to_categorical(y_train, 2)
y_val = to_categorical(y_val, 2)
y_test = to_categorical(y_test, 2)

# Build a simple CNN model.
# The input shape is taken from the training data itself (height, width,
# channels) so it always matches the size the features were resized to; a
# hard-coded (64, 64, ...) shape fails in fit() when the data was loaded at a
# different size.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # Two-way softmax to match the one-hot encoded labels.
    Dense(2, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the CNN model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")

# Predict and generate classification report.
# argmax turns softmax outputs / one-hot labels back into class indices.
y_test_pred = np.argmax(model.predict(X_test), axis=1)
y_test_true = np.argmax(y_test, axis=1)
# labels=[0, 1] pins both classes so the report still works (and the names
# stay aligned) when one class is absent from the test data or predictions.
print(classification_report(y_test_true, y_test_pred, labels=[0, 1],
                            target_names=["Normal", "Autism"]))
