In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD, AdamW
from tensorflow.keras.datasets import cifar10
import numpy as np
import time


In [2]:
# Load the CIFAR-10 train/test image and label arrays
(x_train_full, y_train_full), (x_test, y_test) = cifar10.load_data()

# Cast the images to float32 and divide by 255 so pixel values lie in [0, 1]
x_train_full = x_train_full.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 1us/step


In [3]:
# Hold out the first `validation_size` samples of the full training set
# (5,000 of the 50,000 images) for validation; the rest is used for training.
validation_size = 5000
x_valid, y_valid = x_train_full[:validation_size], y_train_full[:validation_size]
x_train, y_train = x_train_full[validation_size:], y_train_full[validation_size:]


In [4]:
# One-hot encode the integer labels of every split (10 classes).
# The models are compiled with categorical cross-entropy, which expects one-hot
# targets; a sparse categorical loss would make this step unnecessary.
y_train_cat, y_valid_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_valid, y_test)
)


In [22]:
# Optional: Use a smaller subset of training data (resource-limited setting)
# Keeps only the first 10,000 training samples; images, integer labels and
# one-hot labels are sliced identically so they stay aligned.
# NOTE(review): this cell rebinds x_train / y_train / y_train_cat for every
# later cell. Its execution count (22) is higher than that of most cells below,
# so their stored outputs were presumably produced with the full training split
# and will change on a top-to-bottom re-run -- confirm which setting is intended.
x_train = x_train[:10000]
y_train = y_train[:10000]
y_train_cat = y_train_cat[:10000]


In [5]:
# Base Model Definition

def create_base_model():
    """Build and compile the baseline CNN for 32x32 RGB images.

    Architecture: Conv(64, 4x4) -> MaxPool(2x2) -> Conv(32, 4x4) -> MaxPool(2x2)
    -> Flatten -> Dense(256, relu) -> Dense(10, softmax).

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(32, 32, 3)),

        # First convolutional layer
        Conv2D(64, (4, 4), activation='relu', padding='same'),

        # First max pooling layer
        MaxPooling2D((2, 2)),

        # Second convolutional layer
        Conv2D(32, (4, 4), activation='relu', padding='same'),

        # Second max pooling layer
        MaxPooling2D((2, 2)),

        # Flatten for dense layers
        Flatten(),

        # Fully connected layer
        Dense(256, activation='relu'),

        # Output layer (10 classes)
        Dense(10, activation='softmax')
    ])

    # Compile model
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


In [6]:
# Create and summarize the base model
# `base_model` is the instance trained directly in the next training cell;
# summary() prints the model's layer-by-layer overview.
base_model = create_base_model()
base_model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Train the base model (for final reporting)
# Early stopping watches the validation loss and, once training stops, rolls the
# model back to the weights of its best epoch. Without `restore_best_weights`
# the reported model would keep the weights from `patience` epochs after the best
# one. This matches the early-stopping settings used inside run_experiment.
history_base = base_model.fit(
    x_train, y_train_cat,
    epochs=50,
    batch_size=32,
    validation_data=(x_valid, y_valid_cat),
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)


Epoch 1/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - accuracy: 0.4804 - loss: 1.4466 - val_accuracy: 0.5944 - val_loss: 1.1689
Epoch 2/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 18ms/step - accuracy: 0.6195 - loss: 1.0678 - val_accuracy: 0.6348 - val_loss: 1.0358
Epoch 3/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 20ms/step - accuracy: 0.6838 - loss: 0.9014 - val_accuracy: 0.6664 - val_loss: 0.9613
Epoch 4/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 21ms/step - accuracy: 0.7273 - loss: 0.7706 - val_accuracy: 0.6954 - val_loss: 0.9091
Epoch 5/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 19ms/step - accuracy: 0.7683 - loss: 0.6561 - val_accuracy: 0.6960 - val_loss: 0.9210
Epoch 6/50
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 17ms/step - accuracy: 0.8018 - loss: 0.5556 - val_accuracy: 0.7020 - val_loss: 0.9416
Epoc

In [7]:
# Model (a): Reduced Channels + Dropout

def create_model_a():
    """Build and compile Model (a): fewer first-layer filters plus dropout.

    Differs from the base model by using 32 instead of 64 filters in the first
    convolution and by adding a 30% dropout layer before the output layer.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(32, 32, 3)),

        # First convolutional layer: reduced filters (64 -> 32)
        Conv2D(32, (4, 4), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        # Second convolutional layer
        Conv2D(32, (4, 4), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Flatten(),

        # Fully connected layer
        Dense(256, activation='relu'),

        # Dropout with 30% probability
        Dropout(0.3),

        # Output layer
        Dense(10, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


In [8]:
# Training and Evaluation Framework

def run_experiment(
    model_creator,
    name,
    x_train_data, y_train_data,
    x_valid_data, y_valid_data,
    x_test_data, y_test_data,
    epochs=50,
    batch_size=32,
    runs=2
):
    """Train and evaluate a freshly built model several times and average the results.

    Each run builds a new model with ``model_creator``, trains it with early
    stopping on the validation loss (patience 5, best weights restored) and
    evaluates it on the test data.

    Args:
        model_creator: Zero-argument callable returning a compiled model.
        name: Label used in the printed report.
        x_train_data, y_train_data: Training inputs and targets.
        x_valid_data, y_valid_data: Validation inputs and targets.
        x_test_data, y_test_data: Test inputs and targets.
        epochs: Maximum number of training epochs per run.
        batch_size: Mini-batch size passed to ``fit``.
        runs: Number of independent repetitions (must be >= 1).

    Returns:
        Tuple ``(history, avg_accuracy, avg_time)``: the ``fit`` history of the
        LAST run only, the mean test accuracy and the mean training time in
        seconds over all runs.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    # Fail early: with no runs there is nothing to average and no history to return.
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")

    accuracies = []
    training_times = []
    history = None

    print(f"--- Starting experiment: {name} ---")

    for i in range(runs):
        # Seed Python's `random`, NumPy and TensorFlow together; seeding
        # TensorFlow alone does not cover every source of randomness involved
        # in building and training the model.
        tf.keras.utils.set_random_seed(42 + i)
        model = model_creator()

        # Early stopping based on validation loss
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )

        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by system clock adjustments.
        start_time = time.perf_counter()

        history = model.fit(
            x_train_data, y_train_data,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_valid_data, y_valid_data),
            callbacks=[early_stopping],
            verbose=0
        )

        training_time = time.perf_counter() - start_time
        training_times.append(training_time)

        # Final evaluation on test set
        _, accuracy = model.evaluate(x_test_data, y_test_data, verbose=0)
        accuracies.append(accuracy)

        print(
            f"Run {i+1}: Test Accuracy = {accuracy:.4f}, "
            f"Training Time = {training_time:.2f} seconds"
        )

    avg_accuracy = np.mean(accuracies)
    avg_time = np.mean(training_times)

    # Report the actual number of runs instead of a hard-coded "2".
    print(f"\nFinal Results for {name}:")
    print(f"Average Test Accuracy ({runs} runs): {avg_accuracy:.4f}")
    print(f"Average Training Time ({runs} runs): {avg_time:.2f} seconds")

    return history, avg_accuracy, avg_time


In [12]:
# Run experiments
# Compare the base model and Model A on the same train/validation/test arrays,
# using run_experiment's default settings (epochs=50, batch_size=32, runs=2).
# Note: `history_base` is rebound here to the history returned by run_experiment.

history_base, acc_base, time_base = run_experiment(
    create_base_model,
    "Base Model",
    x_train, y_train_cat,
    x_valid, y_valid_cat,
    x_test, y_test_cat
)

history_a, acc_a, time_a = run_experiment(
    create_model_a,
    "Model A",
    x_train, y_train_cat,
    x_valid, y_valid_cat,
    x_test, y_test_cat
)


--- Starting experiment: Base Model ---
Run 1: Test Accuracy = 0.6531, Training Time = 265.35 seconds
Run 2: Test Accuracy = 0.6655, Training Time = 259.02 seconds

Final Results for Base Model:
Average Test Accuracy (2 runs): 0.6593
Average Training Time (2 runs): 262.19 seconds
--- Starting experiment: Model A ---
Run 1: Test Accuracy = 0.6903, Training Time = 233.20 seconds
Run 2: Test Accuracy = 0.6763, Training Time = 194.32 seconds

Final Results for Model A:
Average Test Accuracy (2 runs): 0.6833
Average Training Time (2 runs): 213.76 seconds


In [14]:
def create_model_b():
    """Build and compile Model (b): four 3x3 convolutions with 32 filters each.

    The first two convolutions are each followed by 2x2 max pooling; the two
    additional convolutions are not pooled.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(32, 32, 3)),

        # Layer 1
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        # Layer 2
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        # Layer 3 (new) - no max pooling
        Conv2D(32, (3, 3), activation='relu', padding='same'),

        # Layer 4 (new) - no max pooling
        Conv2D(32, (3, 3), activation='relu', padding='same'),

        Flatten(),

        Dense(256, activation='relu'),

        Dense(10, activation='softmax')
    ])

    # Same optimizer, loss and metric settings as the base model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


In [15]:

# `model_b` is built only to print its summary; run_experiment receives the
# factory function and builds its own fresh model for every run.
model_b = create_model_b()
model_b.summary()
history_b, acc_b, time_b = run_experiment(create_model_b, "Model B", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)

--- Starting experiment: Model B ---
Run 1: Test Accuracy = 0.6357, Training Time = 143.63 seconds
Run 2: Test Accuracy = 0.6911, Training Time = 149.63 seconds

Final Results for Model B:
Average Test Accuracy (2 runs): 0.6634
Average Training Time (2 runs): 146.63 seconds


In [18]:
def create_model_p():
    """Build and compile Model (p): the base architecture with batch normalization.

    Every convolution and the hidden dense layer are followed by
    BatchNormalization and then a ReLU activation.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(32, 32, 3)),

        # First convolutional block: Conv -> BN -> ReLU -> MaxPool
        Conv2D(64, (4, 4), padding='same'),
        BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        MaxPooling2D((2, 2)),

        # Second convolutional block: Conv -> BN -> ReLU -> MaxPool
        Conv2D(32, (4, 4), padding='same'),
        BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        MaxPooling2D((2, 2)),

        # Flatten for the dense layers
        Flatten(),

        # First fully connected layer: Dense -> BN -> ReLU
        Dense(256),
        BatchNormalization(),
        tf.keras.layers.Activation('relu'),

        # Fully connected output layer
        Dense(10, activation='softmax')
    ])

    # Same optimizer, loss and metric settings as the base model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


In [19]:
# `model_p` is built only to print its summary; run_experiment receives the
# factory function and builds its own fresh model for every run.
model_p = create_model_p()
model_p.summary()
history_p, acc_p, time_p = run_experiment(create_model_p, "Model P (Batch Norm)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)

--- Starting experiment: Model P (Batch Norm) ---
Run 1: Test Accuracy = 0.6586, Training Time = 473.07 seconds
Run 2: Test Accuracy = 0.6342, Training Time = 395.60 seconds

Final Results for Model P (Batch Norm):
Average Test Accuracy (2 runs): 0.6464
Average Training Time (2 runs): 434.34 seconds


In [None]:
def create_model_t(lr):
    """Build the base architecture and compile it with a custom learning rate.

    Args:
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(32, 32, 3)),

        # Same layers as the base model
        Conv2D(64, (4, 4), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Conv2D(32, (4, 4), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(10, activation='softmax')
    ])

    # Compile with the requested learning rate
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


In [23]:

# Learning-rate sweep on the base architecture: 0.1, 0.001 (the base value) and 0.0001.
# Each lambda defers construction so run_experiment can build a fresh model per run.
# NOTE(review): this cell's execution count (23) directly follows the 10,000-sample
# subset cell (22), so these runs presumably used the reduced training set and are
# not directly comparable with the earlier "Base Model" numbers -- confirm.
history_t_1, acc_t_1, time_t_1 = run_experiment(lambda: create_model_t(0.1), "Model T (LR=0.1)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)
history_t_3, acc_t_3, time_t_3 = run_experiment(lambda: create_model_t(0.001), "Model T (LR=0.001) [Base]", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)
history_t_4, acc_t_4, time_t_4 = run_experiment(lambda: create_model_t(0.0001), "Model T (LR=0.0001)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)

--- Starting experiment: Model T (LR=0.1) ---
Run 1: Test Accuracy = 0.1000, Training Time = 36.44 seconds
Run 2: Test Accuracy = 0.1000, Training Time = 35.11 seconds

Final Results for Model T (LR=0.1):
Average Test Accuracy (2 runs): 0.1000
Average Training Time (2 runs): 35.77 seconds
--- Starting experiment: Model T (LR=0.001) [Base] ---
Run 1: Test Accuracy = 0.5655, Training Time = 58.36 seconds
Run 2: Test Accuracy = 0.5443, Training Time = 58.92 seconds

Final Results for Model T (LR=0.001) [Base]:
Average Test Accuracy (2 runs): 0.5549
Average Training Time (2 runs): 58.64 seconds
--- Starting experiment: Model T (LR=0.0001) ---
Run 1: Test Accuracy = 0.5745, Training Time = 183.46 seconds
Run 2: Test Accuracy = 0.5922, Training Time = 215.15 seconds

Final Results for Model T (LR=0.0001):
Average Test Accuracy (2 runs): 0.5834
Average Training Time (2 runs): 199.30 seconds


In [None]:
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras import backend as K

# تابع Swish برای استفاده در Keras (اگرچه در نسخه‌های جدیدتر Keras/TF به صورت توکار وجود دارد)
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = K.sigmoid(x)
    return x * gate

def create_model_th(activation_fn):
    """Build and compile the base architecture with a configurable hidden activation.

    Args:
        activation_fn: Either the string ``'leaky_relu'`` (a separate
            ``LeakyReLU(alpha=0.01)`` layer is inserted after each hidden layer),
            or any value accepted by the ``activation=`` argument of Keras
            layers (an activation name or a callable such as ``swish``).

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    use_leaky_relu = activation_fn == 'leaky_relu'

    # For Leaky ReLU the hidden layers stay linear and the activation is added
    # as its own layer; every other activation goes through `activation=`.
    inline_activation = None if use_leaky_relu else activation_fn

    model = Sequential()

    def add_leaky_relu_if_needed():
        # Each call adds a new LeakyReLU instance, so no layer object is shared.
        if use_leaky_relu:
            model.add(LeakyReLU(alpha=0.01))

    # Declare the input explicitly rather than passing `input_shape` to the
    # first layer, which newer Keras versions warn against.
    model.add(tf.keras.Input(shape=(32, 32, 3)))

    # First convolutional block
    model.add(Conv2D(64, (4, 4), activation=inline_activation, padding='same'))
    add_leaky_relu_if_needed()
    model.add(MaxPooling2D((2, 2)))

    # Second convolutional block
    model.add(Conv2D(32, (4, 4), activation=inline_activation, padding='same'))
    add_leaky_relu_if_needed()
    model.add(MaxPooling2D((2, 2)))

    model.add(Flatten())

    # First fully connected layer
    model.add(Dense(256, activation=inline_activation))
    add_leaky_relu_if_needed()

    # Output layer
    model.add(Dense(10, activation='softmax'))

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# اجرای آزمایش برای مدل ث
# history_th_leaky, acc_th_leaky, time_th_leaky = run_experiment(lambda: create_model_th('leaky_relu'), "Model TH (Leaky ReLU)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)
# history_th_swish, acc_th_swish, time_th_swish = run_experiment(lambda: create_model_th(swish), "Model TH (Swish)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)

In [None]:
# AdamW is provided by Keras itself and is already imported from
# tensorflow.keras.optimizers at the top of this notebook, so the separate
# (no longer maintained) TensorFlow Addons package is not required here.

def create_model_j(optimizer_type):
    """Build the base architecture and compile it with the selected optimizer.

    Args:
        optimizer_type: ``'sgd'`` for SGD with momentum 0.9, ``'adamw'`` for
            AdamW with weight decay 0.001; any other value falls back to Adam.
            All three use a learning rate of 0.001.

    Returns:
        The base model, recompiled with the chosen optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    model = create_base_model()  # reuse the base architecture

    if optimizer_type == 'sgd':
        # Momentum is enabled for the SGD variant of this comparison
        optimizer = SGD(learning_rate=0.001, momentum=0.9)
    elif optimizer_type == 'adamw':
        # AdamW with the base learning rate and a weight decay of 0.001
        optimizer = AdamW(learning_rate=0.001, weight_decay=0.001)
    else:  # Adam (base)
        optimizer = Adam(learning_rate=0.001)

    # Recompiling replaces the optimizer that create_base_model() configured.
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# اجرای آزمایش برای مدل ج
# history_j_sgd, acc_j_sgd, time_j_sgd = run_experiment(lambda: create_model_j('sgd'), "Model J (SGD)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)
# history_j_adamw, acc_j_adamw, time_j_adamw = run_experiment(lambda: create_model_j('adamw'), "Model J (AdamW)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)

In [None]:
# Model CH (batch-size study): reuse create_base_model and change only the batch_size argument of run_experiment.

# Run the experiments for Model CH (uncomment the calls below to execute them)
# Base history (batch size 32):
# history_ch_32, acc_ch_32, time_ch_32 = run_experiment(create_base_model, "Model CH (Batch=32) [Base]", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat, batch_size=32)
# History for batch size 4:
# history_ch_4, acc_ch_4, time_ch_4 = run_experiment(create_base_model, "Model CH (Batch=4)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat, batch_size=4)
# History for batch size 128:
# history_ch_128, acc_ch_128, time_ch_128 = run_experiment(create_base_model, "Model CH (Batch=128)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat, batch_size=128)

In [None]:
import matplotlib.pyplot as plt

def _subplot_grid(n_items, max_cols=8, cell_size=1.5):
    """Create a subplot grid with at least ``n_items`` axes, all with hidden frames."""
    n_cols = max(1, min(max_cols, n_items))
    n_rows = max(1, int(np.ceil(n_items / n_cols)))
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(cell_size * n_cols, cell_size * n_rows),
        squeeze=False,  # always a 2-D array, even for a single row or column
    )
    # Hide every frame up front so unused cells of the grid stay blank.
    for ax in axes.flat:
        ax.axis('off')
    return fig, axes


def visualize_feature_maps_and_kernels(model, image, layer_index=0):
    """Plot the feature maps and kernels of one convolutional layer.

    Args:
        model: Keras model whose layer is inspected.
        image: A single input image without the batch dimension.
        layer_index: Index into ``model.layers`` of the layer to visualize.

    Raises:
        ValueError: If the selected layer has no 4-D kernel to display.
    """
    layer = model.layers[layer_index]

    # 1. Extract the kernels. get_weights() yields [kernel] or [kernel, bias];
    #    only the kernel is plotted, so a bias-free layer works as well.
    layer_weights = layer.get_weights()
    if not layer_weights or layer_weights[0].ndim != 4:
        raise ValueError(
            f"Layer {layer_index} ({layer.name}) has no 4-D convolution kernel to display."
        )
    weights = layer_weights[0]
    kernel_h, kernel_w = weights.shape[:2]
    n_kernels = weights.shape[-1]

    # 2. Compute the layer's output with a sub-model that ends at this layer.
    layer_output_model = tf.keras.Model(inputs=model.input, outputs=layer.output)

    # Add the batch dimension expected by predict()
    img_tensor = np.expand_dims(image, axis=0)

    # Output (feature maps) for the single image
    feature_maps = layer_output_model.predict(img_tensor)[0]
    n_features = feature_maps.shape[-1]

    # 3. Show the feature maps; the grid adapts to the number of channels
    #    instead of assuming exactly 64.
    print(f"Displaying {n_features} Feature Maps (Grayscale):")
    fig_f, axes_f = _subplot_grid(n_features)
    for i in range(n_features):
        ax = axes_f.flat[i]
        ax.imshow(feature_maps[:, :, i], cmap='gray')
        ax.set_title(f'Map {i+1}', fontsize=8)
    plt.tight_layout()
    plt.show()

    # 4. Show the kernels; the printed size is read from the weights rather
    #    than hard-coded.
    print(f"\nDisplaying {n_kernels} Kernels ({kernel_h}x{kernel_w}):")
    fig_k, axes_k = _subplot_grid(n_kernels)
    for i in range(n_kernels):
        ax = axes_k.flat[i]
        kernel = weights[:, :, :, i]
        # Average over the input channels to obtain one 2-D image per kernel;
        # imshow scales each image to the colormap range on its own.
        ax.imshow(np.mean(kernel, axis=2), cmap='viridis')
        ax.set_title(f'Kernel {i+1}', fontsize=8)
    plt.tight_layout()
    plt.show()

# --- اجرای مثال (بعد از آموزش بهترین مدل) ---
# بهترین مدل (Best_Model)
# image_to_test = x_test[0]
# visualize_feature_maps_and_kernels(Best_Model, image_to_test, layer_index=0)
# توجه: layer_index=0 لایه پیچشی اول است.

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

def evaluate_and_visualize(model, x_test, y_test, class_names):
    """Show sample predictions and the confusion matrix of a classifier.

    Args:
        model: Trained model whose ``predict`` returns per-class scores.
        x_test: Test images.
        y_test: Test labels, either one-hot encoded (N, num_classes) or
            integer class indices of shape (N,) or (N, 1).
        class_names: Class names, indexed by class id.
    """
    # Predictions
    y_pred_probs = model.predict(x_test)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Accept both label formats. Applying argmax to integer labels of shape
    # (N, 1) would silently turn every label into class 0.
    y_test = np.asarray(y_test)
    if y_test.ndim > 1 and y_test.shape[-1] > 1:
        y_true = np.argmax(y_test, axis=1)
    else:
        y_true = y_test.reshape(-1).astype(int)

    # 1. Show a few randomly chosen predictions
    print("--- 1. Sample Predictions (Actual vs Predicted) ---")

    fig, axes = plt.subplots(2, 5, figsize=(15, 6))
    for i in range(10):
        ax = axes.flat[i]
        idx = np.random.randint(0, len(x_test))
        ax.imshow(x_test[idx])
        ax.set_title(f"True: {class_names[y_true[idx]]}\nPred: {class_names[y_pred[idx]]}", fontsize=10)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

    # 2. Show the confusion matrix
    print("\n--- 2. Confusion Matrix ---")

    # Fix the label set so the matrix always has one row/column per class name,
    # even when some class never occurs in y_true or y_pred; otherwise the
    # heatmap tick labels would no longer line up with the matrix.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

# --- اجرای مثال (بعد از آموزش بهترین مدل) ---
# class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# evaluate_and_visualize(Best_Model, x_test, y_test_cat, class_names)

In [None]:
from tensorflow.keras.applications import VGG19, ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D

def create_transfer_model(base_model_name):
    """Build a transfer-learning classifier on a frozen ImageNet backbone.

    The model takes 32x32 RGB images scaled to [0, 1] (the normalization applied
    to the data earlier in this notebook) and converts them internally to the
    input format of the pretrained network.

    Args:
        base_model_name: ``'VGG19'`` or ``'ResNet50'``.

    Returns:
        A compiled ``Sequential`` model: input conversion -> frozen backbone ->
        Flatten -> Dense(256, relu) -> Dense(10, softmax), using Adam
        (learning rate 0.0001), categorical cross-entropy and accuracy.

    Raises:
        ValueError: If ``base_model_name`` is not one of the supported names.
    """
    input_shape = (32, 32, 3)

    if base_model_name == 'VGG19':
        backbone_cls = VGG19
        preprocess_input = tf.keras.applications.vgg19.preprocess_input
    elif base_model_name == 'ResNet50':
        backbone_cls = ResNet50
        preprocess_input = tf.keras.applications.resnet50.preprocess_input
    else:
        raise ValueError("Invalid base model name.")

    base_model = backbone_cls(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the feature-extraction layers
    base_model.trainable = False

    # Build the final model
    model = Sequential([
        tf.keras.Input(shape=input_shape),

        # The pretrained weights expect each network's own `preprocess_input`
        # transform applied to pixel values in [0, 255] (see the Keras
        # Applications documentation), so undo the [0, 1] scaling first.
        tf.keras.layers.Rescaling(255.0),
        tf.keras.layers.Lambda(preprocess_input, name='imagenet_preprocess'),

        base_model,

        # Flatten the backbone output (GlobalAveragePooling2D() is an alternative)
        Flatten(),

        # Classification head mirroring the base CNN
        Dense(256, activation='relu'),
        Dense(10, activation='softmax')
    ])

    # A lower learning rate than the base model's 0.001 is used for transfer learning
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# اجرای آزمایش برای مدل ذ
# history_z_vgg, acc_z_vgg, time_z_vgg = run_experiment(lambda: create_transfer_model('VGG19'), "Model Z (VGG19)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)
# history_z_resnet, acc_z_resnet, time_z_resnet = run_experiment(lambda: create_transfer_model('ResNet50'), "Model Z (ResNet50)", x_train, y_train_cat, x_valid, y_valid_cat, x_test, y_test_cat)