In [29]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from keras import models
from keras.models import Sequential
from keras.optimizers import Adam
import tensorflow as tf
import os
import copy
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Flatten
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import CLIPProcessor, CLIPModel

In [2]:
def cross_validate_and_select_best_params(model, X, y, n_splits=5):
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    scores = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'roc_auc': []
    }

    for train_index, test_index in kfold.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # print(f"y_train shape = {y_train.shape}, y_test shape = {y_test.shape}")
        history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_data=(X_test, y_test))

        y_pred_prob = model.predict(X_test)
        y_pred = np.argmax(y_pred_prob, axis=1)
        y_test = np.argmax(y_test, axis=1) 
        
        scores['accuracy'].append(accuracy_score(y_test, y_pred))
        scores['precision'].append(precision_score(y_test, y_pred, average='macro'))
        scores['recall'].append(recall_score(y_test, y_pred, average='macro'))
        scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
        scores['roc_auc'].append(roc_auc_score(y_test, y_pred_prob, multi_class='ovr', average='macro'))  # ROC AUC 多分类问题

    avg_scores = {key: np.mean(value) for key, value in scores.items()}
    return avg_scores

In [3]:
def load_cifar10_data(data_dir):
    X = []
    Y = []

    # Load training data batches
    for i in range(1, 6):
        with open(os.path.join(data_dir, f'data_batch_{i}'), 'rb') as file:
            batch = pickle.load(file, encoding='latin1')
            X.append(batch['data'])
            Y.extend(batch['labels'])

    # Combine training data
    X = np.concatenate(X)

    # Load test data
    with open(os.path.join(data_dir, 'test_batch'), 'rb') as file:
        test_batch = pickle.load(file, encoding='latin1')
        X_test = test_batch['data']
        Y_test = test_batch['labels']

    # Combine test data
    X = np.vstack((X, X_test))
    Y.extend(Y_test)

    # Convert labels to numpy array
    Y = np.array(Y)

    # Note: Removing the reshaping operation
    # X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)  # (N, H, W, C)

    # Load label names from batches.meta
    with open(os.path.join(data_dir, 'batches.meta'), 'rb') as file:
        meta = pickle.load(file, encoding='latin1')
        label_names = meta['label_names']

    return X, Y, label_names


In [4]:
data_dir = 'cifar-10-batches-py'  # path directory
X_combined, y_combined, meta = load_cifar10_data(data_dir)

In [17]:
X_combined_reshaped = X_combined.reshape(-1, 32, 32, 3)
y_combined_categorical = to_categorical(y_combined, num_classes=10)
X_combined_reshaped = X_combined_reshaped.astype('float32') / 255.0

True


In [31]:
class CLIPDataset(Dataset):
    def __init__(self, images, labels, processor):
        self.images = images
        self.labels = labels
        self.processor = processor

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # CLIP 需要将图像转换为张量
        text = f"Label is {np.argmax(label)}"  # 或者使用更合适的文本描述
        inputs = self.processor(images=image, text=text, return_tensors="pt", padding=True, truncation=True)

        return inputs['pixel_values'][0], inputs['input_ids'][0], label

# 创建处理器和模型
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 优化器
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# 5折交叉验证
kf = KFold(n_splits=5, shuffle=True)
all_labels = []
all_preds = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_combined_reshaped)):
    print(f"Fold {fold + 1}")

    # 划分训练和验证数据集
    X_train, X_val = X_combined_reshaped[train_idx], X_combined_reshaped[val_idx]
    y_train, y_val = y_combined_categorical[train_idx], y_combined_categorical[val_idx]

    # 创建数据集和数据加载器
    train_dataset = CLIPDataset(X_train, y_train, processor)
    val_dataset = CLIPDataset(X_val, y_val, processor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 训练模型
    model.train()
    for epoch in range(1):  # 训练3个epoch
        for batch in train_loader:
            images, input_ids, labels = batch
            images = images.to(device)
            input_ids = input_ids.to(device)

            # 前向传播
            outputs = model(pixel_values=images, input_ids=input_ids)
            logits_per_image = outputs.logits_per_image  # 图像的 logits
            logits_per_text = outputs.logits_per_text  # 文本的 logits

            # 计算损失
            labels = torch.arange(len(images), device=device)  # 使用一组标签
            loss_img_text = torch.nn.CrossEntropyLoss()(logits_per_image, labels)  # 计算图像和文本的交叉熵损失
            loss_text_img = torch.nn.CrossEntropyLoss()(logits_per_text, labels)  # 计算文本和图像的交叉熵损失
            loss = (loss_img_text + loss_text_img) / 2  # 取平均值作为最终损失

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print(f"Epoch [{epoch + 1}/3], Loss: {loss.item():.4f}")

    # 验证模型
    model.eval()
    val_preds = []
    val_labels = []

    with torch.no_grad():
        for batch in val_loader:
            images, input_ids, labels = batch
            images = images.to(device)
            input_ids = input_ids.to(device)

            outputs = model(pixel_values=images, input_ids=input_ids)
            logits_per_image = outputs.logits_per_image  # 图像的 logits

            # 选择最高的 logit 作为预测
            preds = torch.argmax(logits_per_image, dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_labels.extend(np.argmax(labels.cpu().numpy(), axis=1))

    all_labels.extend(val_labels)
    all_preds.extend(val_preds)

# 计算指标
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

# 计算ROC AUC（只适用于二分类问题）
if len(set(all_labels)) == 2:  # 只有在二分类时才能计算
    roc_auc = roc_auc_score(all_labels, all_preds)
else:
    roc_auc = None

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}" if roc_auc is not None else "ROC AUC: Not applicable for multi-class")
print("训练完成！")

Fold 1
Epoch [1/3], Loss: 3.5803
Epoch [1/3], Loss: 3.4817
Epoch [1/3], Loss: 3.4659


KeyboardInterrupt: 

In [9]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"Detected {len(gpus)} GPU(s): {[gpu.name for gpu in gpus]}")
else:
    print("No GPUs detected.")

No GPUs detected.


In [8]:
X_combined_seq = X_combined_reshaped.reshape(-1, 32, 32 * 3)  # 32个时间步，每步32*3特征

# 创建 LSTM 模型
model = Sequential()

# 添加时间分布层，以处理每个时间步的特征
model.add(TimeDistributed(Dense(128, activation='relu'), input_shape=(32, 32 * 3)))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))  # 输出层，10类

# 编译模型
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 训练模型
model.fit(X_combined_seq, y_combined_categorical, epochs=10, batch_size=32, validation_split=0.2)

# 评估模型
loss, accuracy = model.evaluate(X_combined_seq, y_combined_categorical)
print(f'Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')

  super().__init__(**kwargs)


Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 56ms/step - accuracy: 0.2364 - loss: 2.0594 - val_accuracy: 0.4176 - val_loss: 1.6081
Epoch 2/10
[1m1072/1500[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m20s[0m 47ms/step - accuracy: 0.3897 - loss: 1.6739

KeyboardInterrupt: 

In [16]:
def build_simple_cnn(input_shape, num_classes, learning_rate=0.0005, num_filter=16, filter_size=(3, 3)):
    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))  
    model.add(layers.Conv2D(num_filter, filter_size, activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [11]:
print(y_combined.shape)

(60000,)


In [17]:
input_shape = (32, 32, 3)
num_classes = 10

num_filters = [8]  
learning_rates = [0.001, 0.0005, 0.0015, 0.01, 0.005]  
filter_sizes = [(1, 1), (3, 3), (5, 5), (7, 7), (9, 9)]  

highest_config = []
highest_accuracy = 0

for num_filter in num_filters:
    for learning_rate in learning_rates:  
        highest_filter_size = (0, 0)
        highest_filter_size_accuracy = 0
        best_filter_size = False
        
        for filter_size in filter_sizes:  
            current_config = [learning_rate, num_filter, filter_size]
            model = build_simple_cnn(input_shape, num_classes, learning_rate, num_filter, filter_size)

            X_combined_reshaped = X_combined.reshape(-1, 32, 32, 3)
            y_combined_categorical = to_categorical(y_combined, num_classes=10)
            X_combined_reshaped = X_combined_reshaped.astype('float32') / 255.0

            with tf.device('/GPU:0'):
                avg_scores = cross_validate_and_select_best_params(model, X_combined_reshaped, y_combined_categorical)

            print("current_config: ", current_config)
            print(avg_scores)

            if avg_scores['accuracy'] >= highest_filter_size_accuracy:
                highest_filter_size_accuracy = avg_scores['accuracy']
                highest_filter_size = current_config[2]
                best_filter_size = False
            elif highest_filter_size_accuracy > avg_scores['accuracy'] and filter_size > highest_filter_size:
                print("Last filter_size is best, break")
                best_filter_size = True

            if avg_scores['accuracy'] > highest_accuracy:
                highest_config = current_config
                highest_accuracy = avg_scores['accuracy']
                print("highest_config: ", highest_config)
                print("highest_accuracy: ", highest_accuracy)

            if best_filter_size:
                break

print("highest_config: ", highest_config)
print("highest_accuracy: ", highest_accuracy)


Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.3807 - loss: 1.8157 - val_accuracy: 0.4438 - val_loss: 1.6198
Epoch 2/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5661 - loss: 1.2434 - val_accuracy: 0.5511 - val_loss: 1.2913
Epoch 3/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6206 - loss: 1.0806 - val_accuracy: 0.5703 - val_loss: 1.2527
Epoch 4/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6730 - loss: 0.9362 - val_accuracy: 0.5782 - val_loss: 1.2434
Epoch 5/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7096 - loss: 0.8323 - val_accuracy: 0.5767 - val_loss: 1.3026
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 979us/step
Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7008 - loss: 0.8831

In [19]:
input_shape = (32, 32, 3)
num_classes = 10

num_filters=[8, 16, 32, 64, 128]
best_learning_rate=highest_config[0]
best_filter_sizes=highest_config[2]

highest_config_last=[]
highest_accuracy_last=0
for num_filter in num_filters:
    current_config = [best_learning_rate, num_filter, best_filter_sizes]
    model = build_simple_cnn(input_shape, num_classes, best_learning_rate, num_filter, best_filter_sizes)

    X_combined_reshaped = X_combined.reshape(-1, 32, 32, 3)
    y_combined_categorical = to_categorical(y_combined, num_classes=10)
    X_combined_reshaped = X_combined_reshaped.astype('float32') / 255.0
    with tf.device('/GPU:0'):
        avg_scores = cross_validate_and_select_best_params(model, X_combined_reshaped, y_combined_categorical)
    print("current_config: ", current_config)
    print(avg_scores)

    if avg_scores['accuracy'] > highest_accuracy_last:
        highest_config_last = current_config
        highest_accuracy_last = avg_scores['accuracy']
        print("highest_config_last: ", highest_config_last)
        print("highest_accuracy_last: ", highest_accuracy_last)
            
print("highest_config_last: ", highest_config_last)
print("highest_accuracy_last: ", highest_accuracy_last)

IndexError: list index out of range

In [4]:
X_combined_reshaped = X_combined.reshape(-1, 32, 32, 3)
y_combined_categorical = to_categorical(y_combined, num_classes=10)
X_combined_reshaped = X_combined_reshaped.astype('float32') / 255.0

In [21]:
# GFNet simple
def global_filter_layer(x, filters, kernel_size=(3, 3), strides=1):
    # simple Conv2d
    x = layers.Conv2D(filters, kernel_size, strides=strides, padding='same')(x)
    return x

# build GFNet model
def build_gfnet(input_shape, num_classes, learning_rate=0.0005):
    inputs = layers.Input(shape=input_shape)

    x = global_filter_layer(inputs, filters=64, kernel_size=(3, 3), strides=1)  # Adjusted
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = global_filter_layer(x, filters=64, kernel_size=(3, 3), strides=1)  # Adjusted
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.GlobalAveragePooling2D()(x)  # Keep this layer
    x = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, x)
    model.compile(optimizer=Adam(learning_rate=learning_rate), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])  # Added learning rate
    return model

input_shape = (32, 32, 3)
num_classes = 10

model = build_gfnet(input_shape, num_classes)

model.summary()

history = model.fit(X_combined_reshaped, y_combined_categorical, 
                    epochs=5, 
                    batch_size=128, 
                    validation_split=0.1)

print(f"Training finished. Accuracy: {history.history['accuracy'][-1]}")

ValueError: Exception encountered when calling GlobalFilterLayer.call().

[1mCould not automatically infer the output shape / dtype of 'global_filter_layer_6' (of type GlobalFilterLayer). Either the `GlobalFilterLayer.call()` method is incorrect, or you need to implement the `GlobalFilterLayer.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimensions must be equal, but are 32 and 3 for '{{node mul_1}} = Mul[T=DT_COMPLEX64](FFT2D, Reshape)' with input shapes: [?,32,32,3], [1,1,3,64].[0m

Arguments received by GlobalFilterLayer.call():
  • args=('<KerasTensor shape=(None, 32, 32, 3), dtype=float32, sparse=False, name=keras_tensor_82>',)
  • kwargs=<class 'inspect._empty'>

In [10]:
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# 定义Swin Transformer模型
class SwinTransformerBlock(layers.Layer):
    def __init__(self, num_heads, window_size, mlp_ratio=4.0):
        super(SwinTransformerBlock, self).__init__()
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=64)
        self.mlp = models.Sequential([
            layers.Dense(int(64 * mlp_ratio), activation='gelu'),
            layers.Dense(64)
        ])
        self.norm1 = layers.LayerNormalization(epsilon=1e-5)
        self.norm2 = layers.LayerNormalization(epsilon=1e-5)

    def call(self, inputs):
        attn = self.attention(inputs, inputs)
        x = self.norm1(inputs + attn)  # Residual connection
        mlp_output = self.mlp(x)
        return self.norm2(x + mlp_output)  # Residual connection

class SwinTransformer(models.Model):
    def __init__(self, num_classes):
        super(SwinTransformer, self).__init__()
        self.conv = layers.Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu')
        self.block1 = SwinTransformerBlock(num_heads=4, window_size=4)
        self.block2 = SwinTransformerBlock(num_heads=4, window_size=4)
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.conv(inputs)
        x = tf.reshape(x, (-1, x.shape[1], x.shape[2], 64))
        x = self.block1(x)
        x = self.block2(x)
        x = self.flatten(x)
        return self.dense(x)

# 实例化模型并编译
model = SwinTransformer(num_classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 训练模型
with tf.device('/GPU:0'):  # 显式指定使用 GPU
    history = model.fit(X_combined_reshaped, y_combined_categorical, batch_size=64, epochs=10, validation_split=0.2)

# 保存模型
model.save('swin_transformer_cifar10.h5')
print(f"Training finished. Accuracy: {history.history['accuracy'][-1]}")

Epoch 1/10
[1m 14/750[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m54:02[0m 4s/step - accuracy: 0.1334 - loss: 29.3850

KeyboardInterrupt: 

In [11]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"可用的GPU设备: {gpus}")
else:
    print("没有检测到可用的GPU设备。")

没有检测到可用的GPU设备。


In [12]:
import tensorflow as tf
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import to_categorical
def create_vit_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    # 使用 TensorFlow Keras 应用程序中的 ViT 模型
    vit = tf.keras.applications.VisionTransformer(
        input_shape=input_shape,
        include_top=True,
        weights=None,
        classes=num_classes
    )
    
    outputs = vit(inputs)
    model = Model(inputs, outputs)
    return model

# 创建模型
input_shape = (32, 32, 3)
num_classes = 10
model = create_vit_model(input_shape, num_classes)

# 编译模型
model.compile(optimizer=Adam(learning_rate=1e-4), 
              loss=CategoricalCrossentropy(), 
              metrics=['accuracy'])

# 设置 EarlyStopping 回调
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 训练模型
history = model.fit(X_combined_reshaped, y_combined, 
                    validation_split=0.2, 
                    epochs=50, 
                    batch_size=64, 
                    callbacks=[early_stopping])

# 保存模型
model.save('vit_cifar10_model.h5')

AttributeError: module 'keras.api.applications' has no attribute 'VisionTransformer'

In [22]:

from openmixup import OpenMixup

ModuleNotFoundError: No module named 'openmixup'

In [1]:
import torch
print("Is CUDA available: ", torch.cuda.is_available())
print("CUDA version: ", torch.version.cuda)
print("Number of GPUs: ", torch.cuda.device_count())


Is CUDA available:  True
CUDA version:  12.4
Number of GPUs:  1


In [13]:
from openmixup import OpenMixup

# 假设您已经有以下数据
# X_combined_reshaped: 处理好的输入数据
# y_combined: 处理好的标签数据

# 创建 OpenMixup 实例
mixup = OpenMixup(alpha=0.2)  # alpha 参数可根据需要调整

# 构建简单的模型
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(100, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# 将 OpenMixup 应用于数据
# 这里假设您希望在训练中使用混合样本
X_mixed, y_mixed = mixup(X_combined_reshaped, y_combined)

# 训练模型
model.fit(X_mixed, y_mixed, batch_size=64, epochs=10, validation_split=0.2)

ModuleNotFoundError: No module named 'openmixup'