In [2]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
from PIL import UnidentifiedImageError

In [4]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
from PIL import UnidentifiedImageError
import pandas as pd

# --- Run configuration -------------------------------------------------------
# When True, the loader keeps only CSV rows whose 'label' column equals 1.
use_label_1_only = True

# Directory holding the input images and the label CSV, and the directory
# that receives every image written by this notebook.
input_folder = './Data/fits_filtered2'
output_folder = './Data/fits_filtered2/DoubleDCGan1000epochsOnlyStreaks_ultraHD'

# Create the output directory up front; an existing one is accepted as-is.
os.makedirs(output_folder, exist_ok=True)

# Step 1: Load and Filter Images Based on Labels
def load_images_with_labels(folder, csv_file, image_size=(64, 64), use_label_1_only=True):
    """Load the images listed in a label CSV into a single NumPy array.

    The CSV must provide an 'output' column holding image file names relative
    to ``folder`` and, when ``use_label_1_only`` is set, a 'label' column.
    Files that cannot be opened or decoded are reported and skipped, and so
    are rows whose 'output' cell is not a string (e.g. an empty cell).

    Args:
        folder: Directory that contains the image files.
        csv_file: Path of the CSV file listing the images.
        image_size: Passed to ``load_img`` as ``target_size``.
        use_label_1_only: If True, keep only rows whose 'label' equals 1.

    Returns:
        ``np.array`` built from the per-image arrays produced by
        ``img_to_array``; its ``size`` is 0 when no image could be loaded.

    Raises:
        FileNotFoundError: If ``csv_file`` does not exist.
        KeyError: If a required CSV column is missing.
    """
    try:
        label_data = pd.read_csv(csv_file)
    except FileNotFoundError:
        raise FileNotFoundError(f"CSV file '{csv_file}' not found in the folder '{folder}'.")

    # Filter files based on label if the flag is set
    if use_label_1_only:
        label_data = label_data[label_data['label'] == 1]

    images = []
    invalid_files = 0
    # Only the file-name column is needed, so iterate it directly instead of
    # building a Series per row with iterrows().
    for filename in label_data['output']:
        # pandas reads an empty cell as NaN (a float). os.path.join would
        # raise an uncaught TypeError on it and abort the whole load, so such
        # rows are skipped and counted as invalid instead.
        if not isinstance(filename, str):
            print(f"Skipping row with missing file name: {filename!r}")
            invalid_files += 1
            continue

        file_path = os.path.join(folder, filename)
        try:
            img = load_img(file_path, target_size=image_size)
            images.append(img_to_array(img))
        except (UnidentifiedImageError, OSError):
            print(f"Skipping file {filename}, as it is not a valid image.")
            invalid_files += 1

    # Every successfully decoded image was appended exactly once.
    valid_files = len(images)
    print(f"Loaded {valid_files} valid images, skipped {invalid_files} invalid images.")
    return np.array(images)

# Read every image referenced by the label CSV, resized to 256x256.
csv_file_path = os.path.join(input_folder, 'dictionary_0.csv')
dataset = load_images_with_labels(
    input_folder,
    csv_file_path,
    image_size=(256, 256),
    use_label_1_only=use_label_1_only,
)
print(f"Dataset shape: {dataset.shape}")

# Nothing to train on if no image survived loading.
if not dataset.size:
    raise ValueError("No valid images found in the dataset. Please check the image files or the CSV labels.")

# Centre and scale by 127.5, which maps pixel values in 0..255 onto [-1, 1]
# (the range of the generator's tanh output).
dataset = (dataset - 127.5) / 127.5

def build_generator(latent_dim=100):
    """Build the generator network of the GAN.

    The model maps a latent vector to a 256x256 image with 3 channels and a
    tanh output, i.e. values in [-1, 1].

    Args:
        latent_dim: Length of the input latent vector. Defaults to 100, the
            size used by the rest of this notebook.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()

    # Project the latent vector and reshape it into a 4x4 map with 1024 channels.
    model.add(layers.Dense(1024 * 4 * 4, activation="relu", input_dim=latent_dim))
    model.add(layers.Reshape((4, 4, 1024)))
    model.add(layers.BatchNormalization(momentum=0.8))

    # Five identical stages; each upsamples (4 -> 8 -> 16 -> 32 -> 64 -> 128)
    # and then convolves down to the listed number of filters.
    for filters in (512, 256, 128, 64, 32):
        model.add(layers.UpSampling2D())
        model.add(layers.Conv2D(filters, kernel_size=4, padding="same"))
        model.add(layers.BatchNormalization(momentum=0.8))
        model.add(layers.Activation("relu"))

    # Final upsampling to 256x256 and projection to 3 output channels.
    model.add(layers.UpSampling2D())
    model.add(layers.Conv2D(3, kernel_size=4, padding="same"))
    model.add(layers.Activation("tanh"))
    return model

def build_discriminator():
    """Build the discriminator network of the GAN.

    Takes a 256x256 image with 3 channels and returns a single sigmoid score.
    Six stride-2 convolutions halve the spatial size each time
    (256 -> 128 -> 64 -> 32 -> 16 -> 8 -> 4); every convolution is followed
    by LeakyReLU and Dropout.
    """
    net = tf.keras.Sequential()

    # The first stage is written out because it declares the input shape.
    net.add(layers.Conv2D(64, kernel_size=4, strides=2, input_shape=(256, 256, 3), padding="same"))
    net.add(layers.LeakyReLU(alpha=0.2))
    net.add(layers.Dropout(0.25))

    # Remaining stages differ only in their filter count.
    for n_filters in (128, 256, 512, 1024, 2048):
        net.add(layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same"))
        net.add(layers.LeakyReLU(alpha=0.2))
        net.add(layers.Dropout(0.25))

    # Collapse the final feature map into one score.
    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Discriminator: compiled on its own so it can be trained directly on batches.
discriminator = build_discriminator()
discriminator.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    metrics=['accuracy'],
)

# Generator: never compiled by itself, it is only trained through `combined`.
generator = build_generator()

# Combined model: latent vector -> generator -> discriminator score.
# The discriminator is flagged non-trainable after its own compile() and
# before combined.compile(); the intent is that training `combined` only
# updates the generator.
z = layers.Input(shape=(100,))
img = generator(z)
discriminator.trainable = False
valid = discriminator(img)
combined = tf.keras.Model(inputs=z, outputs=valid)
combined.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
)

# Step 3: Train the GAN
epochs = 1000        # number of training iterations (one batch per iteration)
batch_size = 64
save_interval = 99   # a 5x5 preview grid is written every `save_interval` iterations
X_train = dataset
half_batch = batch_size // 2

# The target labels never change, so build them once instead of on every
# iteration. All of them use an explicit (n, 1) shape, matching the single
# output unit of the discriminator.
real_labels = np.ones((half_batch, 1))
fake_labels = np.zeros((half_batch, 1))
valid_y = np.ones((batch_size, 1))

for epoch in range(epochs):
    # --- Discriminator step: half a batch of real images (sampled with
    # replacement) and half a batch of generated images.
    idx = np.random.randint(0, X_train.shape[0], half_batch)
    imgs = X_train[idx]
    noise = np.random.normal(0, 1, (half_batch, 100))
    # verbose=0 keeps predict() from printing a progress bar on every
    # iteration in TensorFlow versions where that is the default.
    gen_imgs = generator.predict(noise, verbose=0)
    d_loss_real = discriminator.train_on_batch(imgs, real_labels)
    d_loss_fake = discriminator.train_on_batch(gen_imgs, fake_labels)
    # Element-wise mean of the [loss, accuracy] pairs of both updates.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # --- Generator step: train through the combined model with "real" targets.
    noise = np.random.normal(0, 1, (batch_size, 100))
    g_loss = combined.train_on_batch(noise, valid_y)

    print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

    if epoch % save_interval == 0:
        # Render 25 fresh samples as a 5x5 grid and save it to the output folder.
        noise = np.random.normal(0, 1, (25, 100))
        gen_imgs = generator.predict(noise, verbose=0)
        gen_imgs = 0.5 * gen_imgs + 0.5  # tanh range [-1, 1] -> [0, 1] for imshow
        fig, axs = plt.subplots(5, 5)
        # axs.flat walks the grid row by row, the same order as the samples.
        for ax, sample in zip(axs.flat, gen_imgs):
            ax.imshow(sample)
            ax.axis('off')
        fig.savefig(os.path.join(output_folder, f'epoch_{epoch}.png'))
        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

# Draw ten latent vectors, run them through the generator and write one PNG
# per generated image into the output folder.
noise = np.random.normal(0, 1, (10, 100))
gen_imgs = generator.predict(noise)
gen_imgs = 0.5 * gen_imgs + 0.5  # tanh range [-1, 1] -> [0, 1] for imshow
for i, sample in enumerate(gen_imgs):
    plt.imshow(sample)
    plt.axis('off')
    final_path = os.path.join(output_folder, f'final_{i}.png')
    plt.savefig(final_path)
    plt.close()


Skipping file tic12.fit, as it is not a valid image.
Skipping file tic13.fit, as it is not a valid image.
Skipping file tic14.fit, as it is not a valid image.
Loaded 20 valid images, skipped 3 invalid images.
Dataset shape: (20, 256, 256, 3)
0 [D loss: 0.7136322259902954, acc.: 25.0] [G loss: 0.4853648543357849]
1 [D loss: 0.6064382195472717, acc.: 50.0] [G loss: 0.18785670399665833]
2 [D loss: 0.5991086035501212, acc.: 50.0] [G loss: 2.323019504547119]
3 [D loss: 0.04241851018741727, acc.: 100.0] [G loss: 6.621248722076416]
4 [D loss: 0.00022463122194269142, acc.: 100.0] [G loss: 6.613015174865723]
5 [D loss: 7.393892246910645e-06, acc.: 100.0] [G loss: 2.8089709281921387]
6 [D loss: 2.315321580681511e-06, acc.: 100.0] [G loss: 0.4079846441745758]
7 [D loss: 2.0241100173734653e-06, acc.: 100.0] [G loss: 0.07495035976171494]
8 [D loss: 1.933937255671503e-06, acc.: 100.0] [G loss: 0.017330750823020935]
9 [D loss: 1.9017961676582817e-06, acc.: 100.0] [G loss: 0.010278038680553436]
10 [D 