In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [2]:
# Create directories for saving results
os.makedirs('generated_samples', exist_ok=True)
os.makedirs('model_checkpoints', exist_ok=True)
os.makedirs('results', exist_ok=True)

In [None]:
# num_of_classes = 10  # 10 types mentioned in the paper
num_of_classes = 8  # 8 weather types from spatial synoptic classification

In [3]:
class WeatherClassificationWGANGP:
    def __init__(self, img_shape=(64, 64, 3), num_classes=10, latent_dim=100, gradient_penalty_weight=10):
        self.img_shape = img_shape
        self.num_classes = num_classes  # 10 types of weather status as mentioned in the paper
        self.latent_dim = latent_dim
        self.gradient_penalty_weight = gradient_penalty_weight
        
        # Following WGAN-GP paper, we use Adam optimizer
        self.generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        
        # Build models
        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()
        
    def build_generator(self):
        """Build the generator model as described in the paper"""
        model = models.Sequential()
        
        # Foundation for 4x4 feature maps
        model.add(layers.Dense(4 * 4 * 256, use_bias=False, input_shape=(self.latent_dim,)))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.Reshape((4, 4, 256)))
        
        # Upsampling layers
        model.add(layers.Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same', use_bias=False))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU(alpha=0.2))
        
        model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU(alpha=0.2))
        
        model.add(layers.Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same', use_bias=False))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU(alpha=0.2))
        
        model.add(layers.Conv2DTranspose(self.img_shape[2], (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
        
        return model
    
    def build_discriminator(self):
        """Build the discriminator model as described in the paper"""
        model = models.Sequential()
        
        # No batch normalization in critic as per WGAN-GP paper
        model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=self.img_shape))
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.Dropout(0.3))
        
        model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.Dropout(0.3))
        
        model.add(layers.Conv2D(256, (5, 5), strides=(2, 2), padding='same'))
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.Dropout(0.3))
        
        model.add(layers.Flatten())
        
        # Weather classification output
        model.add(layers.Dense(self.num_classes))
        
        return model
    
    def gradient_penalty(self, real_images, fake_images):
        """Calculate the gradient penalty for WGAN-GP"""
        batch_size = real_images.shape[0]
        
        # Create random interpolation points between real and fake images
        alpha = tf.random.uniform([batch_size, 1, 1, 1], 0.0, 1.0)
        interpolated = alpha * real_images + (1 - alpha) * fake_images
        
        with tf.GradientTape() as tape:
            tape.watch(interpolated)
            # Get critic output for interpolated images
            interpolated_output = self.discriminator(interpolated, training=True)
        
        # Calculate gradients with respect to inputs
        gradients = tape.gradient(interpolated_output, interpolated)
        # Calculate norm of gradients
        gradients_norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        # Calculate gradient penalty
        gradient_penalty = tf.reduce_mean((gradients_norm - 1.0) ** 2)
        
        return gradient_penalty
    
    @tf.function
    def train_discriminator(self, real_images, labels):
        """Train the discriminator (critic) following WGAN-GP approach"""
        batch_size = real_images.shape[0]
        noise = tf.random.normal([batch_size, self.latent_dim])
        
        with tf.GradientTape() as tape:
            # Generate fake images
            fake_images = self.generator(noise, training=True)
            
            # Get critic output for real and fake images
            real_output = self.discriminator(real_images, training=True)
            fake_output = self.discriminator(fake_images, training=True)
            
            # Calculate Wasserstein loss
            real_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=real_output))
            fake_loss = tf.reduce_mean(tf.reduce_mean(fake_output, axis=1))
            
            # Calculate gradient penalty
            gp = self.gradient_penalty(real_images, fake_images)
            
            # Total discriminator loss
            disc_loss = real_loss + fake_loss + self.gradient_penalty_weight * gp
        
        # Get gradients and update weights
        gradients = tape.gradient(disc_loss, self.discriminator.trainable_variables)
        self.discriminator_optimizer.apply_gradients(
            zip(gradients, self.discriminator.trainable_variables))
        
        return disc_loss, real_loss, fake_loss, gp
    
    @tf.function
    def train_generator(self):
        """Train the generator following WGAN-GP approach"""
        batch_size = 64  # Fixed batch size for generator
        noise = tf.random.normal([batch_size, self.latent_dim])
        
        with tf.GradientTape() as tape:
            # Generate fake images
            fake_images = self.generator(noise, training=True)
            
            # Get critic output for fake images
            fake_output = self.discriminator(fake_images, training=True)
            
            # Calculate generator loss
            gen_loss = -tf.reduce_mean(tf.reduce_mean(fake_output, axis=1))
        
        # Get gradients and update weights
        gradients = tape.gradient(gen_loss, self.generator.trainable_variables)
        self.generator_optimizer.apply_gradients(
            zip(gradients, self.generator.trainable_variables))
        
        return gen_loss
    
    def train(self, dataset, epochs, batch_size=64, n_critic=5):
        """Train the WGAN-GP model"""
        steps_per_epoch = len(dataset) // batch_size
        
        # Lists to store loss values for plotting
        disc_losses = []
        gen_losses = []
        
        for epoch in range(epochs):
            epoch_disc_losses = []
            epoch_gen_losses = []
            
            for step in range(steps_per_epoch):
                # Get real images batch
                indices = np.random.randint(0, len(dataset), batch_size)
                real_images = dataset[indices]
                labels = np.random.randint(0, self.num_classes, batch_size)  # Random labels for demonstration
                
                # Train discriminator for n_critic iterations
                for _ in range(n_critic):
                    d_loss, real_loss, fake_loss, gp = self.train_discriminator(real_images, labels)
                    epoch_disc_losses.append(d_loss.numpy())
                
                # Train generator
                g_loss = self.train_generator()
                epoch_gen_losses.append(g_loss.numpy())
                
            # Append average losses for this epoch
            disc_losses.append(np.mean(epoch_disc_losses))
            gen_losses.append(np.mean(epoch_gen_losses))
            
            # Print progress and generate samples
            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs}")
                print(f"D loss: {disc_losses[-1]:.4f}, G loss: {gen_losses[-1]:.4f}")
                self.generate_and_save_images(epoch + 1)
                
                # Plot loss curves
                self.plot_losses(disc_losses, gen_losses, epoch + 1)
            
            # Save model checkpoints
            if (epoch + 1) % 50 == 0:
                self.generator.save(f'model_checkpoints/generator_epoch_{epoch+1}.h5')
                self.discriminator.save(f'model_checkpoints/discriminator_epoch_{epoch+1}.h5')
    
    def generate_and_save_images(self, epoch):
        """Generate and save sample images"""
        noise = tf.random.normal([16, self.latent_dim])
        generated_images = self.generator(noise, training=False)
        generated_images = (generated_images + 1) / 2.0  # Rescale to [0, 1]
        
        fig = plt.figure(figsize=(4, 4))
        for i in range(generated_images.shape[0]):
            plt.subplot(4, 4, i+1)
            plt.imshow(generated_images[i, :, :, :])
            plt.axis('off')
        
        plt.savefig(f'generated_samples/epoch_{epoch}.png')
        plt.close()
    
    def plot_losses(self, disc_losses, gen_losses, epoch):
        """Plot and save loss curves"""
        plt.figure(figsize=(10, 5))
        plt.plot(disc_losses, label='Discriminator Loss')
        plt.plot(gen_losses, label='Generator Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.title(f'WGAN-GP Training Losses (Epoch {epoch})')
        plt.tight_layout()
        plt.savefig(f'results/losses_epoch_{epoch}.png')
        plt.close()
    
    def generate_synthetic_data(self, n_samples, labels=None):
        """Generate synthetic weather images"""
        noise = tf.random.normal([n_samples, self.latent_dim])
        generated_images = self.generator(noise, training=False)
        
        # If labels are not provided, generate random labels
        if labels is None:
            labels = np.random.randint(0, self.num_classes, n_samples)
        
        return generated_images, labels
    
    def classify_weather(self, images):
        """Classify weather images using the trained discriminator"""
        logits = self.discriminator(images, training=False)
        return tf.nn.softmax(logits)

# 1. Data Preparation

In [4]:
import requests
import ftplib
import os
from datetime import datetime, timedelta

def download_surfrad_data(station='dra', start_date='2014-01-01', end_date='2015-12-31'):
    """
    Download SURFRAD data for a specific station and date range
    station: 'dra' for Desert Rock
    """
    # Base URL for SURFRAD data
    base_url = "https://gml.noaa.gov/aftp/data/radiation/surfrad/"
    
    # Parse dates
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    
    # Create directory for data
    os.makedirs(f'surfrad_data/{station}', exist_ok=True)
    
    # Loop through all dates
    current_date = start
    while current_date <= end:
        year = current_date.year
        month = current_date.month
        day = current_date.day

        for day in range(1, 365):
            # Construct URL for specific date
            year_formatted = str(year)[2:]
            file_name = f"{station}{year_formatted}{day:03d}.dat"
            url = f"{base_url}{station}/{year}/{file_name}"
            
            # Download file
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    with open(f'surfrad_data/{station}/{file_name}', 'wb') as f:
                        f.write(response.content)
                    print(f"Downloaded {file_name}")
                else:
                    print(f"Failed to download {file_name}: {response.status_code}")
            except Exception as e:
                print(f"Error downloading {file_name}: {e}")
        
        # Move to next day
        current_date += timedelta(days=365)

# Example usage
download_surfrad_data(station='dra', start_date='2014-01-01', end_date='2015-12-31')

Downloaded dra14001.dat
Downloaded dra14002.dat
Downloaded dra14003.dat
Downloaded dra14004.dat
Downloaded dra14005.dat
Downloaded dra14006.dat
Downloaded dra14007.dat
Downloaded dra14008.dat
Downloaded dra14009.dat
Downloaded dra14010.dat
Downloaded dra14011.dat
Downloaded dra14012.dat
Downloaded dra14013.dat
Downloaded dra14014.dat
Downloaded dra14015.dat
Downloaded dra14016.dat
Downloaded dra14017.dat
Downloaded dra14018.dat
Downloaded dra14019.dat
Downloaded dra14020.dat
Downloaded dra14021.dat
Downloaded dra14022.dat
Downloaded dra14023.dat
Downloaded dra14024.dat
Downloaded dra14025.dat
Downloaded dra14026.dat
Downloaded dra14027.dat
Downloaded dra14028.dat
Downloaded dra14029.dat
Downloaded dra14030.dat
Downloaded dra14031.dat
Downloaded dra14032.dat
Downloaded dra14033.dat
Downloaded dra14034.dat
Downloaded dra14035.dat
Downloaded dra14036.dat
Downloaded dra14037.dat
Downloaded dra14038.dat
Downloaded dra14039.dat
Downloaded dra14040.dat
Downloaded dra14041.dat
Downloaded dra14

In [13]:
def create_date_labels(station='dra'):

    base_url = "https://sheridan.geog.kent.edu/ssc/files/"
    file_name = station.upper() + ".cal3"

    try:
        url = f"{base_url}{file_name}"
        response = requests.get(url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded {file_name}")
        else:
            print(f"Failed to download {file_name}: {response.status_code}")
    except Exception as e:
        print(f"Error downloading {file_name}: {e}")

# Example usage
create_date_labels(station='dra')

Downloaded DRA.cal3


In [4]:
import pandas as pd

def read_labels(station='dra', start_date='2014-01-01', end_date='2015-12-31'):

    file_name = station.upper() + ".cal3"

    a = pd.read_csv(file_name, sep='\s+', header=None)
    selection = a.loc[(a[1] >= int(start_date.replace("-", ""))) & (a[1] <= int(end_date.replace("-", "")))]
    return selection[2].divide(10).round(0).astype(int).to_numpy()  # remove '+' labels

labels = read_labels()

In [5]:
import numpy as np
import pandas as pd
import os
import glob
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler

def read_surfrad_data(data_dir):
    """
    Read all SURFRAD .dat files in the specified directory
    Returns a DataFrame with parsed data
    """
    # Get all .dat files in the directory
    file_pattern = os.path.join(data_dir, '*.dat')
    data_files = glob.glob(file_pattern)
    
    all_data = []
    
    for file_path in data_files:
        try:
            # SURFRAD files have a specific format with headers
            # Skip the header lines (first 2 rows)
            df = pd.read_csv(file_path, sep='\s+', skiprows=2, header=None)
            
            # Extract filename for date information
            """ filename = os.path.basename(file_path)
            date_str = filename[3:11]  # Extract YYYYMMDD from draYYYYMMDD.dat
            file_date = datetime.strptime(date_str, '%Y%m%d')
            print(df[1]) """
        
            
            # Create timestamp from date and time columns
            """ 
            # Filter out bad values (often marked as -9999.9)
            parsed_df = parsed_df[parsed_df['irradiance'] > -999]
            
            all_data.append(parsed_df) """
            cols_to_drop = df.columns[9::2]  # Every second column from index 9 onward
            df_cleaned = df.drop(columns=cols_to_drop)
            all_data.append(df_cleaned)
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
    
    # Combine all data files
    if not all_data:
        raise ValueError("No valid data files found")
    
    combined_data = pd.concat(all_data)
    
    # Sort by timestamp
    #combined_data = combined_data.sort_values('timestamp')
    
    # Set timestamp as index
    #combined_data.set_index('timestamp', inplace=True)
    
    return combined_data

# Load your NOAA SURFRAD data
data_dir = 'final_project\\surfrad_data\\dra'  
data_dir = os.path.abspath('surfrad_data/dra')
df = read_surfrad_data(data_dir)
pd.set_option('display.max_columns', None)  # Show all columns

# Convert to 15-minute resolution by averaging
def convert_to_15min_resolution(df):
    # Ensure the DataFrame index is a range index
    df = df.reset_index(drop=True)
    
    # Group the DataFrame by every 15 rows and calculate the mean for each group
    df_15min = df.groupby(df.index // 15).mean()
    
    return df_15min

# Now we can continue with the rest of your pipeline
df_15min = convert_to_15min_resolution(df)

# Extract only points 18-78 for each day (corresponding to daylight hours)
def extract_daylight_hours(df):
    # Define chunk size and slice range
    chunk_size = 96
    start, end = 18, 79  # Keep indices 18 to 78 (end index is exclusive)

    # Process data in chunks
    filtered_data = pd.concat([df.iloc[i + start : i + end] for i in range(0, len(df), chunk_size)])
    return filtered_data


df_daylight = extract_daylight_hours(df_15min)

# Split into training and testing sets
def split_train_test(df, lb):
    data_points_per_day = 61
    total_days = len(df) // data_points_per_day
    labels_for_each_datapoint = np.repeat(lb, data_points_per_day)

    train_days = int(total_days * 0.6)  # 70% for training
    test_days = total_days - train_days

    train_index = train_days * data_points_per_day
    test_index = train_index + (test_days * data_points_per_day)

    df_train = df.iloc[:train_index].reset_index(drop=True)
    df_test = df.iloc[train_index:test_index].reset_index(drop=True)

    labels_train = labels_for_each_datapoint[:train_index]
    labels_test = labels_for_each_datapoint[train_index:test_index]

    return df_train, df_test, labels_train, labels_test

print(df_daylight)
train_data, test_data, train_labels, test_labels = split_train_test(df_daylight, labels)
print(train_data)

# Extract the irradiance values as numpy arrays for model training
print(train_data[16].values)
X_train = train_data[16].values.reshape(-1, 61)  # 61 points per day (18-78)
X_test = test_data[16].values.reshape(-1, 61) # look up what the right index is for the dataset 

# Normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
print(X_train)
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

print(f"Training data shape: {X_train_normalized.shape}")
print(f"Testing data shape: {X_test_normalized.shape}")

print(f"Training labels shape: {train_labels.shape}")
print(f"Testing labels shape: {test_labels.shape}")

           0      1     2     3          4     5          6           7   \
18     2014.0    1.0   1.0   1.0   4.000000  37.0   4.616667  136.880000   
19     2014.0    1.0   1.0   1.0   4.000000  52.0   4.866667  139.868000   
20     2014.0    1.0   1.0   1.0   5.000000   7.0   5.116667  142.842667   
21     2014.0    1.0   1.0   1.0   5.000000  22.0   5.366667  145.790000   
22     2014.0    1.0   1.0   1.0   5.000000  37.0   5.616667  148.696000   
...       ...    ...   ...   ...        ...   ...        ...         ...   
69844  2015.0  364.0  12.0  30.0  22.733333  19.0  23.050000   75.454667   
69845  2015.0  364.0  12.0  30.0  23.000000  18.0  23.300000   77.650000   
69846  2015.0  364.0  12.0  30.0  23.000000  33.0  23.550000   79.929333   
69847  2015.0  364.0  12.0  30.0  23.000000  48.0  23.800000   82.284667   
69848  2015.0  364.0  12.0  30.0  23.000000  57.5  23.958250   83.810000   

               8          10          12         14          16          18  \
18      

# 2. Modify the Model Architecture

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Update the model architecture based on paper's specifications
def update_model_architecture(model):
    # Generator and discriminator with 2 hidden layers of 124 neurons each
    generator = models.Sequential([
        layers.Dense(124, activation='relu', input_shape=(60,)),  # 60-dim uniform noise
        layers.Dense(124, activation='relu'),
        layers.Dense(61)  # Output layer (61 points for positions 18-78)
    ])
    
    discriminator = models.Sequential([
        layers.Dense(124, activation='relu', input_shape=(61,)),
        layers.Dense(124, activation='relu'),
        layers.Dense(num_of_classes)  # 10 classes output
    ])
    
    # Update model properties
    model.generator = generator
    model.discriminator = discriminator
    model.latent_dim = 60
    model.generator_optimizer = tf.keras.optimizers.Adam()
    model.discriminator_optimizer = tf.keras.optimizers.Adam()
    
    return model

# Update your existing model
wgan_model = WeatherClassificationWGANGP(num_classes=num_of_classes)
wgan_model = update_model_architecture(wgan_model)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# 3. Modify the Training Procedure

In [30]:
def modified_train(model, dataset, epochs, batch_size=64):
    """Modified training procedure to match paper specifications"""
    steps_per_epoch = len(dataset) // batch_size
    
    disc_losses = []
    gen_losses = []
    
    for epoch in range(epochs):
        epoch_disc_losses = []
        epoch_gen_losses = []
        
        for step in range(steps_per_epoch):
            # Get real images batch
            indices = np.random.randint(0, len(dataset), batch_size)
            real_data = dataset.iloc[indices]
            print(real_data.shape)
            print(real_data.shape)
            # Get weather class labels based on your classification scheme
            # labels = np.zeros(batch_size)  # Replace with actual labels
            labels = train_labels[indices]
            print(labels.shape)

            # Train discriminator for 15 steps (as per paper)
            for _ in range(15):
                d_loss, real_loss, fake_loss, gp = model.train_discriminator(real_data, labels)
                epoch_disc_losses.append(d_loss.numpy())
            
            # Train generator for 1 step
            g_loss = model.train_generator()
            epoch_gen_losses.append(g_loss.numpy())
        
        # Store and print losses
        disc_losses.append(np.mean(epoch_disc_losses))
        gen_losses.append(np.mean(epoch_gen_losses))
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}")
            print(f"D loss: {disc_losses[-1]:.4f}, G loss: {gen_losses[-1]:.4f}")
    
    return disc_losses, gen_losses

# 4. Implement the Classification Models

In [12]:
def build_cnn1d_model():
    model = models.Sequential([
        layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=(61, 1)),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(64, kernel_size=5, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(64, kernel_size=8, activation='relu'),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dense(num_of_classes, activation='softmax')  # 10 weather classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

def build_cnn2d_model():
    model = models.Sequential([
        layers.Conv2D(64, kernel_size=(1, 1), activation='relu', input_shape=(61, 1, 1)),
        layers.Conv2D(64, kernel_size=(2, 1), activation='relu'),
        layers.Conv2D(64, kernel_size=(3, 2), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dense(num_of_classes, activation='softmax')  # 10 weather classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

def build_mlp_model():
    model = models.Sequential([
        layers.Flatten(input_shape=(61,)),
        layers.Dense(100, activation='relu'),
        layers.Dense(100, activation='relu'),
        layers.Dense(num_of_classes, activation='softmax')  # 10 weather classes
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# 5. Train SVM and KNN Models

In [13]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

def train_svm_model(X_train, y_train):
    # Grid search for optimal parameters
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1],
        'kernel': ['rbf', 'linear']
    }
    
    svm = SVC()
    grid_search = GridSearchCV(svm, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    
    print(f"Best SVM parameters: {grid_search.best_params_}")
    return grid_search.best_estimator_

def train_knn_model(X_train, y_train):
    # Grid search for optimal parameters
    param_grid = {
        'n_neighbors': [3, 5, 7, 9, 11],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan']
    }
    
    knn = KNeighborsClassifier()
    grid_search = GridSearchCV(knn, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    
    print(f"Best KNN parameters: {grid_search.best_params_}")
    return grid_search.best_estimator_

# 6. Complete Training Pipeline

In [14]:
def train_full_pipeline(train_data, test_data, epochs=100):
    # Step 1: Train WGAN-GP model
    wgan_model = WeatherClassificationWGANGP(num_classes=num_of_classes)
    wgan_model = update_model_architecture(wgan_model)
    disc_losses, gen_losses = modified_train(wgan_model, train_data, epochs)
    
    # Step 2: Generate synthetic data
    n_synthetic = 10000  # Define how many synthetic samples to generate
    synthetic_data, synthetic_labels = wgan_model.generate_synthetic_data(n_synthetic)
    
    # Denormalize synthetic data
    synthetic_data_denorm = scaler.inverse_transform(synthetic_data)
    
    # Step 3: Combine real and synthetic data
    X_combined = np.vstack([train_data, synthetic_data])
    y_combined = np.concatenate([train_labels, synthetic_labels])  # Assuming you have train_labels
    
    # Step 4: Train classification models
    # CNN1D
    cnn1d = build_cnn1d_model()
    X_cnn1d = X_combined.reshape(-1, 61, 1)  # Reshape for 1D convolution
    cnn1d.fit(X_cnn1d, y_combined, epochs=50, batch_size=32, validation_split=0.2)
    
    # CNN2D
    cnn2d = build_cnn2d_model()
    X_cnn2d = X_combined.reshape(-1, 61, 1, 1)  # Reshape for 2D convolution
    cnn2d.fit(X_cnn2d, y_combined, epochs=50, batch_size=32, validation_split=0.2)
    
    # MLP
    mlp = build_mlp_model()
    mlp.fit(X_combined, y_combined, epochs=50, batch_size=32, validation_split=0.2)
    
    # SVM
    svm = train_svm_model(X_combined, y_combined)
    
    # KNN
    knn = train_knn_model(X_combined, y_combined)
    
    # Step 5: Evaluate models on test data
    X_test_cnn1d = test_data.reshape(-1, 61, 1)
    X_test_cnn2d = test_data.reshape(-1, 61, 1, 1)
    
    print("CNN1D Accuracy:", cnn1d.evaluate(X_test_cnn1d, test_labels)[1])
    print("CNN2D Accuracy:", cnn2d.evaluate(X_test_cnn2d, test_labels)[1])
    print("MLP Accuracy:", mlp.evaluate(test_data, test_labels)[1])
    print("SVM Accuracy:", svm.score(test_data, test_labels))
    print("KNN Accuracy:", knn.score(test_data, test_labels))
    
    return {
        'wgan_model': wgan_model,
        'cnn1d': cnn1d,
        'cnn2d': cnn2d,
        'mlp': mlp,
        'svm': svm,
        'knn': knn
    }

In [15]:
class CNN_Classifier:
    """Convolutional Neural Network for Weather Classification"""
    def __init__(self, img_shape=(64, 64, 3), num_classes=10):
        self.img_shape = img_shape
        self.num_classes = num_classes
        
        # Build model
        self.model = self.build_model()
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(1e-4),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy']
        )
    
    def build_model(self):
        """Build CNN model for weather classification"""
        model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=self.img_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.num_classes)
        ])
        return model
    
    def train(self, train_images, train_labels, validation_data=None, epochs=10, batch_size=32):
        """Train the CNN classifier"""
        # Set up callbacks
        checkpoint = ModelCheckpoint(
            'model_checkpoints/cnn_classifier_best.h5',
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        )
        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True,
            verbose=1
        )
        
        history = self.model.fit(
            train_images, train_labels,
            validation_data=validation_data,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[checkpoint, early_stopping]
        )
        
        # Plot training history
        self.plot_training_history(history)
        
        return history
    
    def plot_training_history(self, history):
        """Plot and save training history"""
        # Plot accuracy
        plt.figure(figsize=(12, 5))
        plt.subplot(1, 2, 1)
        plt.plot(history.history['accuracy'], label='Training Accuracy')
        plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.title('CNN Classifier - Accuracy')
        
        # Plot loss
        plt.subplot(1, 2, 2)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.title('CNN Classifier - Loss')
        
        plt.tight_layout()
        plt.savefig('results/cnn_classifier_training_history.png')
        plt.close()
    
    def evaluate(self, test_images, test_labels):
        """Evaluate the CNN classifier"""
        return self.model.evaluate(test_images, test_labels)

In [35]:
wgan_model.discriminator.summary()

In [37]:
wgan_model.generator.summary()