In [13]:
import numpy as np
from scipy.io import savemat

# Load the .npy file
data = np.load(r'H:\DDGAN\figure\uvw_18.npy')

u = data[0]
v = data[1]
w = data[2]

savemat(r'H:\DDGAN\figure\u', {'data': u})
savemat(r'H:\DDGAN\figure\v', {'data': v})
savemat(r'H:\DDGAN\figure\w', {'data': w})

In [2]:
import numpy as np
import pickle
import os
import glob
import torch
from torch.utils.data import DataLoader, Dataset
import re
import matplotlib.pyplot as plt
class CustomScaler:
    def __init__(self):
        self.u_min = float('inf')
        self.u_max = float('-inf')
    
    def partial_fit(self, batch):
        batch_min = np.min(batch)
        batch_max = np.max(batch)
        self.u_min = min(self.u_min, batch_min)
        self.u_max = max(self.u_max, batch_max)
    
    def fit(self, data_loader):
        for i, batch in enumerate(data_loader):
            self.partial_fit(batch.numpy())
            if i % 10 == 0:  # Print every 10 batches
                print(f"Batch {i}, Current min: {self.u_min:.6f}, Current max: {self.u_max:.6f}")
    
    def transform(self, u):
        u_scaled = (2 * u - (self.u_max + self.u_min)) / (self.u_max - self.u_min)
        return np.clip(u_scaled, -1, 1)
    
    def untransform(self, u_scaled):
        return 0.5 * (u_scaled * (self.u_max - self.u_min) + (self.u_max + self.u_min))

class GANDataset(Dataset):
    def __init__(self, folder_path, prefix):
        self.folder_path = folder_path
        self.prefix = prefix
        self.num_timesteps = 5000  # 0 to 4999

    def __len__(self):
        return self.num_timesteps

    def __getitem__(self, idx):
        file_path = os.path.join(self.folder_path, f'{self.prefix}_sample_{idx}.npy')
        data = np.load(file_path)
        return torch.from_numpy(data[0]).float()  # u = [0], v = [1] , w = [2]                       <--

def create_dataloader(dataset, batch_size=32):
    return DataLoader(dataset, batch_size=batch_size, shuffle=False)

def save_scaler(scaler, path):
    with open(path, 'wb') as f:
        pickle.dump(scaler, f)

def load_scaler(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

def fit_scaler(gen_folder_path, disc_folder_path, scaler_path, batch_size = 5000):                     # <--
    print(f"Fitting scaler with batch size: {batch_size}")
    scaler = CustomScaler()

    # Create DataLoaders
    gen_dataset = GANDataset(gen_folder_path, 'g')
    disc_dataset = GANDataset(disc_folder_path, 'd')

    gen_loader = create_dataloader(gen_dataset, batch_size)
    disc_loader = create_dataloader(disc_dataset, batch_size)

    # Fit the single scaler on both datasets
    print("Fitting scaler on generator data...")
    scaler.fit(gen_loader)
    print("Fitting scaler on discriminator data...")
    scaler.fit(disc_loader)

    # Save the scaler
    save_scaler(scaler, scaler_path)
    print(f"Scaler saved to {scaler_path}")
    
    return scaler

In [7]:
import numpy as np
import os
import scipy.io
import pickle

# Path to the mask file
mask_path = r'H:\DDGAN\building_location.npy'
scaler_path = r'H:\DDGAN\scaler_u.pkl'
# Directory containing the .npy files
directory_path = r'H:\DDGAN\Orig_dataset\npy_uvw'

# Load the mask
mask = np.load(mask_path)

# Ensure the mask is boolean
mask = mask.astype(bool)

# Load the scaler
with open(scaler_path, 'rb') as f:
    scaler = pickle.load(f)

# Collect all data
all_data = []

for filename in os.listdir(directory_path):
    if filename.endswith('.npy'):
        filepath = os.path.join(directory_path, filename)
        data = np.load(filepath)
        
        # Check if the data is 3D (multiple matrices)
        if data.ndim != 3:
            print(f"Unexpected data shape in {filename}. Skipping.")
            continue
        
        # Ensure the first matrix and mask have the same shape
        if data[2].shape != mask.shape:
            print(f"Shape mismatch for first matrix in {filename}. Skipping.")
            continue
        
        # Apply the mask only to the first matrix
        data[2][mask] = np.nan
        
        # Save the modified data back to the file
        np.save(filepath, data)
        
        # Collect the data for the distribution (only from the first matrix)
        all_data.extend(data[2].flatten())

# Convert to a numpy array
all_data = np.array(all_data)

# Remove NaN values before normalization
all_data_no_nan = all_data[~np.isnan(all_data)]

# Normalize the data
# normalized_data = scaler.transform(all_data_no_nan.reshape(-1, 1)).flatten()

# Save to .mat file for MATLAB
scipy.io.savemat('H:\DDGAN\data_distribution_w_before.mat', {'data_values': all_data_no_nan})

print("Processing complete.")

  scipy.io.savemat('H:\DDGAN\data_distribution_w_before.mat', {'data_values': all_data_no_nan})


Processing complete.
