1. Import Required Libraries and Set Paths

In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import glob
import os
from datetime import datetime

# Define paths
# The defaults are the original local directories. Either one can be
# overridden without editing the notebook by setting the environment variable
# FENNOMAG_SECS_INPUT_DIR or FENNOMAG_SECS_OUTPUT_DIR before the kernel starts.
input_path = os.environ.get(
    'FENNOMAG_SECS_INPUT_DIR',
    '/Users/akv020/Tensorflow/fennomag-net/data/secs/test_mode_2024/figures',
)
output_path = os.environ.get(
    'FENNOMAG_SECS_OUTPUT_DIR',
    '/Users/akv020/Tensorflow/fennomag-net/source/model2024/data',
)

# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

2. Get List of PNG Files and Extract Timestamps

In [3]:
# Collect every PNG in the input folder. glob returns matches in arbitrary
# order, so sort the paths to get a stable, name-ordered list.
png_pattern = os.path.join(input_path, '*.png')
png_files = glob.glob(png_pattern)
png_files.sort()

# Function to extract timestamp from filename
def filename_to_datetime(filename):
    """Parse the timestamp encoded in an image file name.

    Parameters
    ----------
    filename : str
        Path or bare name of a file named ``YYYYMMDD_HHMMSS.png``. Leading
        directories are ignored.

    Returns
    -------
    datetime.datetime
        Naive datetime parsed from the file name without its extension.

    Raises
    ------
    ValueError
        If the name (minus a trailing ``.png``) does not match
        ``%Y%m%d_%H%M%S``.
    """
    basename = os.path.basename(filename)
    # Strip only the trailing extension. str.replace would also delete a
    # '.png' embedded in the middle of the name and let a malformed file
    # name parse as if it were valid.
    if basename.endswith('.png'):
        basename = basename[:-len('.png')]
    return datetime.strptime(basename, '%Y%m%d_%H%M%S')

# Parse one timestamp per file, keeping the same order as png_files,
# then wrap the result in a pandas DatetimeIndex
timestamps = list(map(filename_to_datetime, png_files))
timestamp_index = pd.DatetimeIndex(timestamps)

# Quick summary: how many files were found and which period they span
print(f"Total number of images found: {len(png_files)}")
print(f"Date range: {timestamp_index.min()} to {timestamp_index.max()}")

Total number of images found: 527040
Date range: 2024-01-01 00:00:00 to 2024-12-31 23:59:00


3. Create Data Array and Process Images

In [4]:
# Initialize the array for SECS data with explicit float32
# Every image must decode to an array of exactly this shape.
n_files = len(png_files)
image_shape = (21, 21, 3)
secs_data = np.zeros((n_files, *image_shape), dtype=np.float32)

# Process all images with progress tracking
from tqdm.notebook import tqdm

# Created once rather than on every iteration; a float32 divisor keeps the
# result in float32.
scale = np.float32(255.0)

for i, png_file in tqdm(enumerate(png_files), total=n_files, desc="Processing images"):
    # The context manager releases the file handle even if decoding fails
    with Image.open(png_file) as img:
        # Convert to numpy array and ensure float32; dividing by 255 maps
        # 8-bit channel values into [0, 1]
        img_array = np.asarray(img, dtype=np.float32) / scale
    # Fail loudly on an unexpected size or mode. Without this check NumPy
    # would silently broadcast an undersized image (e.g. 1x1) into the slot,
    # or raise an error that does not say which file is at fault.
    if img_array.shape != image_shape:
        raise ValueError(
            f"{png_file}: expected image array of shape {image_shape}, "
            f"got {img_array.shape}"
        )
    secs_data[i] = img_array

print("Array shape:", secs_data.shape)
print("Data type:", secs_data.dtype)
print("Memory usage:", secs_data.nbytes / 1e9, "GB")

Processing images:   0%|          | 0/527040 [00:00<?, ?it/s]

Array shape: (527040, 21, 21, 3)
Data type: float32
Memory usage: 2.78909568 GB


4. Save Arrays to Disk (ensuring float32)


In [5]:
# Write the image array and its matching timestamps as two .npy files
data_file = os.path.join(output_path, 'secs_data.npy')
timestamps_file = os.path.join(output_path, 'secs_timestamps.npy')

np.save(data_file, secs_data)
# .values passes the index's underlying NumPy array to np.save
np.save(timestamps_file, timestamp_index.values)

print("Files saved successfully!")

Files saved successfully!


5. Verification (Optional)

In [6]:
# Load and verify the saved data
# The image array is memory-mapped read-only, so its shape, dtype and value
# range can be inspected without loading a second full copy into RAM.
loaded_data = np.load(os.path.join(output_path, 'secs_data.npy'), mmap_mode='r')
loaded_timestamps = np.load(os.path.join(output_path, 'secs_timestamps.npy'))

print("Verification:")
print("Data shape:", loaded_data.shape)
print("Data type:", loaded_data.dtype)
print("Data range:", loaded_data.min(), "to", loaded_data.max())
print("Timestamps shape:", loaded_timestamps.shape)
# pd.Timestamp turns the stored values back into readable timestamps
print("First timestamp:", pd.Timestamp(loaded_timestamps[0]))
print("Last timestamp:", pd.Timestamp(loaded_timestamps[-1]))

Verification:
Data shape: (527040, 21, 21, 3)
Data type: float32
Data range: 0.0 to 0.99607843
Timestamps shape: (527040,)
First timestamp: 2024-01-01 00:00:00
Last timestamp: 2024-12-31 23:59:00


In [7]:
pwd

'/Users/akv020/Tensorflow/fennomag-net/source/preprocess'

In [8]:
# Display the configured output directory to confirm where the arrays were written
output_path

'/Users/akv020/Tensorflow/fennomag-net/source/model2024/data'