<a href="https://colab.research.google.com/github/tannisthamaiti/DiffusionModels_DDPM_DDIM/blob/main/GenerateLabels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Mount the drive
from google.colab import drive
import os
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Replace 'your_folder_name' with the actual name of your folder
folder_path = '/content/drive/MyDrive/DiffusionModels_DDPM_DDIM'

# Check if the folder exists
if os.path.exists(folder_path):
  print(f"Folder '{folder_path}' mounted successfully!")
else:
  print(f"Folder '{folder_path}' not found. Please check the path.")

# Now you can access files within the mounted folder
# For example, to list files in the folder:
os.listdir(folder_path)

Folder '/content/drive/MyDrive/DiffusionModels_DDPM_DDIM' mounted successfully!


['README.md',
 '.gitignore',
 'train',
 'train_labels.csv',
 'train_metadata.csv',
 'train_context.py',
 'helper_plot.py',
 'diffusion_01_score.ipynb',
 'output_image.jpg',
 'L4_FastSampling.ipynb',
 'Sampling.ipynb',
 'diffusion_utils.py',
 'unet_test.ipynb',
 'unet_attention.py',
 'spatial_helper.py',
 '__pycache__',
 'train_hddn.py',
 'resnet_helper.py',
 'train_attn.py',
 'train.py',
 'Sampling_backbone.ipynb',
 'Sampling_sprites.ipynb',
 '.git',
 'Sampling_attn.ipynb',
 'GenerateLabels.ipynb']

In [3]:
import numpy as np
import pandas as pd
from PIL import Image
train_file_path = os.path.join(folder_path, "train_labels.csv")
train_file_path_meta = os.path.join(folder_path, "train_metadata.csv")
labels = pd.read_csv(train_file_path)
metadata = pd.read_csv(train_file_path_meta)
array_2000 =[]

In [4]:
labels.head()

Unnamed: 0,Image ID,Wind Speed
0,nhe_000,34
1,nhe_001,34
2,nhe_002,34
3,nhe_003,33
4,nhe_004,32


In [5]:
metadata.head()

Unnamed: 0,Image ID,Storm ID,Relative Time,Ocean
0,nhe_000,nhe,0,2
1,nhe_001,nhe,1800,2
2,nhe_002,nhe,3600,2
3,nhe_003,nhe,5402,2
4,nhe_004,nhe,9001,2


In [6]:
#Check 1 image
filename = "ggv_278.jpg"
image_file_path=os.path.join(folder_path,"train/", filename)
img = Image.open(image_file_path)
image_array = np.array(img)
image_array.shape[0]

366

In [7]:
def channel3(img, size):
    empty_3d_array = np.empty((size, size, 3))
    empty_3d_array[:,:,0]=np.array(img)
    empty_3d_array[:,:,1]=np.array(img)
    empty_3d_array[:,:,2]=np.array(img)
    return empty_3d_array


In [8]:
def channel1(img):
    empty_1d_array = np.empty((16, 16, 1))
    empty_1d_array[:,:,0]=np.array(img)
    return empty_1d_array

In [9]:
def crop_center(image, new_width, new_height):
    # Get the current dimensions of the image
    width, height = image.size

    # Calculate the coordinates for the crop
    left = (width - new_width) // 2
    top = (height - new_height) // 2
    right = (width + new_width) // 2
    bottom = (height + new_height) // 2

    # Perform the crop
    cropped_image = image.crop((left, top, right, bottom))

    return cropped_image




In [10]:
from tqdm import tqdm

def process_images(folder_path):
    image_array = []
    for filename in tqdm(os.listdir(folder_path), desc="Processing images"):
        if filename.endswith(".jpg"):
            file_path = os.path.join(folder_path, filename)
            size = 128
            try:
                img = Image.open(file_path)
                cropped_image = crop_center(img, size, size)  # Crop to 128x128
                image_array.append(channel3(cropped_image, size))
                img.close()
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    return image_array

### Find the common images between label.csv file and `/train` folder. Save the wind_speed as a dict to be used to create the image_array and labels_array in the later codes.

In [11]:
folder_path_image= os.path.join(folder_path,"train/")
filenames = os.listdir(folder_path_image)
image_names = []  # Create an empty list to store image names
df =labels.copy()

image_filenames = [f for f in os.listdir(folder_path_image) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
# Extract the 'Image ID' column from the DataFrame
image_ids_df = df['Image ID'].astype(str).tolist()  # Convert to strings for comparison

# Convert image filenames to IDs by removing extensions
image_ids_files = [os.path.splitext(f)[0] for f in image_filenames]
# Find common and unique elements
common_ids = list(set(image_ids_df) & set(image_ids_files))
unique_ids_df = list(set(image_ids_df) - set(image_ids_files))
unique_ids_files = list(set(image_ids_files) - set(image_ids_df))

# Print the results
print(f"Number of common Image IDs: {len(common_ids)}")
print(f"Number of unique Image IDs in DataFrame: {len(unique_ids_df)}")
print(f"Number of unique Image IDs in files: {len(unique_ids_files)}")

wind_speeds = {}

# Iterate through common IDs and retrieve wind speeds
for image_id in common_ids:
    wind_speed = df.loc[df['Image ID'] == image_id, 'Wind Speed'].iloc[0]
    wind_speeds[image_id] = wind_speed


Number of common Image IDs: 24677
Number of unique Image IDs in DataFrame: 45580
Number of unique Image IDs in files: 0


In [12]:
def labels_process(value_loc):
    storm_array=[]
    # name_to_find = image_id
    # result_loc = labels.loc[labels['Image ID'] == name_to_find, 'Wind Speed'].values
    # value_loc=result_loc[0]
    if ((value_loc>=15) & (value_loc<=45)):
        storm_array = switch_case(1)
    elif ((value_loc>45) & (value_loc<=80)):
        storm_array = switch_case(2)
    elif (value_loc>80 & value_loc<=110):
        storm_array = switch_case(3)
    elif (value_loc>110 & value_loc<=150):
        storm_array = switch_case(4)
    elif (value_loc>150 & value_loc<=190):
        storm_array = switch_case(5)
    return storm_array

In [13]:
def switch_case(argument):
    return {
        1: [1,0,0,0,0],
        2: [0,1,0,0,0],
        3: [0,0,1,0,0],
        4: [0,0,0,1,0],
        5: [0,0,0,0,1]
    }.get(argument, "Invalid option")

### use 10% of images to check the code

In [14]:
from tqdm import tqdm
import random

def process_images(folder_path,wind_speeds):
    image_array = []
    filenames = os.listdir(folder_path)
    num_files_to_process = int(len(filenames) * 0.1)  # Calculate 10%
    wind_speed_array=[]
    # Wrap selection process with tqdm
    with tqdm(total=num_files_to_process, desc="Selecting files") as pbar_selection:
        selected_filenames = random.sample(filenames, num_files_to_process)
        pbar_selection.update(num_files_to_process) # Update selection progress bar

    # Wrap processing loop with tqdm
    with tqdm(total=num_files_to_process, desc="Processing images") as pbar_processing:
        for filename in selected_filenames:
            if filename.endswith(".jpg"):
                file_path = os.path.join(folder_path, filename)
                size = 128
                try:
                    img = Image.open(file_path)
                    cropped_image = crop_center(img, size, size)
                    image_array.append(channel3(cropped_image, size))
                    wind_speed_array.append(labels_process(wind_speeds[image_id]))  # Add wind speed to list
                    img.close()
                except Exception as e:
                    print(f"Error processing {filename}: {e}")
                finally:
                    pbar_processing.update(1) # Update processing progress bar
    return image_array,wind_speed_array

In [15]:
image_array,labels_array= process_images(folder_path_image,wind_speeds)

Selecting files: 100%|██████████| 2467/2467 [00:00<00:00, 1183771.65it/s]
Processing images: 100%|██████████| 2467/2467 [32:12<00:00,  1.28it/s]


In [16]:
file_path_label = "wind_label_3D128X128.npy"
file_path = "wind_3D128X128.npy"
np.save(file_path, image_array)
np.save(file_path_label, labels_array)

In [17]:
import os
import pandas as pd

# Assuming 'metadata' is your DataFrame and 'folder_path_image' is the path to your images
image_folder = folder_path_image  # Replace with your image folder path
df = metadata  # Assuming 'metadata' is your DataFrame

# Get a list of image filenames from the folder
image_filenames = [f for f in os.listdir(image_folder) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

# Extract the 'Image ID' column from the DataFrame
image_ids_df = df['Image ID'].astype(str).tolist()  # Convert to strings for comparison

# Convert image filenames to IDs by removing extensions
image_ids_files = [os.path.splitext(f)[0] for f in image_filenames]

# Find common and unique elements
common_ids = list(set(image_ids_df) & set(image_ids_files))
unique_ids_df = list(set(image_ids_df) - set(image_ids_files))
unique_ids_files = list(set(image_ids_files) - set(image_ids_df))

# Print the results
print(f"Number of common Image IDs: {len(common_ids)}")
print(f"Number of unique Image IDs in DataFrame: {len(unique_ids_df)}")
print(f"Number of unique Image IDs in files: {len(unique_ids_files)}")

# Optional: Print the unique IDs
if unique_ids_df:
    print("\nUnique Image IDs in DataFrame:")
    print(unique_ids_df)
if unique_ids_files:
    print("\nUnique Image IDs in files:")
    print(unique_ids_files)

Number of common Image IDs: 24677
Number of unique Image IDs in DataFrame: 45580
Number of unique Image IDs in files: 0

Unique Image IDs in DataFrame:
['jjw_004', 'xlb_058', 'kxm_050', 'dsr_034', 'pbk_010', 'ipa_174', 'gmt_042', 'wic_031', 'cqv_013', 'rkw_167', 'dwc_324', 'dwc_035', 'dvh_125', 'tlc_130', 'amn_105', 'vxf_000', 'cxh_035', 'psz_476', 'oyc_015', 'ykj_130', 'wvi_200', 'ugc_146', 'eoi_477', 'gkf_084', 'imi_006', 'mfm_159', 'ing_102', 'xjo_245', 'vaj_036', 'ogw_068', 'dli_090', 'eih_143', 'fay_039', 'zvv_032', 'xjo_455', 'ilk_051', 'vzn_071', 'zgr_178', 'ise_033', 'kza_132', 'lqy_162', 'whm_094', 'vaj_021', 'vye_056', 'mtw_141', 'ydl_331', 'yjk_053', 'dvh_129', 'dce_229', 'zgi_327', 'djb_177', 'ffk_146', 'ilk_060', 'ijc_273', 'ztb_126', 'tnj_021', 'kxo_054', 'pyi_048', 'uhd_043', 'ezh_058', 'ggv_108', 'yit_077', 'dvh_021', 'ftb_006', 'djr_081', 'fvj_000', 'nkd_194', 'wvi_130', 'rkw_001', 'alq_060', 'ykj_122', 'rck_005', 'efl_278', 'ezh_142', 'xrh_014', 'psz_226', 'ijc_183', 

In [19]:
print(len(labels_array))

2467
