In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Define the paths to your dataset
dataset_dir = "/Users/sadianasrintisha/Desktop/Dataset/NASA Worldview./understanding_cloud_organization"
train_images_dir = os.path.join(dataset_dir, "train_images")
test_images_dir = os.path.join(dataset_dir, "test_images")
train_csv_path = os.path.join(dataset_dir, "train.csv")

# Create empty lists to store images and labels
X_train = []
Y_train = []
X_test = []
Y_test = []  # Placeholder for test labels

# Read the CSV file to get image-label pairs
df = pd.read_csv(train_csv_path)

# Function to read, resize, and append images to X_train and labels to Y_train
def process_images_and_labels(image_dir, label_df, label_encoder, is_test=False):
    for index, row in label_df.iterrows():
        image_path = os.path.join(image_dir, row['Image_Label'].split('_')[0])
        image = Image.open(image_path).convert("RGB")
        image = image.resize((224, 224))
        image_array = np.array(image)
        
        if is_test:
            X_test.append(image_array)
            # For test data, you can use a placeholder or any appropriate label.
            Y_test.append(0)  # Placeholder label for test images
        else:
            X_train.append(image_array)
            label = row['EncodedPixels'] if not pd.isna(row['EncodedPixels']) else '0'
            Y_train.append(label_encoder.transform([label])[0])

# Initialize the label encoder
label_encoder = LabelEncoder()
label_encoder.fit(df['EncodedPixels'].fillna('0'))

# Process the training images and labels
process_images_and_labels(train_images_dir, df, label_encoder)

# Process the test images
test_image_files = os.listdir(test_images_dir)
process_images_and_labels(test_images_dir, pd.DataFrame({'Image_Label': test_image_files}), label_encoder, is_test=True)

# Convert lists to numpy arrays
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)


In [3]:
print("Shape of X_train:", X_train.shape)
print("Shape of Y_train:", Y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_test:", Y_test.shape)

Shape of X_train: (22184, 224, 224, 3)
Shape of Y_train: (22184,)
Shape of X_test: (3698, 224, 224, 3)
Shape of Y_test: (3698,)


In [4]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

In [5]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.applications import ResNet50
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Dense
from keras.optimizers import Adam
from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam

from keras.utils import to_categorical  # Add this import

label_encoder = LabelEncoder()
Y_train_encoded = label_encoder.fit_transform(Y_train)

# Use to_categorical to one-hot encode the integer labels
num_classes = len(label_encoder.classes_)  # Number of unique classes
Y_train_one_hot = to_categorical(Y_train_encoded, num_classes=num_classes)

# Split your training data into training and validation sets
X_train, X_val, Y_train_one_hot, Y_val_one_hot = train_test_split(
    X_train, Y_train_one_hot, test_size=0.2, random_state=42
)


# Create the ResNet model without the top (fully connected) layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
 
optimizer = LegacyAdam(learning_rate=0.001)
    
# Add the top layers for classification
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(num_classes, activation='softmax'))  # Change the number of units and activation
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])



In [6]:
model.fit(X_train, Y_train_one_hot, batch_size=64, epochs=50, validation_data=(X_val, Y_val_one_hot))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x16ea259d0>

In [7]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# Assuming you have trained a model and obtained predicted probabilities on X_test
y_pred_prob = model.predict(X_test)

# Convert predicted probabilities to predicted labels
y_pred = np.argmax(y_pred_prob, axis=1)

# One-hot encode the true labels in Y_test if it's not already one-hot encoded
if len(Y_test.shape) == 1:  # Check if Y_test is 1D
    num_classes = len(np.unique(Y_test))
    Y_test_encoded = np.zeros((len(Y_test), num_classes))
    Y_test_encoded[np.arange(len(Y_test)), Y_test] = 1
else:
    Y_test_encoded = Y_test  # Y_test is already one-hot encoded

# Convert Y_test_encoded to predicted labels format
y_test_labels = np.argmax(Y_test_encoded, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_test_labels, y_pred)
print("Accuracy:", accuracy)

# Calculate F1 score (micro-average)
f1_micro = f1_score(y_test_labels, y_pred, average='micro')
print("F1 Score (Micro):", f1_micro)

Accuracy: 0.9989183342347214
F1 Score (Micro): 0.9989183342347214
