In [1]:
# imports
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import pandas as pd
import matplotlib.pyplot as plt


# load dataset

In [2]:
# Read the train and test metadata tables (one row per image file).
df_train = pd.read_csv("../meta/Train.csv")
df_test = pd.read_csv("../meta/Test.csv")

# Preview both frames; a notebook cell only renders its final expression,
# so the table shown below this cell is the test preview.
df_train.head()
df_test.head()








Unnamed: 0,ID,filename,growth_stage,damage,extent,season
0,ID_1S8OOWQYCB,L427F01330C01S03961Rp02052.jpg,S,WD,0,SR2020
1,ID_0MD959MIZ0,L1083F00930C39S12674Ip.jpg,V,G,0,SR2021
2,ID_JRJCI4Q11V,24_initial_1_1463_1463.JPG,V,G,0,LR2020
3,ID_DBO3ZGI1GM,L341F00167C01S00324Rp14178.jpg,M,DR,60,SR2020
4,ID_ORZLWTEUUS,L1084F02394C39S13931Ip.jpg,V,G,0,SR2021


# Preprocessing

In [3]:
# Distribution of the training rows across seasons
df_grouped_by_season = df_train.groupby("season")

# Row count per season, and the grand total over all seasons
group_sizes = df_grouped_by_season.size()
total_count = group_sizes.sum()

# Each season's share of the total, expressed in percent
group_percentages = group_sizes.div(total_count).mul(100)

# Report the counts first, then the percentages
print("Group Sizes:", group_sizes, sep="\n")
print("\nGroup Percentages:", group_percentages, sep="\n")





Group Sizes:
season
LR2020    2033
LR2021    7945
SR2020    6163
SR2021    9927
dtype: int64

Group Percentages:
season
LR2020     7.798834
LR2021    30.477981
SR2020    23.642013
SR2021    38.081172
dtype: float64


In [11]:
# Hyper parameters shared by the input pipeline and the model
batch_size = 64                    # samples per batch handed to the model
image_height = image_weight = 224  # resize target; `image_weight` is used as the image width
num_of_output_classes = 10         # number of units in the final softmax layer

In [5]:
# image preprocessing helper functions
def get_image_from_file_path(filename, isTrain=True):
    """Read the raw, still-encoded bytes of one image file.

    Args:
        filename: File name of the image without any directory part. It is
            appended to the train or test image folder, so it can be a Python
            string or a string tensor (the latter is what ``Dataset.map``
            passes in).
        isTrain: When truthy the file is read from the ``train`` folder,
            otherwise from the ``test`` folder. Defaults to ``True`` so the
            one-argument calls made elsewhere in this notebook work.

    Returns:
        Whatever ``tf.io.read_file`` returns for the resolved path (the file
        contents as a string tensor).
    """
    # Forward slashes match the "../meta/..." paths used for the CSV files and
    # resolve on Windows as well as on POSIX systems; "\\" separators only
    # work on Windows.
    image_dir = "../content/train/" if isTrain else "../content/test/"
    return tf.io.read_file(image_dir + filename)

def preprocess_image(filename, label, isTrain=True):
    """Load one image and convert it into a fixed-size float tensor.

    Args:
        filename: File name of the image, forwarded to
            ``get_image_from_file_path``.
        label: Target belonging to the image; returned untouched so the
            function can be used directly with ``Dataset.map`` on
            ``(filename, label)`` pairs.
        isTrain: Forwarded to ``get_image_from_file_path`` to choose between
            the train and the test image folder. Defaults to ``True``.

    Returns:
        A ``(image, label)`` tuple where ``image`` has been decoded with three
        channels, resized to ``(image_height, image_weight)`` and divided by
        255.
    """
    # Fetch the encoded bytes; the folder flag must be passed explicitly,
    # calling the loader with the filename alone raises a TypeError.
    encoded_image = get_image_from_file_path(filename, isTrain)

    # 3 channels = RGB, 1 channel = greyscale
    image = tf.image.decode_jpeg(encoded_image, channels=3)

    # Resize to the dimensions the model expects (set in the hyper-parameter cell)
    image = tf.image.resize(image, (image_height, image_weight))

    # Scale pixel values from [0, 255] down to [0, 1]
    image = image / 255.0

    # Dtype guard: the image is already floating point after the resize and
    # division, so this does not rescale the values again.
    image = tf.image.convert_image_dtype(image, tf.float32)

    return image, label

In [6]:
# Create a tensorflow dataset from the dataframe
image_filenames = df_train['filename'].values
image_labels = df_train['extent'].values

# The network ends in a softmax over `num_of_output_classes` units and is
# compiled with categorical cross-entropy, which needs one-hot targets rather
# than raw `extent` values. Map every distinct extent value to a class index
# (in sorted order) and one-hot encode those indices.
extent_class_ids, extent_class_values = pd.factorize(image_labels, sort=True)
if len(extent_class_values) > num_of_output_classes:
    raise ValueError(
        f"Found {len(extent_class_values)} distinct 'extent' values but "
        f"num_of_output_classes is {num_of_output_classes}; increase it so "
        "every value gets its own class."
    )
image_labels_one_hot = to_categorical(extent_class_ids, num_classes=num_of_output_classes)

dataset = tf.data.Dataset.from_tensor_slices((image_filenames, image_labels_one_hot))
dataset = dataset.map(preprocess_image)

# Test metadata (not turned into a dataset yet).
# NOTE(review): assumes test labels, when present, live in the same 'extent'
# column as the training labels - confirm. Unlabeled test metadata yields None.
test_image_filenames = df_test['filename'].values
test_image_labels = df_test['extent'].values if 'extent' in df_test.columns else None

# Split the dataset into training and validation sets (70% / 30%).
# The size comes from the filename array: counting by iterating the mapped
# dataset would read and decode every single image just to obtain a number.
dataset_size = len(image_filenames)
train_size = int(0.7 * dataset_size)
# Validation takes the remainder so integer truncation never drops a sample.
val_size = dataset_size - train_size
# NOTE(review): the split follows the CSV row order and nothing is shuffled -
# confirm the rows are not sorted in a way that skews either split.
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size).take(val_size)

# create data loaders
train_data_loader = train_dataset.batch(batch_size)
val_data_loader = val_dataset.batch(batch_size)

In [7]:
# testing out the image processing helper functions
# image = get_image_from_file_path("L1083F00930C39S12674Ip.jpg")
# image = tf.image.decode_jpeg(image, channels=3)  # Adjust channels as needed

# Convert the image tensor to a NumPy array
# image_array = image.numpy()

# Display the image using Matplotlib
# plt.imshow(image_array)
# plt.axis('off')  # Optionally, hide the axis
# plt.show()



In [14]:
# Build the CNN layer by layer: three convolution stages (the first two are
# followed by 2x2 max pooling), then a small dense classification head.
model = models.Sequential()

# Convolutional feature extractor; the first layer fixes the RGB input size
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(image_height, image_weight, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Classification head: flatten the feature maps, one hidden dense layer,
# then a softmax with one unit per output class
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(num_of_output_classes, activation='softmax'))

In [15]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (this loss expects one-hot encoded targets), accuracy as the tracked metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Fit for 5 epochs on the training batches, evaluating on the validation
# batches after every epoch
model.fit(
    x=train_data_loader,
    epochs=5,
    validation_data=val_data_loader,
)

Epoch 1/5


  return dispatch_target(*args, **kwargs)




KeyboardInterrupt: 