In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# the download loop below splits on ',' and ':' and unquotes the URL.
# NOTE(review): the URL is signed (X-Goog-Date=20241011T232804Z,
# X-Goog-Expires=259200), so it has presumably expired by now — confirm and
# regenerate the notebook's data-source cell if the download fails.
DATA_SOURCE_MAPPING = 'wonders-of-the-world-image-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2144356%2F3569211%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20241011%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20241011T232804Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D2fadb37db77984adf1e535ab88e994dcf4ca7ff0c40dd90a948eb72dc45a5d45b6f981e1f7ebaa03c5a6eea51868a151cb65dc0c96b61011db9a9f0ca1b7fa8bd9588f10582c5d0eee381655adbb7543ec4b14a83870f4a2b129c17b426f8d05a5d8f3c432b68d84c3cdc040e30852529bd984991e220ea9c3cacbfadb532fe9c174878eba1e81e67a95ced40e9d86afed9e25c2ba864ffc098b96bf059612b7f35049004babb3c7dee62f5ea1a7443eba5cfe1919f47fd982fabb78d9d055dcf6fbb1a45f58de20ee46112b209971ab08243571774701779ecff8c5e7ce6802394e0e0dc6b066a440927820d235599dd12f08fe0b63ee03d1da96d217c1677f'

# Directory that receives the downloaded datasets (one sub-directory each).
KAGGLE_INPUT_PATH='/kaggle/input'
# Directory for files produced by the notebook.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere in the visible part of this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it into
# KAGGLE_INPUT_PATH/<directory>. A source that fails to download is reported
# and skipped so the remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a URL that still contains a literal ':'
    # cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # The Content-Length header is optional; without it the size is
            # unknown and only the running byte count can be shown.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')

            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    # 50-character progress bar, clamped in case more bytes
                    # arrive than the header announced.
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()

            # The tar branch reopens the temporary file by name, so buffered
            # bytes must reach the disk before extraction starts.
            tfile.flush()

            # NOTE(review): extractall() trusts the member paths stored in
            # the archive; only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here
                # would replace the imported module and break the next
                # tar archive in the loop.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')

Downloading wonders-of-the-world-image-classification, 453078359 bytes compressed
Downloaded and uncompressed: wonders-of-the-world-image-classification
Data source import complete.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tensorflow import keras
import tensorflow as tf
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import random
import time
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import os
import shutil
import random

In [None]:

# Fixed seed so the data split and the training run can be repeated.
# The value was produced by int(time.time()) the first time this cell was run
# (11/10/2024 at 16:54 according to the original note) and is pinned here;
# the time-based seeding itself was dead code because it was overwritten
# immediately.
seed = 1728676400

# Seed every random number generator used by this notebook.
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only affects processes launched from here on, not the
# already-running kernel — confirm if hash ordering matters.
os.environ['PYTHONHASHSEED'] = str(seed)

def create_directories(source_dir, dest_dirs):
    """
    Mirror the sub-directory names of ``source_dir`` inside every destination.

    Only entries of ``source_dir`` that are directories are mirrored; plain
    files are ignored and nothing is copied. Destination folders that already
    exist are left as they are.

    :param source_dir: Directory whose immediate sub-directories are mirrored.
    :param dest_dirs: Destination directories in which the folders are created.
    """
    category_names = [
        entry
        for entry in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, entry))
    ]
    for name in category_names:
        for destination_root in dest_dirs:
            os.makedirs(os.path.join(destination_root, name), exist_ok=True)

# Folder of the downloaded dataset that holds one sub-folder per class.
source_dir = '/kaggle/input/wonders-of-the-world-image-classification/Wonders of World/Wonders of World'

# Output folders for the three data splits.
train_dir, val_dir, test_dir = (
    '/kaggle/working/train',
    '/kaggle/working/val',
    '/kaggle/working/test',
)

# Create one (still empty) folder per class inside every split folder.
create_directories(source_dir, [train_dir, val_dir, test_dir])

def split_data(source_dir, train_dir, val_dir, test_dir, val_split=0.15, test_split=0.15):
    """
    Move the files of every class folder into train/val/test folders.

    For each sub-directory of ``source_dir`` the entries are shuffled with the
    global ``random`` module and then moved (not copied) into
    ``<split_dir>/<category>/``. Entries are sorted before shuffling because
    ``os.listdir`` returns them in arbitrary order; without sorting, the same
    seed could produce a different split on another machine or run.

    :param source_dir: Directory containing one sub-directory per category.
    :param train_dir: Destination root of the training split.
    :param val_dir: Destination root of the validation split.
    :param test_dir: Destination root of the test split.
    :param val_split: Fraction of each category moved to ``val_dir``.
    :param test_split: Fraction of each category moved to ``test_dir``.
    :raises ValueError: If a fraction is negative or the two fractions add up
        to more than 1.
    """
    if val_split < 0 or test_split < 0 or val_split + test_split > 1:
        raise ValueError(
            'val_split and test_split must be non-negative and sum to at most 1'
        )

    for category in sorted(os.listdir(source_dir)):
        category_path = os.path.join(source_dir, category)
        if not os.path.isdir(category_path):
            continue

        images = sorted(os.listdir(category_path))
        random.shuffle(images)

        num_images = len(images)
        num_test = int(num_images * test_split)
        num_val = int(num_images * val_split)
        # Whatever is left after rounding down goes to the training split.
        num_train = num_images - num_test - num_val

        splits = (
            (train_dir, images[:num_train]),
            (val_dir, images[num_train:num_train + num_val]),
            (test_dir, images[num_train + num_val:]),
        )
        for split_dir, split_images in splits:
            split_category_dir = os.path.join(split_dir, category)
            # Create the destination here so the function does not depend on
            # the folders having been prepared beforehand.
            os.makedirs(split_category_dir, exist_ok=True)
            for image in split_images:
                shutil.move(
                    os.path.join(category_path, image),
                    os.path.join(split_category_dir, image),
                )

# Split the dataset. source_dir, train_dir, val_dir and test_dir are the
# paths defined earlier in this cell; they are reused instead of being typed
# out a second time so the two copies cannot drift apart.
split_data(source_dir, train_dir, val_dir, test_dir)

In [None]:
# Generators for training, validation and testing; each one is configured
# with rescale=1./255 only.
datagen_train = ImageDataGenerator(rescale=1./255)
datagen_val = ImageDataGenerator(rescale=1./255)
datagen_test = ImageDataGenerator(rescale=1./255)

# Options shared by the three directory iterators.
flow_options = dict(
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
    seed=seed,
)

train_generator = datagen_train.flow_from_directory('/kaggle/working/train', **flow_options)
val_generator = datagen_val.flow_from_directory('/kaggle/working/val', **flow_options)
test_generator = datagen_test.flow_from_directory('/kaggle/working/test', **flow_options)

Found 2706 images belonging to 12 classes.
Found 570 images belonging to 12 classes.
Found 570 images belonging to 12 classes.


In [None]:
# Shape of the images fed to the network (height, width, channels); the
# height and width match the target_size used by the data generators.
IMAGE_SHAPE = (150, 150, 3)
# Number of output classes; the dataset folders contain 12 categories.
NUM_CLASSES = 12

# Small convolutional classifier: three Conv2D + MaxPooling2D stages followed
# by a dense head with a softmax output.
model = models.Sequential([
    # layers.Input(shape=...) replaces InputLayer(input_shape=...), whose
    # `input_shape` argument is deprecated in Keras 3.
    layers.Input(shape=IMAGE_SHAPE),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(NUM_CLASSES, activation='softmax'),
])

# categorical_crossentropy matches the one-hot labels produced by
# class_mode='categorical' in the generators.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy']
              )

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 74, 74, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 36, 36, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 17, 17, 128)      

In [None]:
# Train for 6 epochs and keep the History object so the loss/accuracy curves
# can be inspected afterwards.
# The step counts are left to Keras, which uses the length of the generators.
# The previous `samples // batch_size` rounded down and therefore skipped the
# last partial batch: 570 // 32 = 17 validation steps covered only 544 of the
# 570 validation images (and 2706 // 32 = 84 steps covered 2688 of the 2706
# training images per epoch).
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=6,
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.src.callbacks.History at 0x7953cc2c5c00>

In [None]:
# Evaluate on the complete test set. No `steps` argument is passed: the
# previous `samples // batch_size` rounded down to 17 batches and so scored
# only 544 of the 570 test images.
test_loss, test_acc = model.evaluate(test_generator)
print('Test accuracy:', test_acc)

# Save the model in .keras format
model.save('/kaggle/working/model.keras')

# Convert the model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model in .tflite format
with open('/kaggle/working/model.tflite', 'wb') as f:
    f.write(tflite_model)

Test accuracy: 0.7628676295280457
