# Подготовка данных

Был выбран датасет с изображениями различных погодных явлений.
Ссылка: https://www.kaggle.com/datasets/jehanbhathena/weather-dataset

In [None]:
# Silence TensorFlow's native logging. The variable has to be set before
# TensorFlow is imported for the first time; with the TensorFlow backend,
# importing `keras` already imports TensorFlow, so this must come first.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Hide Python-level warnings, including those emitted while importing.
import warnings
warnings.filterwarnings("ignore")

import numpy as np
from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Private TensorFlow switch that turns off deprecation messages.
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

import keras
import keras.datasets
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout

In [None]:
# Show the TensorFlow version this notebook is running against.
print(tf.__version__)

2.17.0


In [None]:
# Upload kaggle.json (Kaggle API credentials) through the Colab file picker.
from google.colab import files
# Bind the result to a name instead of leaving the call as the cell's last
# expression: the returned dict holds the raw file contents and would
# otherwise be echoed into the saved cell output, exposing the API key.
kaggle_upload = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<redacted>","key":"<redacted>"}'}

In [None]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

rm: cannot remove '/root/.kaggle': No such file or directory


In [None]:
# Download the dataset archive (weather-dataset.zip) with the Kaggle CLI.
!kaggle datasets download jehanbhathena/weather-dataset

Dataset URL: https://www.kaggle.com/datasets/jehanbhathena/weather-dataset
License(s): CC0-1.0
Downloading weather-dataset.zip to /content
 99% 582M/587M [00:15<00:00, 33.2MB/s]
100% 587M/587M [00:15<00:00, 39.4MB/s]


In [None]:
!mkdir Data
!cp /content/drive/MyDrive/Kaggle/weather-dataset.zip /content/weather-dataset.zip
!unzip -q /content/weather-dataset.zip -d /content/Data
!rm /content/Data/weather-dataset.zip


cp: cannot stat '/content/drive/MyDrive/Kaggle/weather-dataset.zip': No such file or directory
rm: cannot remove '/content/Data/weather-dataset.zip': No such file or directory


In [None]:
import pathlib

# Root folder of the unpacked dataset.
data_dir = pathlib.Path("/content/Data/dataset")

# Count the .jpg files located exactly one directory level below the root.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
print(image_count)

6862


In [None]:
import os
from PIL import Image

# Remove files of an unsuitable format, if there are any.
# Only files whose lower-cased name ends with one of these extensions are kept.
allowed_formats = ('.jpeg', '.jpg', '.png')


def is_image_valid(file_path):
    """Check whether Pillow can open the file and its integrity check passes.

    Args:
        file_path: Path of the image file to inspect.

    Returns:
        bool: True if ``Image.open`` and ``Image.verify`` both succeed,
        False if either raises OSError, ValueError or SyntaxError.
    """
    try:
        with Image.open(file_path) as img:
            img.verify()
    # verify() can signal a damaged file (e.g. a broken PNG) with SyntaxError
    # rather than OSError/ValueError; without listing it here a single bad
    # file would abort the whole cleanup pass instead of being reported.
    except (OSError, ValueError, SyntaxError):
        return False
    return True


# Walk through every sub-folder of data_dir and delete files that either have
# an extension outside allowed_formats or fail the image validity check.
for class_name in os.listdir(data_dir):
    class_path = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    for filename in os.listdir(class_path):
        file_path = os.path.join(class_path, filename)
        if not os.path.isfile(file_path):
            continue

        # The extension is checked first so that is_image_valid() is only
        # called for files with an allowed extension.
        if not filename.lower().endswith(allowed_formats):
            print(f"Deleting invalid file: {file_path}")
        elif not is_image_valid(file_path):
            print(f"Deleting corrupted file: {file_path}")
        else:
            continue

        os.remove(file_path)

print("Image cleaning complete.")


Image cleaning complete.


In [None]:
img_height, img_width = 224, 224
num_classes = 11
batch_size = 32
epochs = 30
shuffle_buffer_size = 10000
split_seed = 123        # must be the same in both calls so the two subsets are complementary
holdout_fraction = 0.3  # share of files kept out of training (validation + test together)

# Training subset: 70% of the images.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=holdout_fraction,
    subset="training",
    seed=split_seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

# Hold-out subset: the remaining 30%, divided into validation and test below.
# image_dataset_from_directory only offers a two-way split; asking for
# subset="validation" a second time with a smaller fraction returns files that
# are already part of the validation subset, not an independent test set.
holdout_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=holdout_fraction,
    subset="validation",
    seed=split_seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

class_names = train_ds.class_names
print(class_names)
assert len(class_names) == num_classes, "num_classes does not match the number of class folders"

# Scale pixels from [0, 255] to [-1, 1], the input range the ImageNet weights
# of ResNet50V2 were trained with (same result as resnet_v2.preprocess_input).
normalization_layer = tf.keras.layers.Rescaling(1./127.5, offset=-1)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
holdout_ds = holdout_ds.map(lambda x, y: (normalization_layer(x), y)).cache()

# The hold-out pipeline is created with shuffle=True (the default), so its
# element order may differ between iterations and take()/skip() alone would
# not give stable, disjoint subsets. One complete pass fills the in-memory
# cache, which fixes the batch order for every later iteration.
holdout_batches = sum(1 for _ in holdout_ds)
val_batches = holdout_batches // 2

# Validation (~15%) and test (~15%) are disjoint halves of the hold-out set.
val_ds = holdout_ds.take(val_batches)
test_ds = holdout_ds.skip(val_batches)

# Only the training batches are reshuffled each epoch; all pipelines prefetch.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(shuffle_buffer_size).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

Found 6862 files belonging to 11 classes.
Using 4804 files for training.
Found 6862 files belonging to 11 classes.
Using 2058 files for validation.
Found 6862 files belonging to 11 classes.
Using 1029 files for validation.
['dew', 'fogsmog', 'frost', 'glaze', 'hail', 'lightning', 'rain', 'rainbow', 'rime', 'sandstorm', 'snow']


# Работа с моделью

In [None]:
from keras.applications import ResNet50V2 as resnet
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# ResNet50V2 with ImageNet weights and without its classification head is
# used as the pre-trained base model.
resnet_model = resnet(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)
resnet_model.summary()

# Freeze the base model so its weights are not updated during training.
resnet_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
# Пишем простой классификатор и соединяем его с ResNet50V2 для дообучения
model = tf.keras.Sequential([
    resnet_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(class_names), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 251ms/step - accuracy: 0.5546 - loss: 4.3544 - val_accuracy: 0.7454 - val_loss: 0.8150
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 98ms/step - accuracy: 0.7616 - loss: 0.9360 - val_accuracy: 0.7838 - val_loss: 0.7613
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 106ms/step - accuracy: 0.7767 - loss: 0.7907 - val_accuracy: 0.7969 - val_loss: 0.7213
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 101ms/step - accuracy: 0.8024 - loss: 0.7416 - val_accuracy: 0.7804 - val_loss: 1.0239
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 108ms/step - accuracy: 0.8132 - loss: 0.7768 - val_accuracy: 0.8061 - val_loss: 0.7343
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 101ms/step - accuracy: 0.8369 - loss: 0.6072 - val_accuracy: 0.8076 - val_loss: 0.7861
Epoch 7/10


<keras.src.callbacks.history.History at 0x7f8023f23520>

In [None]:
# Evaluate the current model on the validation dataset and print its accuracy as a percentage.
loss, accuracy = model.evaluate(val_ds)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 67ms/step - accuracy: 0.8190 - loss: 0.9254
Validation Accuracy: 82.02%


Сейчас точность составляет приблизительно 82%. Попробуем её улучшить.

In [None]:
# Добавляем аугментацию
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1)
])

model = tf.keras.Sequential([
    data_augmentation,
    resnet_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Тренируем модель с большим количеством эпох и early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(train_ds, validation_data=val_ds, epochs=30, callbacks=[early_stopping])

Epoch 1/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 180ms/step - accuracy: 0.6303 - loss: 1.1284 - val_accuracy: 0.8416 - val_loss: 0.4789
Epoch 2/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 155ms/step - accuracy: 0.8455 - loss: 0.4645 - val_accuracy: 0.8338 - val_loss: 0.4805
Epoch 3/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 155ms/step - accuracy: 0.8560 - loss: 0.4071 - val_accuracy: 0.8474 - val_loss: 0.4466
Epoch 4/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 154ms/step - accuracy: 0.8853 - loss: 0.3139 - val_accuracy: 0.8503 - val_loss: 0.4263
Epoch 5/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 154ms/step - accuracy: 0.9028 - loss: 0.2743 - val_accuracy: 0.8450 - val_loss: 0.4562
Epoch 6/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 156ms/step - accuracy: 0.9055 - loss: 0.2641 - val_accuracy: 0.8576 - val_loss: 0.4394
Epoch 7/30

In [None]:
# Save the current model to my_model.keras.
model.save('my_model.keras')

In [None]:
# Evaluate the current model on the validation dataset and print its accuracy as a percentage.
loss, accuracy = model.evaluate(val_ds)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 106ms/step - accuracy: 0.8390 - loss: 0.4453
Validation Accuracy: 85.03%


Точность повысилась до 85%, продолжаем усложнять

In [None]:
# Размораживаем некоторые слои
resnet_model.trainable = True
fine_tune_at = 100

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history_fine = model.fit(train_ds, validation_data=val_ds, epochs=30, callbacks=[early_stopping])

Epoch 1/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 484ms/step - accuracy: 0.7401 - loss: 0.7560 - val_accuracy: 0.8367 - val_loss: 0.4523
Epoch 2/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 465ms/step - accuracy: 0.8642 - loss: 0.4035 - val_accuracy: 0.8596 - val_loss: 0.4076
Epoch 3/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 465ms/step - accuracy: 0.8966 - loss: 0.3159 - val_accuracy: 0.8717 - val_loss: 0.3793
Epoch 4/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 464ms/step - accuracy: 0.9094 - loss: 0.2754 - val_accuracy: 0.8771 - val_loss: 0.3632
Epoch 5/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 465ms/step - accuracy: 0.9335 - loss: 0.2186 - val_accuracy: 0.8839 - val_loss: 0.3526
Epoch 6/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 466ms/step - accuracy: 0.9345 - loss: 0.1944 - val_accuracy: 0.8863 - val_loss: 0.3438
Epoch 7/3

In [None]:
# Save the current model to new_fined_model.keras.
model.save('new_fined_model.keras')

In [None]:
# Evaluate the current model on the validation dataset and print its accuracy as a percentage.
loss, accuracy = model.evaluate(val_ds)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 105ms/step - accuracy: 0.8768 - loss: 0.3629
Validation Accuracy: 89.21%


Точность стала приблизительно 89%. Продолжим ее улучшать

Используем keras tuner для подбора наилучших гиперпараметров

In [None]:
!pip install keras-tuner --upgrade

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
from keras_tuner import HyperModel
import keras_tuner as kt
from tensorflow.keras.models import load_model

# Функция для создания модели с учетом гиперпараметров
def build_model(hp):
    """Build and compile a new classifier for a single Keras Tuner trial.

    A new model is created on every call, so trials neither share weights with
    each other nor continue training a model fitted earlier in the notebook,
    and the sampled ``units`` and ``dropout`` values are actually applied.

    Args:
        hp: Keras Tuner hyperparameter container used to sample ``units``,
            ``learning_rate`` and ``dropout``.

    Returns:
        A compiled ``tf.keras.Sequential`` model.
    """
    # Width of the dense layer in the classification head.
    hp_units = hp.Int('units', min_value=128, max_value=512, step=64)

    # Learning rate, sampled on a logarithmic scale.
    hp_learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-3, sampling='log')

    # Dropout rate applied after the dense layer.
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.1)

    # Each trial gets its own frozen ImageNet backbone so that all trials
    # start from the same pre-trained weights.
    backbone = resnet(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
    backbone.trainable = False

    new_model = tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.1),
        tf.keras.layers.RandomZoom(0.1),
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(hp_units, activation='relu'),
        tf.keras.layers.Dropout(hp_dropout),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    # Compile the model with the sampled learning rate.
    new_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return new_model

In [None]:
# Настройка Hyperband-тюнера для поиска наилучших гиперпараметров
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=30,
    factor=3,
    directory='kt_search',  # Папка для сохранения результатов
    project_name='weather_classification'
)

# Ранний стоппинг для предотвращения переобучения
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Запуск поиска гиперпараметров
tuner.search(train_ds, validation_data=val_ds, epochs=20, callbacks=[stop_early])

Trial 15 Complete [00h 04m 13s]
val_accuracy: 0.8809523582458496

Best val_accuracy So Far: 0.8809523582458496
Total elapsed time: 01h 01m 00s

Search: Running Trial #16

Value             |Best Value So Far |Hyperparameter
512               |256               |units
5.1819e-05        |1.499e-05         |learning_rate
0.4               |0.4               |dropout
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
3                 |3                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 714ms/step - accuracy: 0.9962 - loss: 0.0139 - val_accuracy: 0.8683 - val_loss: 0.8316
Epoch 2/2
[1m 17/151[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m55s[0m 413ms/step - accuracy: 0.9950 - loss: 0.0180

KeyboardInterrupt: 

In [None]:
# Получение наилучших гиперпараметров
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
Лучшие гиперпараметры:
- Число нейронов в плотном слое: {best_hps.get('units')}
- Dropout: {best_hps.get('dropout')}
- Скорость обучения: {best_hps.get('learning_rate')}
""")


Лучшие гиперпараметры:
- Число нейронов в плотном слое: 256
- Dropout: 0.4
- Скорость обучения: 1.4990131560328785e-05



In [None]:
# Модель уже настроена и подобрана
new_model = tuner.hypermodel.build(best_hps)

# Обучение модели с лучшими гиперпараметрами
history = new_model.fit(train_ds, validation_data=val_ds, epochs=30, callbacks=[stop_early])

# Проверка точности на тестовой выборке
test_loss, test_acc = new_model.evaluate(test_ds)
print(f'Test Accuracy after hyperparameter tuning: {test_acc:.2f}')

Epoch 1/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 478ms/step - accuracy: 0.9938 - loss: 0.0150 - val_accuracy: 0.8819 - val_loss: 0.7121
Epoch 2/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 463ms/step - accuracy: 0.9982 - loss: 0.0071 - val_accuracy: 0.8800 - val_loss: 0.7512
Epoch 3/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 463ms/step - accuracy: 0.9976 - loss: 0.0089 - val_accuracy: 0.8810 - val_loss: 0.7488
Epoch 4/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 463ms/step - accuracy: 0.9979 - loss: 0.0068 - val_accuracy: 0.8795 - val_loss: 0.7521
Epoch 5/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 465ms/step - accuracy: 0.9970 - loss: 0.0079 - val_accuracy: 0.8795 - val_loss: 0.7728
Epoch 6/30
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 464ms/step - accuracy: 0.9988 - loss: 0.0062 - val_accuracy: 0.8805 - val_loss: 0.7454
[1m 3/33

На этом этапе закончились ресурсы бесплатного колаба 😢 Приблизительная точность, которая была достигнута - 88-89%