# 🧪 Створення sky_features.csv з images/<label>/*
Цей ноутбук автоматично сканує підпапки в `images/`, витягує ознаки для кожного зображення і зберігає `sky_features.csv` з правильною колонкою `label`.

In [1]:
# 📦 Необхідні бібліотеки
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime

In [2]:
# ⚙️ Функції для обчислення ознак
def compute_cloud_coverage(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cloud_pixels = np.sum(thresh == 255)
    total_pixels = img.shape[0] * img.shape[1]
    return cloud_pixels / total_pixels

def compute_white_pixel_ratio(img):
    white_pixels = np.sum(np.all(img > 240, axis=2))
    total_pixels = img.shape[0] * img.shape[1]
    return white_pixels / total_pixels

def compute_sun_luminance(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(gray)
    x, y = max_loc
    h, w = img.shape[:2]
    x1, x2 = max(0, x-10), min(w, x+10)
    y1, y2 = max(0, y-10), min(h, y+10)
    roi = gray[y1:y2, x1:x2]
    return np.mean(roi), max_val

def get_time_features(filename):
    try:
        name = os.path.splitext(os.path.basename(filename))[0]
        parts = name.replace('-', '').split('_')
        digits = ''.join(filter(str.isdigit, parts[-1]))
        if len(digits) >= 12:
            hour = int(digits[8:10])
            minute = int(digits[10:12])
            return hour, minute
    except:
        pass
    return -1, -1

In [3]:
# 📁 Обхід папок images/<label>/*
IMAGE_ROOT = 'images'
data = []

for label in sorted(os.listdir(IMAGE_ROOT)):
    label_dir = os.path.join(IMAGE_ROOT, label)
    if not os.path.isdir(label_dir):
        continue
    print(f"Обробка класу: {label}")
    for fname in tqdm(os.listdir(label_dir)):
        if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        path = os.path.join(label_dir, fname)
        img = cv2.imread(path)
        if img is None:
            print(f"⚠️ Пропущено: {path}")
            continue
        cloud_coverage = compute_cloud_coverage(img)
        white_pixel_ratio = compute_white_pixel_ratio(img)
        sun_lum, sun_max = compute_sun_luminance(img)
        hour, minute = get_time_features(fname)

        data.append({
            'filename': f'{label}/{fname}',
            'label': label,
            'cloud_coverage': cloud_coverage,
            'white_pixel_ratio': white_pixel_ratio,
            'sun_luminance_mean': sun_lum,
            'sun_luminance_max': sun_max,
            'hour': hour,
            'minute': minute
        })

Обробка класу: cloudy


100%|██████████| 75/75 [00:03<00:00, 21.54it/s]


Обробка класу: dark


100%|██████████| 380/380 [00:14<00:00, 26.02it/s]


Обробка класу: partly cloudy


100%|██████████| 171/171 [00:07<00:00, 23.65it/s]


Обробка класу: sunny


100%|██████████| 1321/1321 [01:04<00:00, 20.44it/s]


Обробка класу: sunrise or sunset


100%|██████████| 184/184 [00:07<00:00, 23.98it/s]


Обробка класу: very cloudy


100%|██████████| 458/458 [00:22<00:00, 20.04it/s]


In [4]:
# 💾 Збереження результатів
df = pd.DataFrame(data)
df.to_csv("sky_features.csv", index=False)
print("✅ Збережено sky_features.csv")
df.head()

✅ Збережено sky_features.csv


Unnamed: 0,filename,label,cloud_coverage,white_pixel_ratio,sun_luminance_mean,sun_luminance_max,hour,minute
0,cloudy/UTC-7_2019_09_08-12_42_44_132406.jpg,cloudy,0.758468,0.072913,239.515,255.0,-1,-1
1,cloudy/UTC-7_2019_09_08-12_42_54_133559.jpg,cloudy,0.756663,0.0717,243.28,255.0,-1,-1
2,cloudy/UTC-7_2019_09_08-12_43_04_134681.jpg,cloudy,0.75384,0.070308,244.205,255.0,-1,-1
3,cloudy/UTC-7_2019_09_08-12_43_14_135856.jpg,cloudy,0.7519,0.069996,240.545,255.0,-1,-1
4,cloudy/UTC-7_2019_09_08-12_43_24_148747.jpg,cloudy,0.748194,0.069477,241.2675,255.0,-1,-1
