In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the original label table and list the distinct values of its 'level' column.
labels_csv = 'D:/dataset/_eyepacs/data/trainLabels.csv'
original_train = pd.read_csv(labels_csv)
original_train['level'].unique()

array([0, 1, 2, 4, 3])

In [5]:
# Collapse the five original grades into three classes, in place:
#   0 -> 0,   1/2/3 -> 1,   4 -> 2
# The rules are applied one after another, in this order.
# NOTE: this cell is not idempotent. After one run the value 2 means "former
# grade 4"; running the cell again would fold it into class 1. Reload the CSV
# before re-running.
grade_rules = [([0], 0), ([1, 2, 3], 1), ([4], 2)]
for source_grades, target_class in grade_rules:
    original_train.loc[original_train['level'].isin(source_grades), 'level'] = target_class


In [9]:
# Switch to the column names used by the rest of the notebook, then work on a copy.
column_renames = {'level': 'true_label', 'image': 'image_name'}
original_train = original_train.rename(columns=column_renames)
df = original_train.copy()

In [11]:

# Make sure the label column holds integer class ids
df['true_label'] = df['true_label'].astype(int)

# Stratified split: 80% train / 20% test, fixed seed for reproducibility
split_options = dict(test_size=0.2, stratify=df['true_label'], random_state=42)
train_df, test_df = train_test_split(df, **split_options)

# Check that both parts keep the same class proportions
for header, part in (
    ("Train class distribution:", train_df),
    ("\nTest class distribution:", test_df),
):
    print(header)
    print(part['true_label'].value_counts(normalize=True))

# Save both parts.
# NOTE(review): the image-conversion cell further down reads these CSVs from
# ".../data/eyepacs_split/", one folder deeper than where they are written here.
# Presumably the files were moved by hand; confirm.
train_df.to_csv("D:/dataset/_eyepacs/data/train_dirty_split.csv", index=False)
test_df.to_csv("D:/dataset/_eyepacs/data/test_dirty_split.csv", index=False)

Train class distribution:
true_label
0    0.734804
1    0.245053
2    0.020142
Name: proportion, dtype: float64

Test class distribution:
true_label
0    0.734700
1    0.245090
2    0.020211
Name: proportion, dtype: float64


In [5]:
import os
import shutil
import pandas as pd
from tqdm import tqdm
from PIL import Image

# Paths
csv_train = "D:/dataset/_eyepacs/data/eyepacs_split/train_dirty_split.csv"
csv_test = "D:/dataset/_eyepacs/data/eyepacs_split/test_dirty_split.csv"
# source_dir = "D:/dataset/_eyepacs/data/data_1_baseline512"  # where the .jpeg images are located
# output_dir = "D:/dataset/_eyepacs/data/eyepacs_split"  # where the images will be copied to
source_dir = "D:/dataset/_eyepacs/data/ochishenii_fon_512"  # where the .jpeg images are located
output_dir = "D:/dataset/_eyepacs/data/ochishenii_fon_512_split"  # where the images will be copied to

# Create the <output_dir>/<split>/<label>/ folder structure.
# The labels in the split CSVs were collapsed to three classes (0, 1, 2), so only
# those folders are created. The previous range(5) also produced folders "3" and
# "4", which would stay empty.
class_labels = (0, 1, 2)
for split in ['train', 'test']:
    for label in class_labels:
        os.makedirs(os.path.join(output_dir, split, str(label)), exist_ok=True)

def copy_and_rename_images(df, split_name):
    """Re-encode the images listed in ``df`` into the per-class folders of a split.

    For every row, ``<source_dir>/<image_name>.jpeg`` is opened, converted to RGB
    and saved as ``<output_dir>/<split_name>/<true_label>/<image_name>.jpg``
    (JPEG, quality 95). ``source_dir`` and ``output_dir`` are read from the
    notebook's global namespace.

    Args:
        df: DataFrame with the columns ``image_name`` and ``true_label``.
        split_name: Name of the split sub-folder, e.g. ``"train"`` or ``"test"``.

    Returns:
        None. Missing source files and conversion errors are reported with
        ``print`` and skipped, so one bad file does not stop the run.
    """
    # Iterating over the two needed columns avoids building a Series per row,
    # which is what iterrows() does.
    rows = zip(df['image_name'], df['true_label'])
    for image_name, true_label in tqdm(rows, total=len(df)):
        src_path = os.path.join(source_dir, f"{image_name}.jpeg")
        dst_dir = os.path.join(output_dir, split_name, str(true_label))
        dst_path = os.path.join(dst_dir, f"{image_name}.jpg")

        if not os.path.exists(src_path):
            print(f"File not found: {src_path}")
            continue

        try:
            # Do not rely on the folder having been created beforehand.
            os.makedirs(dst_dir, exist_ok=True)
            # The context manager closes the source file handle right away;
            # a bare Image.open() leaves it open until garbage collection.
            with Image.open(src_path) as img:
                img.convert("RGB").save(dst_path, "JPEG", quality=95)
        except Exception as e:
            print(f"Error converting {src_path}: {e}")

# Load both split tables, then convert the images (the extension changes to .jpg)
train_df, test_df = (pd.read_csv(path) for path in (csv_train, csv_test))

# Only the test split is processed in this run; the train call is left disabled:
# copy_and_rename_images(train_df, "train")
copy_and_rename_images(test_df, "test")


100%|██████████| 7026/7026 [00:56<00:00, 123.28it/s]


In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import os

# Dataset after the brightness-based removal step
csv_path = "D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/train_all_image.csv"
output_dir = "D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds"
os.makedirs(output_dir, exist_ok=True)

# Read the full training table
df = pd.read_csv(csv_path)

# Number of folds (5 can be used for a more accurate estimate)
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Write one train CSV and one validation CSV per fold, stratified by 'true_label'
fold_splits = skf.split(df['image_name'], df['true_label'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_path = os.path.join(output_dir, f"fold_{fold}_ochilshenii_po_yarcosti_train.csv")
    val_path = os.path.join(output_dir, f"fold_{fold}_ochilshenii_po_yarcosti_val.csv")

    train_df = df.iloc[train_idx].reset_index(drop=True)
    train_df.to_csv(train_path, index=False)

    val_df = df.iloc[val_idx].reset_index(drop=True)
    val_df.to_csv(val_path, index=False)

    print(f" Saved: {train_path}, {val_path}")


✅ Saved: D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_0_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_0_ochilshenii_po_yarcosti_val.csv
✅ Saved: D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_1_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_1_ochilshenii_po_yarcosti_val.csv
✅ Saved: D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_2_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/ochisheni_no_yarkosti/folds\fold_2_ochilshenii_po_yarcosti_val.csv


In [2]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import os

# Dataset after the anomaly-removal step
csv_path = "D:/dataset/_eyepacs/data/filtered_data_with_cosine/train_all_image.csv"
output_dir = "D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds"
os.makedirs(output_dir, exist_ok=True)

# Read the full training table
df = pd.read_csv(csv_path)

# Number of folds (5 can be used for a more accurate estimate)
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Write one train CSV and one validation CSV per fold, stratified by 'true_label'.
# NOTE(review): the file names keep the "ochilshenii_po_yarcosti" suffix although
# this data lives under filtered_data_with_cosine. It looks like a copy-paste
# leftover; the names are kept as they are because other code may read them.
fold_splits = skf.split(df['image_name'], df['true_label'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_path = os.path.join(output_dir, f"fold_{fold}_ochilshenii_po_yarcosti_train.csv")
    val_path = os.path.join(output_dir, f"fold_{fold}_ochilshenii_po_yarcosti_val.csv")

    train_df = df.iloc[train_idx].reset_index(drop=True)
    train_df.to_csv(train_path, index=False)

    val_df = df.iloc[val_idx].reset_index(drop=True)
    val_df.to_csv(val_path, index=False)

    print(f" Saved: {train_path}, {val_path}")


 Saved: D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_0_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_0_ochilshenii_po_yarcosti_val.csv
 Saved: D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_1_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_1_ochilshenii_po_yarcosti_val.csv
 Saved: D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_2_ochilshenii_po_yarcosti_train.csv, D:/dataset/_eyepacs/data/filtered_data_with_cosine/folds\fold_2_ochilshenii_po_yarcosti_val.csv


In [1]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import os

# Dataset after the Euclidean-based removal step
csv_path = "D:/dataset/_eyepacs/data/center_evklid/center_evklid_train.csv"
output_dir = "D:/dataset/_eyepacs/data/center_evklid/folds"
os.makedirs(output_dir, exist_ok=True)

# Read the full training table
df = pd.read_csv(csv_path)

# Number of folds (5 can be used for a more accurate estimate)
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Write one train CSV and one validation CSV per fold, stratified by 'true_label'
fold_splits = skf.split(df['image_name'], df['true_label'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_path = os.path.join(output_dir, f"fold_{fold}_center_evklid_train.csv")
    val_path = os.path.join(output_dir, f"fold_{fold}_center_evklid_val.csv")

    train_df = df.iloc[train_idx].reset_index(drop=True)
    train_df.to_csv(train_path, index=False)

    val_df = df.iloc[val_idx].reset_index(drop=True)
    val_df.to_csv(val_path, index=False)

    print(f" Saved: {train_path}, {val_path}")


 Saved: D:/dataset/_eyepacs/data/center_evklid/folds\fold_0_center_evklid_train.csv, D:/dataset/_eyepacs/data/center_evklid/folds\fold_0_center_evklid_val.csv
 Saved: D:/dataset/_eyepacs/data/center_evklid/folds\fold_1_center_evklid_train.csv, D:/dataset/_eyepacs/data/center_evklid/folds\fold_1_center_evklid_val.csv
 Saved: D:/dataset/_eyepacs/data/center_evklid/folds\fold_2_center_evklid_train.csv, D:/dataset/_eyepacs/data/center_evklid/folds\fold_2_center_evklid_val.csv


In [2]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import os

# Dataset from the combined_cosine_evklid directory.
# (The original comment read "after Euclidean removal", apparently carried over
# from the previous cell.)
csv_path = "D:/dataset/_eyepacs/data/combined_cosine_evklid/combined_cosine_evklid.csv"
output_dir = "D:/dataset/_eyepacs/data/combined_cosine_evklid/folds"
os.makedirs(output_dir, exist_ok=True)

# Read the full training table
df = pd.read_csv(csv_path)

# Number of folds (5 can be used for a more accurate estimate)
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Write one train CSV and one validation CSV per fold, stratified by 'true_label'
fold_splits = skf.split(df['image_name'], df['true_label'])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_path = os.path.join(output_dir, f"fold_{fold}_combined_cosine_evklid_train.csv")
    val_path = os.path.join(output_dir, f"fold_{fold}_combined_cosine_evklid_val.csv")

    train_df = df.iloc[train_idx].reset_index(drop=True)
    train_df.to_csv(train_path, index=False)

    val_df = df.iloc[val_idx].reset_index(drop=True)
    val_df.to_csv(val_path, index=False)

    print(f" Saved: {train_path}, {val_path}")


 Saved: D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_0_combined_cosine_evklid_train.csv, D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_0_combined_cosine_evklid_val.csv
 Saved: D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_1_combined_cosine_evklid_train.csv, D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_1_combined_cosine_evklid_val.csv
 Saved: D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_2_combined_cosine_evklid_train.csv, D:/dataset/_eyepacs/data/combined_cosine_evklid/folds\fold_2_combined_cosine_evklid_val.csv
