# Черновики

In [None]:
import os
import fnmatch

def read_gitignore(gitignore_path):
    """Return the usable pattern lines of a ``.gitignore`` file.

    Args:
        gitignore_path: Path to the ignore file. It does not have to exist.

    Returns:
        A list of lines in file order, each with surrounding whitespace
        stripped. Blank lines and lines whose first character is ``#`` are
        dropped. An empty list is returned when the path does not exist.
    """
    if not os.path.exists(gitignore_path):
        return []
    # Explicit UTF-8 keeps the result independent of the platform's locale
    # encoding, matching how the other file readers in this notebook open files.
    with open(gitignore_path, 'r', encoding='utf-8') as file:
        return [
            line.strip()
            for line in file
            if line.strip() and not line.startswith('#')
        ]

def print_directory_structure(start_path, exclude_dirs, exclude_files):
    """Print an indented tree of ``start_path``, hiding excluded entries.

    Directories named in ``exclude_dirs`` and every ``.git`` directory are
    pruned from the walk, so nothing below them is listed. A file is hidden
    when it matches any pattern read from ``<start_path>/.gitignore`` or given
    in ``exclude_files``.

    Matching is an ``fnmatch``-based approximation of gitignore rules, not a
    full implementation: each pattern is tested against the file's path
    relative to ``start_path`` (with ``/`` separators) and, when the pattern
    contains no ``/``, against the bare file name as well.

    Args:
        start_path: Directory where the walk starts.
        exclude_dirs: Directory paths, relative to ``start_path``, to prune.
        exclude_files: Extra fnmatch patterns for files to hide.

    Returns:
        None. The tree is written to standard output, four spaces per level.
    """
    # A set gives O(1) membership tests while pruning.
    excluded = {
        os.path.normpath(os.path.join(start_path, ed)) for ed in exclude_dirs
    }
    # Loop-invariant, so build the combined pattern list once.
    patterns = read_gitignore(os.path.join(start_path, '.gitignore'))
    patterns = patterns + list(exclude_files)

    for root, dirs, files in os.walk(start_path, topdown=True):
        # In-place assignment is what makes os.walk skip the pruned directories.
        dirs[:] = [
            d for d in dirs
            if d != '.git'
            and os.path.normpath(os.path.join(root, d)) not in excluded
        ]

        # relpath instead of str.replace(start_path, ''): with start_path '.'
        # a replace would delete every dot in the path (including the ones in
        # file extensions), and it miscounts depth for a trailing separator.
        rel_root = os.path.relpath(root, start_path)
        level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
        indent = ' ' * 4 * level
        print(f'{indent}{os.path.basename(os.path.normpath(root))}/')
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            # gitignore patterns always use '/', whatever the OS separator is.
            relative_path = os.path.relpath(
                os.path.join(root, f), start_path
            ).replace(os.sep, '/')
            hidden = any(
                fnmatch.fnmatch(relative_path, pattern)
                or ('/' not in pattern and fnmatch.fnmatch(f, pattern))
                for pattern in patterns
            )
            if not hidden:
                print(f'{subindent}{f}')

# Directory where the walk starts.
start_path = '.'
# Directories (relative to start_path) left out of the listing.
exclude_dirs = ['data/raw', 'data/interim', 'data/benchmarked']
# Extra fnmatch patterns for files to hide; the .gitignore patterns are read
# by print_directory_structure itself, so this list starts empty.
exclude_files = []
print_directory_structure(
    start_path=start_path,
    exclude_dirs=exclude_dirs,
    exclude_files=exclude_files,
)

./
    .env
    .gitignore
    CLIP_finetuning_v3 (1).ipynb
    example_files.txt
    explore_errors.ipynb
    finetuned_clip_model.pth
    finetuned_clip_model_roc093_fixme.pth
    finetuned_clip_model_roc099_fixme.pth
    inference (5).ipynb
    inference (7).ipynb
    inference.ipynb
    LICENSE
    Makefile
    prepare_data_for_finetuning.ipynb
    pyproject.toml
    README.md
    requirements.txt
    setup.py
    test_environment.py
    test_files.txt
    tox.ini
    __init__.py
    data/
        benchmarked.rar
        external/
            .gitkeep
        processed/
            .gitkeep
    docs/
        commands.rst
        conf.py
        getting-started.rst
        index.rst
        make.bat
        Makefile
    models/
        .gitkeep
        finetuned_clip_model_roc093.pth
        finetuned_clip_model_roc099.pth
        finetuned_clip_model_roc09997.pth
        __pycache__/
            evaluation.cpython-311.pyc
    notebooks/
        .gitkeep
        Clip_finetuning_data

In [None]:
import re
import os

# Find function definitions and write their names and docstrings to a file
def find_functions_and_comments(file_paths, output_file='functions_and_comments.txt'):
    """Write the name and docstring of each documented function to a report.

    Every entry of ``file_paths`` that is an existing ``.py`` file is scanned
    with regular expressions (the source is not parsed or executed). For each
    such file a ``Path:`` header is written, followed by one
    ``Function:``/``Comment:`` pair per function whose body starts with a
    triple-quoted docstring. Functions without a docstring are omitted.

    Args:
        file_paths: Iterable of paths to scan. Entries that are not existing
            files or do not end in ``.py`` are skipped silently.
        output_file: Path of the UTF-8 report file; it is overwritten.

    Returns:
        None.
    """
    # Signature: name, a parameter list that may span several lines and hold
    # up to two levels of nested parentheses (e.g. tuple defaults), and an
    # optional ``-> annotation`` before the colon.
    func_pattern = re.compile(
        r'\bdef\s+([^\W\d]\w*)\s*'
        r'\(((?:[^()]|\((?:[^()]|\([^()]*\))*\))*)\)'
        r'\s*(?:->[^:]*)?:'
    )
    # Used with .match() at the end of the signature, so only a docstring that
    # directly follows the ``def`` line is accepted. Searching the rest of the
    # file would credit an undocumented function with a later one's docstring.
    comment_pattern = re.compile(
        r'\s*[rRuU]?("""|\'\'\')(.*?)\1', re.DOTALL
    )

    with open(output_file, 'w', encoding='utf-8') as out_f:
        for path in file_paths:
            if not (os.path.isfile(path) and path.endswith('.py')):
                continue
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            out_f.write(f'Path: {path}\n')
            for func in func_pattern.finditer(content):
                comment_match = comment_pattern.match(content, func.end())
                if comment_match:
                    func_name = func.group(1)
                    comment = comment_match.group(2).strip()
                    out_f.write(f'Function: {func_name}\nComment: {comment}\n\n')
            out_f.write('\n\n')
                    

# Example usage: the project scripts to scan
file_paths = [
    "src/data/crop_squares.py",
    "src/data/data_for_finetuning.py",
    "src/data/prediction.py",
    "src/data/prepare_dataset.py",
    "src/data/preprocess_labels.py",
    "src/data/preprocessing.py",
    "src/models/clip_finetune.py",
    "src/models/evaluation.py",
    "src/train.py",
]
find_functions_and_comments(file_paths=file_paths)

In [None]:
def extract_imports(file_path):
    """Print every line of a file that begins an import statement.

    A line qualifies when, after stripping surrounding whitespace, its first
    whitespace-delimited token is exactly ``import`` or ``from`` and something
    follows it. The check is textual: only the first line of a multi-line
    import is printed, and matching text inside strings is reported too.

    Args:
        file_path: Path of the UTF-8 text file to scan.

    Returns:
        None. Each hit is printed as ``<file_path>: <stripped line>``.
    """
    with open(file_path, "r", encoding='utf-8') as file:
        for line in file:
            stripped_line = line.strip()
            # Compare the whole first token rather than a prefix, so names such
            # as ``important`` or ``from_cache`` are not mistaken for imports.
            tokens = stripped_line.split(maxsplit=1)
            if len(tokens) == 2 and tokens[0] in ("import", "from"):
                print(f"{file_path}: {stripped_line}")

# Print the import lines of every script in the list
for script in file_paths:
    extract_imports(file_path=script)

src/data/crop_squares.py: import numpy as np
src/data/crop_squares.py: import matplotlib.pyplot as plt
src/data/crop_squares.py: import cv2
src/data/crop_squares.py: from copy import deepcopy
src/data/crop_squares.py: from PIL import Image, ImageDraw
src/data/crop_squares.py: from typing import Optional, Any, List
src/data/crop_squares.py: import os
src/data/data_for_finetuning.py: from PIL import Image
src/data/data_for_finetuning.py: import json
src/data/data_for_finetuning.py: import json
src/data/prepare_dataset.py: import pandas as pd
src/data/prepare_dataset.py: from torch.utils.data import Dataset, DataLoader
src/data/prepare_dataset.py: from torchvision import transforms
src/data/prepare_dataset.py: from PIL import Image
src/data/prepare_dataset.py: import numpy as np
src/data/prepare_dataset.py: import torch
src/data/preprocess_labels.py: import easyocr
src/data/preprocess_labels.py: import os
src/data/preprocess_labels.py: from PIL import Image
src/data/preprocessing.py: from