In [None]:
# v1.01 - Process entire training set (requires training set - download it using downloads.ipynb)

# Module for processing entire training set

In [None]:
# Full pipeline for processing images in a folder
import os
import main as STtest2
import shutil
import ocr
import modules.detect_car as detect

def process_all_images_in_folder(
    folder_path: str,
    unreadable_folder: str = 'data/training-set/unreadable',
    multi_read_folder: str = 'data/training-set/multi_read',
) -> None:
    """
    Run the OCR / car-type pipeline over every image file in a folder.

    For each ``.png`` / ``.jpg`` / ``.jpeg`` file (extension match is
    case-insensitive) the image is loaded with ``STtest2.load_image`` and passed
    to ``ocr.mileage_ocr``. Depending on the result:

    * ``None`` or an empty list  -> the file is moved to ``unreadable_folder``;
    * a list with more than one element -> the file is moved to ``multi_read_folder``;
    * anything else -> the car type is obtained from ``detect.identify`` and the
      record is handed to ``STtest2.extract_and_append_data``, then
      ``STtest2.load_and_display_data`` is called and a progress line is printed.

    To function properly, use downloads.ipynb to download the necessary files and model.

    Args:
        folder_path (str): The path to the folder containing images to be processed.
        unreadable_folder (str): Destination for images with no mileage reading.
            Created if missing.
        multi_read_folder (str): Destination for images with several mileage
            readings. Created if missing.

    Returns:
        None
    """
    # os.listdir returns entries in arbitrary order; sort so that records are
    # appended in the same order on every run and on every machine.
    filenames = sorted(os.listdir(folder_path))

    # Set up folders for unreadable and multi-read images
    os.makedirs(unreadable_folder, exist_ok=True)
    os.makedirs(multi_read_folder, exist_ok=True)

    # The counter keeps running over skipped entries so that the printed
    # "i/total" refers to the position in the full (sorted) folder listing.
    for i, filename in enumerate(filenames, start=1):
        full_path = os.path.join(folder_path, filename)

        # Only regular files with an image extension are processed; a directory
        # that happens to be named like an image would fail in open().
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        if not os.path.isfile(full_path):
            continue

        # Open the image
        with open(full_path, 'rb') as f:
            img_cv = STtest2.load_image(f)

        # Read OCR
        mileage = ocr.mileage_ocr(img_cv)

        # No reading at all (None, or a list without any candidate): unreadable
        if mileage is None or (isinstance(mileage, list) and len(mileage) == 0):
            shutil.move(full_path, os.path.join(unreadable_folder, filename))
            continue

        # Several candidate readings: ambiguous, set aside for manual review
        if isinstance(mileage, list) and len(mileage) > 1:
            shutil.move(full_path, os.path.join(multi_read_folder, filename))
            continue

        # Identify the car type only for images that are actually recorded, so
        # the detector is not run on files that were just moved away.
        car_type = detect.identify(full_path)

        # Extract and append data to JSON file
        STtest2.extract_and_append_data(full_path, mileage, car_type)

        # Display the image
        STtest2.load_and_display_data(full_path)

        # Print loop progress
        print(f'Eroded image: {filename}, {i}/{len(filenames)}')

# Run the pipeline over the training set, which is split into two folders:
# first the car images, then the truck images.
for training_subfolder in ('data/training-set/car', 'data/training-set/truck'):
    process_all_images_in_folder(training_subfolder)

In [8]:
# Read mileage.json file for testing
import pandas as pd
import json
from typing import Optional

def test_json_file(file_path: str) -> Optional[pd.DataFrame]:
    """
    Reads a JSON file into a DataFrame and prints the first five records.

    Args:
        file_path (str): The path to the JSON file.

    Returns:
        df (pd.DataFrame): The DataFrame created from the JSON file, or None if an error occurred.
    """
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
            if data:
                df = pd.DataFrame(data)
                print(df.head(5))
                return df
            else:
                print("File is empty. Consider downloading backup using downloads.ipynb.")
                return None
    except FileNotFoundError:
        print("Plik does not exist. Consider downloading backup using downloads.ipynb.")
        return None
    except json.JSONDecodeError:
        print("Error decoding JSON file. Consider downloading backup using downloads.ipynb.")
        return None

# Load mileage.json; the function prints the first five records and, as the last
# expression of the cell, the returned DataFrame (or None) is also displayed.
test_json_file('mileage.json')

                                            Filename        Date      Time  \
0  ../RentML/dataset/training/Osobowy\20210824_10...  2021-08-24  10:50:26   
1  ../RentML/dataset/training/Osobowy\20211029_17...  2021-10-29  17:55:36   
2  ../RentML/dataset/training/Osobowy\20211230_18...  2021-12-30  18:51:10   
3  ../RentML/dataset/training/Osobowy\20220130_17...  2022-01-30  17:47:58   
4  ../RentML/dataset/training/Osobowy\20220201_08...  2022-02-01  08:40:13   

    Mileage Type  
0  [250284]  car  
1  [254410]  car  
2  [256163]  car  
3  [257050]  car  
4  [257050]  car  


Unnamed: 0,Filename,Date,Time,Mileage,Type
0,../RentML/dataset/training/Osobowy\20210824_10...,2021-08-24,10:50:26,[250284],car
1,../RentML/dataset/training/Osobowy\20211029_17...,2021-10-29,17:55:36,[254410],car
2,../RentML/dataset/training/Osobowy\20211230_18...,2021-12-30,18:51:10,[256163],car
3,../RentML/dataset/training/Osobowy\20220130_17...,2022-01-30,17:47:58,[257050],car
4,../RentML/dataset/training/Osobowy\20220201_08...,2022-02-01,08:40:13,[257050],car
...,...,...,...,...,...
142,../RentML/dataset/training/Dostawczy\L3H2_2022...,2022-11-11,14:34:00,[369076],truck
143,../RentML/dataset/training/Dostawczy\L4H2_2021...,2021-08-18,15:33:21,[269895],truck
144,../RentML/dataset/training/Dostawczy\L4H2_2023...,2023-06-30,13:20:11,[305059],truck
145,../RentML/dataset/training/Dostawczy\L4H2_2023...,2023-08-23,10:26:37,[308248],truck


# WIPE JSON FILE - CAREFUL - backup available in downloads.ipynb

In [None]:
# Wipe mileage.json file
def wipe_file_content(file_path: str) -> None:
    """
    Truncate a file to zero length, creating it when it does not exist yet.

    A backup of the data file is available through downloads.ipynb in case
    this is run by accident.

    Args:
        file_path (str): The path to the file to be wiped.

    Returns:
        None
    """
    # Opening in write mode is what discards the old content; nothing is written.
    handle = open(file_path, 'w')
    handle.close()

# CAUTION: this call truncates mileage.json to zero bytes every time the cell is
# executed, including when the whole notebook is run top to bottom.
wipe_file_content('mileage.json')