# EAST ICDAR 2015 Colab Setup

This notebook helps you upload, verify, and inspect your ICDAR 2015 dataset for EAST training using Colab's file upload widget.

**Steps:**
1. Import required libraries
2. Upload your zipped `data/icdar2015` folder
3. Extract and organize files
4. Verify folder structure
5. List files in train, test, splits
6. Load and display sample data

In [None]:
# 1. Import Required Libraries
import os
import shutil
from IPython.display import display, Image
from google.colab import files

## 2. Set Up File Upload Widget
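Use the widget below to upload your zipped `data/icdar2015` folder (e.g., `icdar2015.zip`). The archive should contain the `icdar2015` folder (or `data/icdar2015`) at its top level so that the dataset ends up at `data/icdar2015` after extraction.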

In [None]:
# 3. Upload Data Folder via Colab
# Opens Colab's upload widget; the uploaded files are saved into the current
# working directory and returned as a dict keyed by file name.
uploaded = files.upload()

# Assume the user uploads 'icdar2015.zip'
import zipfile

extracted_dir = 'icdar2015'
target_dir = 'data/icdar2015'

for fname in uploaded.keys():
    # Accept '.ZIP' etc. as well; anything else is reported rather than silently ignored.
    if not fname.lower().endswith('.zip'):
        print(f"Skipped {fname}: not a .zip archive")
        continue
    try:
        with zipfile.ZipFile(fname, 'r') as zip_ref:
            zip_ref.extractall('.')
    except zipfile.BadZipFile:
        print(f"Skipped {fname}: not a valid zip archive")
        continue
    print(f"Extracted {fname}")
    # Move to data/icdar2015 if needed
    if os.path.isdir(extracted_dir):
        os.makedirs('data', exist_ok=True)
        if os.path.isdir(target_dir):
            # shutil.move() into an existing directory would nest the source
            # inside it (data/icdar2015/icdar2015), so when this cell is re-run
            # the stale copy is replaced by the fresh upload instead.
            shutil.rmtree(target_dir)
            print("Replaced existing 'data/icdar2015'")
        shutil.move(extracted_dir, target_dir)
        print("Moved 'icdar2015' to 'data/icdar2015'")

# Make a failed or mis-structured upload visible here instead of in later cells.
if not os.path.isdir(target_dir):
    print("Warning: 'data/icdar2015' was not found after upload. "
          "Check that the archive contains an 'icdar2015' folder at its top level.")

## 4. Verify Uploaded Folder Structure
Check that the extracted folder contains the expected subdirectories: train, test, splits.

In [None]:
# Report, for each expected dataset subdirectory, whether it is present on disk
base_dir = 'data/icdar2015'
expected_dirs = ['train', 'test', 'splits']
status_labels = {True: 'Exists', False: 'Missing'}
for subdir in expected_dirs:
    is_present = os.path.exists(os.path.join(base_dir, subdir))
    print(f"{subdir}: {status_labels[is_present]}")

## 5. List Files in Train, Test, and Splits Directories
Display the files found in each directory to confirm successful upload.

In [None]:
import os

def list_files_in_dir(dir_path):
    """Print the names of the entries directly inside ``dir_path``.

    Entries are printed in sorted order, one per line, under a
    "Files in <dir_path>:" header and followed by a blank line. If
    ``dir_path`` is missing or is not a directory, a "does not exist"
    message is printed instead; nothing is raised in that case.

    Args:
        dir_path: Path of the directory to list.

    Returns:
        None. All output goes to stdout.
    """
    # isdir rather than exists: os.listdir() raises NotADirectoryError when
    # the path points at a regular file.
    if not os.path.isdir(dir_path):
        print(f"Directory {dir_path} does not exist.\n")
        return

    print(f"Files in {dir_path}:")
    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for entry_name in sorted(os.listdir(dir_path)):
        print(f"  {entry_name}")
    print()

# Same location the upload cell moves the dataset to and the verification
# cell checks. Update if your extraction path differs.
base_dir = 'data/icdar2015'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
splits_dir = os.path.join(base_dir, 'splits')

# List each dataset subdirectory in turn.
for dataset_dir in (train_dir, test_dir, splits_dir):
    list_files_in_dir(dataset_dir)


## 6. Display Sample Data from Train Directory
Load and display a sample image and its annotation from the train directory to verify data integrity.

In [None]:
import cv2
import matplotlib.pyplot as plt

# Find a sample image and its annotation in the train directory
# Guard against a missing directory (os.listdir would raise) and sort the
# entries so the same sample is picked on every run.
train_entries = sorted(os.listdir(train_dir)) if os.path.isdir(train_dir) else []
image_files = [f for f in train_entries if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
annotation_files = [f for f in train_entries if f.lower().endswith('.txt')]

if not os.path.isdir(train_dir):
    print(f"Train directory {train_dir} does not exist.")
elif not image_files:
    print("No image files found in train directory.")
else:
    sample_image = image_files[0]
    img_path = os.path.join(train_dir, sample_image)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; cvtColor would then fail with an opaque error.
        print(f"Could not read image {img_path}.")
    else:
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.figure(figsize=(8, 8))
        plt.imshow(img_rgb)
        plt.title(f"Sample Image: {sample_image}")
        plt.axis('off')
        plt.show()

    # Display corresponding annotation if exists
    # Try '<image>.txt' first, then 'gt_<image>.txt' (the naming presumably
    # used by the official ICDAR 2015 ground truth; verify against your data).
    image_stem = os.path.splitext(sample_image)[0]
    candidate_names = (image_stem + '.txt', 'gt_' + image_stem + '.txt')
    ann_name = next((name for name in candidate_names if name in annotation_files), None)
    if ann_name is not None:
        ann_path = os.path.join(train_dir, ann_name)
        # utf-8-sig decodes plain UTF-8 and also strips a leading BOM if present.
        with open(ann_path, 'r', encoding='utf-8-sig') as f:
            annotation = f.read()
        print(f"Annotation for {sample_image}:")
        print(annotation)
    else:
        print(f"No annotation found for {sample_image}.")
