# Counterfeit Product Image Detection - Colab Setup

This notebook sets up the environment for data collection on Google Colab.


## Step 1: Mount Google Drive


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# Later cells read and write under this mount point (project directory,
# storage check), so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')


## Step 2: Navigate to Project Directory


In [None]:
import os
from pathlib import Path

# Mount point passed to drive.mount() in Step 1
DRIVE_MOUNT_POINT = Path('/content/drive')

# Set project directory (adjust path if needed)
PROJECT_DIR = Path('/content/drive/MyDrive/Counterfeit-Product-Image-Detection')

# Guard against an unmounted Drive: mkdir(parents=True) below would otherwise
# succeed by creating the whole tree on the VM's local disk, and everything
# saved there would be lost when the Colab runtime is recycled.
# The check only applies when PROJECT_DIR actually lives under the Drive mount,
# so a custom path elsewhere keeps working.
if DRIVE_MOUNT_POINT in PROJECT_DIR.parents and not os.path.ismount(DRIVE_MOUNT_POINT):
    raise RuntimeError(
        f"Google Drive is not mounted at {DRIVE_MOUNT_POINT}. "
        "Run Step 1 (drive.mount) before this cell."
    )

# Create project directory if it doesn't exist
PROJECT_DIR.mkdir(parents=True, exist_ok=True)

# Change to project directory so relative paths (e.g. the Step 6 script)
# resolve inside the project
os.chdir(PROJECT_DIR)

print(f"✓ Working in: {PROJECT_DIR}")
print(f"✓ Current directory: {os.getcwd()}")


## Step 3: Install Dependencies


In [None]:
# Install the third-party packages (presumably those needed by the data
# collection scripts) into the kernel's environment; -q keeps pip's output quiet.
# NOTE(review): versions are unpinned, so a rerun may resolve to different releases.
%pip install -q datasets pillow tqdm requests beautifulsoup4

# NOTE(review): this message is printed unconditionally; nothing here checks
# whether the install above actually succeeded — confirm in the cell output.
print("✓ Dependencies installed")


## Step 4: Create Data Directories


In [None]:
# Lay out the on-disk dataset structure under PROJECT_DIR/data:
# one tree for images and one for text, each with an 'authentic' and a 'fake'
# subfolder. Every mkdir tolerates folders that already exist, so the cell can
# be rerun safely.
data_dir = PROJECT_DIR / 'data' / 'images'
text_dir = PROJECT_DIR / 'data' / 'text'

created_dirs = []
for base_dir in (data_dir, text_dir):
    base_dir.mkdir(parents=True, exist_ok=True)
    for label in ('authentic', 'fake'):
        label_dir = base_dir / label
        label_dir.mkdir(exist_ok=True)
        created_dirs.append(label_dir)

# Report the class folders in creation order
print(f"✓ Data directories created:")
for label_dir in created_dirs:
    print(f"  - {label_dir}")


## Step 5: Check Storage


In [None]:
import shutil

# Divisor used to convert byte counts for display (reported with a "GB" label)
BYTES_PER_UNIT = 1024**3

# Report capacity figures for the filesystem backing the mounted Drive folder
total, used, free = shutil.disk_usage("/content/drive/MyDrive")
print(f"Google Drive Storage:")
for label, num_bytes in (("Total", total), ("Used", used), ("Free", free)):
    print(f"  {label}: {num_bytes / BYTES_PER_UNIT:.2f} GB")

# Report how much the project's data folder currently occupies, if present
data_path = PROJECT_DIR / 'data'
if not data_path.exists():
    print("\nData folder not found (will be created during data collection)")
else:
    # Walk the whole tree and add up the sizes of regular files only
    total_size = 0
    for entry in data_path.rglob('*'):
        if entry.is_file():
            total_size += entry.stat().st_size
    print(f"\nCurrent data folder size: {total_size / BYTES_PER_UNIT:.2f} GB")


## Step 6: Run Data Collection

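Now you're ready to run the data collection scripts. Make sure `step1_download_lffd.py` is present in the project directory from Step 2 (the notebook's working directory) before running the cell below. The scripts will automatically save their output to Google Drive.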


In [None]:
# Run LFFD dataset download
# This will download ~500MB and save to Drive
# The script path is relative, so the shell resolves it against the current
# working directory (Step 2 changes it to PROJECT_DIR); the script must be there.
!python step1_download_lffd.py


## Step 7: Monitor Progress


In [None]:
# Progress snapshot: how many .jpg files each class folder holds so far,
# plus the combined on-disk size of everything under the data folder.
from pathlib import Path

authentic_dir = PROJECT_DIR / 'data' / 'images' / 'authentic'
fake_dir = PROJECT_DIR / 'data' / 'images' / 'fake'

# Only files matching *.jpg directly inside each folder are counted
if not authentic_dir.exists():
    print("Authentic directory not found")
else:
    authentic_count = sum(1 for _ in authentic_dir.glob('*.jpg'))
    print(f"Authentic images: {authentic_count}")

if not fake_dir.exists():
    print("Fake directory not found")
else:
    fake_count = sum(1 for _ in fake_dir.glob('*.jpg'))
    print(f"Fake images: {fake_count}")

# Total size of all regular files anywhere under the data folder
data_root = PROJECT_DIR / 'data'
if data_root.exists():
    total_size = 0
    for entry in data_root.rglob('*'):
        if entry.is_file():
            total_size += entry.stat().st_size
    print(f"\nTotal data size: {total_size / (1024**3):.2f} GB")
