In [9]:
import requests
import os
import openai
import concurrent.futures
import time
from tqdm import tqdm
from torchvision import transforms
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time
import wandb
import sys
print(sys.executable)




/opt/anaconda3/envs/dev/bin/python


In [10]:
# Set the device to CPU since CUDA isn't available
device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [11]:
# print(sys.executable)
!pip install openai torch torchvision tqdm wandb scikit-learn requests



In [12]:


def check_imports():
    try:
        import openai
        print("✓ openai successfully imported")
    except ImportError:
        print("✗ openai not installed properly")
        
    try:
        import torch
        print(f"✓ torch successfully imported (version {torch.__version__})")
        print(f"  CUDA available: {torch.cuda.is_available()}")
    except ImportError:
        print("✗ torch not installed properly")
        
    try:
        import torchvision
        print(f"✓ torchvision successfully imported (version {torchvision.__version__})")
    except ImportError:
        print("✗ torchvision not installed properly")
        
    try:
        import tqdm
        print("✓ tqdm successfully imported")
    except ImportError:
        print("✗ tqdm not installed properly")
    try:
        import wandb
        print("✓ wandb successfully imported")
    except ImportError:
        print("✗ wandb not installed properly")
        
    try:
        import sklearn
        print(f"✓ scikit-learn successfully imported (version {sklearn.__version__})")
    except ImportError:
        print("✗ scikit-learn not installed properly")
        
    try:
        import requests
        print("✓ requests successfully imported")
    except ImportError:
        print("✗ requests not installed properly")

# Run the check
check_imports()

✓ openai successfully imported
✓ torch successfully imported (version 2.5.1)
  CUDA available: False
✓ torchvision successfully imported (version 0.20.1)
✓ tqdm successfully imported
✓ wandb successfully imported
✓ scikit-learn successfully imported (version 1.4.2)
✓ requests successfully imported


In [14]:
def download_unsplash_photos(client_id, save_dir, num_photos):
    """
    Downloads random stock photos from Unsplash API.
    
    Args:
        client_id (str): Your Unsplash API client ID
        save_dir (str): Directory where photos will be saved
        num_photos (int): Total number of photos to download
    
    Returns:
        list: Paths to downloaded photos
    """
    # Create directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    # Setup API parameters
    url = "https://api.unsplash.com/photos/random"
    headers = {
        "Authorization": f"Client-ID {client_id}",
        "Accept-Version": "v1"
    }
    
    downloaded_paths = []
    photos_to_download = num_photos

    # Download photos in batches since API limits to 30 per request
    while photos_to_download > 0:
        try:
            # Calculate batch size for this iteration
            batch_size = min(30, photos_to_download)
            
            params = {
                "count": batch_size,
                "query": "stock photo",
                "orientation": "landscape"  # Consistent image orientation
            }
            # Make API request with error handling
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()  # Raise exception for bad status codes
            photos = response.json()
            
            # Download each photo in the batch
            for photo in tqdm(photos, desc="Downloading photos"):
                try:
                    img_url = photo['urls']['regular']
                    img_response = requests.get(img_url)
                    img_response.raise_for_status()
                    
                    img_name = os.path.join(save_dir, f"{photo['id']}.jpg")
                    with open(img_name, 'wb') as f:
                        f.write(img_response.content)
                    
                    downloaded_paths.append(img_name)

                except (KeyError, requests.RequestException) as e:
                    print(f"Error downloading photo: {str(e)}")
                    continue
            
            photos_to_download -= len(photos)
            
            # Respect Unsplash API rate limits
            time.sleep(1)
            
        except requests.RequestException as e:
            print(f"Error making API request: {str(e)}")
            break
    
    print(f"Successfully downloaded {len(downloaded_paths)} photos")
    return downloaded_paths

In [19]:
def test_unsplash_download():
    client_id = "XTmL-fe-ko4CMi-wI-Huj_Z0vadoeby5dH9IKv_EAi8"  # Replace with your actual client ID
    save_dir = "unsplash_photos"
    num_photos = 10
    
    try:
        photo_paths = download_unsplash_photos(client_id, save_dir, num_photos)
        print(f"Download completed. Photos saved in {save_dir}")
        
        # Verify the downloads
        actual_files = os.listdir(save_dir)
        print(f"Number of files downloaded: {len(actual_files)}")
        
    except Exception as e:
        print(f"Error during download process: {str(e)}")

In [20]:
# Let's modify your StockPhotoDetector class to explicitly handle CPU tensors
class StockPhotoDetector(nn.Module):
    def __init__(self):
        super(StockPhotoDetector, self).__init__()
        
        # Reducing model complexity slightly since we're on CPU
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),  # Reduced from 32
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # Reduced from 64
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # Reduced from 128
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(64 * 28 * 28, 256),  # Reduced from 512
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

In [21]:
download_unsplash_photos('XTmL-fe-ko4CMi-wI-Huj_Z0vadoeby5dH9IKv_EAi8', 'unsplash_photos', 100)

Downloading photos: 100%|██████████| 30/30 [00:08<00:00,  3.56it/s]
Downloading photos: 100%|██████████| 30/30 [00:05<00:00,  5.43it/s]
Downloading photos: 100%|██████████| 30/30 [00:06<00:00,  4.54it/s]
Downloading photos: 100%|██████████| 10/10 [00:01<00:00,  7.77it/s]


Successfully downloaded 100 photos


['unsplash_photos/R-ntN6PE-4w.jpg',
 'unsplash_photos/Z7d3gwPMSRQ.jpg',
 'unsplash_photos/BauovfUhLhY.jpg',
 'unsplash_photos/WfrlYHgmLiM.jpg',
 'unsplash_photos/PVkX5S08u4Y.jpg',
 'unsplash_photos/lYMqE22gpXc.jpg',
 'unsplash_photos/bmO2bPPcomg.jpg',
 'unsplash_photos/HtqkHc8uhRI.jpg',
 'unsplash_photos/_-Ofoh09q_o.jpg',
 'unsplash_photos/7VSJvwH7dyg.jpg',
 'unsplash_photos/zrMpx6q2fZ0.jpg',
 'unsplash_photos/Vn6p6CVM2Q0.jpg',
 'unsplash_photos/gVa7zpwsxZo.jpg',
 'unsplash_photos/GEsUykZ1moU.jpg',
 'unsplash_photos/KlCs1THkzdc.jpg',
 'unsplash_photos/2IqKGn0pee4.jpg',
 'unsplash_photos/jjAAiZFooQg.jpg',
 'unsplash_photos/mrPm2HdS3lg.jpg',
 'unsplash_photos/x9KFWUlMnSU.jpg',
 'unsplash_photos/-W7PoC64tNI.jpg',
 'unsplash_photos/-itqD54b02U.jpg',
 'unsplash_photos/MYOyeR6WTxo.jpg',
 'unsplash_photos/LQhcr0apQ_c.jpg',
 'unsplash_photos/R-GPpK7Th2M.jpg',
 'unsplash_photos/iqrBzwJEsRo.jpg',
 'unsplash_photos/t3nwaZWoOvM.jpg',
 'unsplash_photos/i9TGriScRD8.jpg',
 'unsplash_photos/ppwxwTuCf7

In [None]:
openai.api_key = 'OPEN_API_KEY'

def generate_dalle_images(num_images, save_dir):
    os.makedirs(save_dir, exist_ok=True)
    for i in range(num_images):
        response = openai.Image.create(prompt="stock photo", n=1, size="1024x1024")
        img_url = response['data'][0]['url']
        img_data = requests.get(img_url).content
        with open(f"{save_dir}/dalle_{i}.jpg", 'wb') as f:
            f.write(img_data)

generate_dalle_images(100, 'dalle_images')

In [None]:
def download_batch(source, count):
    downloaded = 0
    while downloaded < count:
        try:
            # Example download process (adjust based on actual API)
            downloaded += 1
        except Exception as e:
            print(f"Error: {e}")
    return downloaded

sources = {'unsplash': 100, 'pexels': 100, 'flickr': 100}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(download_batch, source, count) for source, count in sources.items()]
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        result = future.result()
        print(f"Completed batch with {result} downloads")

In [None]:
transform_augment = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
train_paths, temp_paths, train_labels, temp_labels = train_test_split(image_paths, labels, test_size=0.3, random_state=42)
val_paths, test_paths, val_labels, test_labels = train_test_split(temp_paths, temp_labels, test_size=0.5, random_state=42)

In [None]:
class StockPhotoDetector(nn.Module):
    def __init__(self):
        super(StockPhotoDetector, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 2)  # 2 classes: Real vs AI-generated
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc_layers(x)
        return x

In [None]:
def train_model(model, train_loader, val_loader, num_epochs=10, device='cuda'):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.1)
    best_val_accuracy = 0.0
    best_model_path = 'best_model.pth'

    model = model.to(device)
    wandb.init(project="stock-photo-detector", name="training_run_v1")

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_accuracy = correct_val / total_val
        print(f"Train Loss: {running_loss / len(train_loader):.4f}, Val Accuracy: {val_accuracy:.4f}")

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f"Saved Best Model at Epoch {epoch+1}")

        scheduler.step(val_loss)

In [None]:
def evaluate_model(model, test_loader, device='cuda'):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.4f}")