### Basic library imports

In [3]:
import concurrent.futures
import os
from urllib.parse import urlsplit

import pandas as pd
import requests  # For downloading images

# Folder holding the CSV files, relative to the notebook's working directory.
# Forward slashes are used in the path literal.
DATASET_FOLDER = '../dataset'

# Show where the kernel is running and what the dataset folder contains, so a
# wrong relative path is obvious from the cell output.
print("Current working directory:", os.getcwd())
print("Files in dataset folder:", os.listdir(DATASET_FOLDER))


def _read_dataset_csv(file_name):
    """Read one CSV from DATASET_FOLDER, falling back to an empty DataFrame.

    Args:
        file_name: Name of the CSV file inside DATASET_FOLDER.

    Returns:
        The parsed DataFrame, or an empty DataFrame when the file is missing.
        Returning an empty frame keeps the target variable defined, so later
        code can test ``.empty`` instead of failing with a NameError.
    """
    try:
        frame = pd.read_csv(os.path.join(DATASET_FOLDER, file_name))
    except FileNotFoundError as e:
        print(f"Error: {file_name} not found. {e}")
        return pd.DataFrame()
    print(f"Successfully read {file_name}")
    return frame


# All three names are always bound after this point, even when a file is missing.
train = _read_dataset_csv('train.csv')
test = _read_dataset_csv('test.csv')
sample_test = _read_dataset_csv('sample_test.csv')

# Only start the download pipeline when sample_test has rows and exposes an
# 'image_link' column; otherwise the matching else branch prints a message.
if not sample_test.empty and 'image_link' in sample_test.columns:
    # Copy the 'image_link' column into a plain Python list
    image_links = sample_test['image_link'].tolist()
    print("Image links:", image_links[:5])  # Print first 5 links for verification

    # Mock function to download images (replace with your actual function)
    def download_images(image_link, save_path, timeout=10):
        """Download a single image and write it into ``save_path``.

        The file is named after the last component of the URL's path, so a
        query string or fragment on the link never ends up in the file name.
        Every failure is reported with ``print`` and swallowed, so one bad
        link cannot abort the rest of a batch.

        Args:
            image_link: URL of the image to fetch.
            save_path: Directory the image file is written into; it is not
                created here.
            timeout: Seconds to wait on the server before giving up.
        """
        try:
            # Without a timeout requests can wait on a stalled connection
            # indefinitely, which would pin a worker thread forever.
            response = requests.get(image_link, timeout=timeout)
            if response.status_code == 200:
                # Name the file from the URL path only (no "?query" / "#fragment").
                url_path = urlsplit(image_link).path
                file_name = os.path.join(save_path, os.path.basename(url_path))
                with open(file_name, 'wb') as f:
                    f.write(response.content)
                print(f"Downloaded: {file_name}")
            else:
                print(f"Failed to download: {image_link}")
        except Exception as e:
            # Deliberately broad: network, URL and file-system errors are all
            # reported the same way and the caller moves on to the next link.
            print(f"Error downloading {image_link}: {e}")

    # Function to download a batch of images
    def download_batch(image_links, save_path, max_workers=50):
        """Download every link in ``image_links`` concurrently.

        Each link is handed to ``download_images`` on a thread pool. The call
        returns only after all submitted downloads have finished, because
        leaving the ``with`` block waits for the pool to shut down.

        Args:
            image_links: Iterable of image URLs to fetch.
            save_path: Directory passed through to ``download_images``.
            max_workers: Upper bound on the number of concurrent threads.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for link in image_links:
                # Arguments are passed positionally, so no lambda/closure is needed.
                executor.submit(download_images, link, save_path)

    # Ensure the directory exists
    images_folder = '../images'
    os.makedirs(images_folder, exist_ok=True)

    # The link list can contain the same URL several times. Drop repeats while
    # keeping first-seen order, so no image is fetched twice and two threads
    # never write the same file at the same time.
    unique_links = list(dict.fromkeys(image_links))

    # Split the unique links into batches of 50
    batch_size = 50
    batches = [unique_links[i:i+batch_size] for i in range(0, len(unique_links), batch_size)]

    # Process each batch; a batch finishes completely before the next one starts
    for batch in batches:
        download_batch(batch, images_folder)
else:
    print("sample_test.csv is either empty or does not contain 'image_link' column.")

Current working directory: d:\Downloads\66e31d6ee96cd_student_resource_3\student_resource 3\src
Files in dataset folder: ['sample_test.csv', 'sample_test_out.csv', 'sample_test_out_fail.csv', 'test.csv', 'train.csv']
Successfully read train.csv
Successfully read test.csv
Successfully read sample_test.csv
Image links: ['https://m.media-amazon.com/images/I/41-NCxNuBxL.jpg', 'https://m.media-amazon.com/images/I/41-NCxNuBxL.jpg', 'https://m.media-amazon.com/images/I/417NJrPEk+L.jpg', 'https://m.media-amazon.com/images/I/417SThj+SrL.jpg', 'https://m.media-amazon.com/images/I/417SThj+SrL.jpg']
Downloaded: ../images\41uwo4PVnuL.jpg
Downloaded: ../images\51r7U52rh7L.jpg
Downloaded: ../images\41-NCxNuBxL.jpg
Downloaded: ../images\51l6c6UcRZL.jpg
Downloaded: ../images\51oaOP8qJlL.jpg
Downloaded: ../images\51EBBqNOJ1L.jpg
Downloaded: ../images\51fAzxNm+cL.jpg
Downloaded: ../images\41pvwR9GbaL.jpg
Downloaded: ../images\41ADVPQgZOL.jpg
Downloaded: ../images\41zgjN+zW3L.jpg
Downloaded: ../images\417