```
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <chrono>
#include <cstdio>

// Helper functions for simulation and I/O
// Advances the particle state stored in the device vector `data` by one step.
// NOTE(review): the body is a placeholder in this listing, so as written the
// call leaves `data` untouched. The buffer presumably uses the same
// four-floats-per-particle layout (x, y, vx, vy) that main() fills in —
// confirm once the real simulation code is added.
void simulate_particles(thrust::device_vector<float>& data) {
    // GPU computation
    /* ... particle simulation code ... */
}

// Stand-in for writing one frame of host-side particle data to disk.
// At present it only reports the frame number on stdout; `data` is not read.
void save_particles_to_file(const thrust::host_vector<float>& data, int frame) {
    static_cast<void>(data);  // placeholder: nothing is serialized yet
    std::printf("Saved frame %d\n", frame);
}

// Prints the mean position and mean velocity of the particles in `data`.
//
// `data` is read as consecutive groups of four floats per particle, in the
// order [x, y, vx, vy]. Trailing elements that do not form a complete group
// are ignored. When `data` holds no complete particle, a short notice is
// printed instead of averages.
void print_statistics(const thrust::host_vector<float>& data) {
    constexpr size_t kFloatsPerParticle = 4;
    const size_t n = data.size() / kFloatsPerParticle;

    printf("Frame statistics:\n");
    if(n == 0) {
        // Dividing by zero particles would print NaN for every average.
        printf("  No particles\n");
        return;
    }

    // Accumulate in double: with millions of particles a float running sum
    // exceeds the 24-bit mantissa and silently drops the low-order part of
    // every term, skewing the reported averages.
    double sum_x = 0.0;
    double sum_y = 0.0;
    double sum_vx = 0.0;
    double sum_vy = 0.0;

    for(size_t i = 0; i < n; i++) {
        const size_t base = i * kFloatsPerParticle;
        sum_x += data[base + 0];
        sum_y += data[base + 1];
        sum_vx += data[base + 2];
        sum_vy += data[base + 3];
    }

    const double count = static_cast<double>(n);
    printf("  Average position: (%f, %f)\n", sum_x / count, sum_y / count);
    printf("  Average velocity: (%f, %f)\n", sum_vx / count, sum_vy / count);
}

// Entry point: builds the initial particle state on the host, uploads it to
// the device, runs a fixed number of simulation frames, and periodically
// copies the state back to the host to save it and/or print statistics.
// Prints the wall-clock time spent in the frame loop and returns 0.
int main() {
    constexpr int kNumParticles = 1000000;   // 1 million particles
    constexpr int kFloatsPerParticle = 4;    // x, y, vx, vy
    constexpr int kNumFrames = 100;
    constexpr int kSaveInterval = 10;        // save every 10th frame
    constexpr int kStatsInterval = 25;       // print statistics every 25th frame

    // Initialize on host
    thrust::host_vector<float> h_init(kNumParticles * kFloatsPerParticle);
    for(int i = 0; i < kNumParticles; i++) {
        const int base = i * kFloatsPerParticle;
        h_init[base + 0] = i * 0.1f;     // x position
        h_init[base + 1] = i * -0.1f;    // y position
        h_init[base + 2] = 1.0f;         // x velocity
        h_init[base + 3] = -0.5f;        // y velocity
    }

    // Copy to device for simulation
    thrust::device_vector<float> d_data = h_init;
    // Host-side staging buffer, allocated once and reused for every download.
    thrust::host_vector<float> h_data(kNumParticles * kFloatsPerParticle);

    // steady_clock is monotonic; high_resolution_clock may alias the system
    // clock, which can jump and corrupt an interval measurement.
    const auto start = std::chrono::steady_clock::now();

    // Main simulation loop
    for(int frame = 0; frame < kNumFrames; frame++) {
        // GPU computation
        simulate_particles(d_data);

        const bool save_frame = (frame % kSaveInterval == 0);
        const bool stats_frame = (frame % kStatsInterval == 0);

        // Only copy to host when some CPU-side consumer needs the data;
        // one download serves both consumers on frames where they coincide.
        if(save_frame || stats_frame) {
            thrust::copy(d_data.begin(), d_data.end(), h_data.begin());

            if(save_frame) {
                save_particles_to_file(h_data, frame);
            }
            if(stats_frame) {
                print_statistics(h_data);
            }
        }
    }

    // NOTE(review): the last frame performs no device-to-host copy. If
    // simulate_particles ends up launching work asynchronously, synchronize
    // with the device before reading the clock so the measurement includes
    // it — TODO confirm once the simulation code is filled in.
    const auto end = std::chrono::steady_clock::now();
    const double seconds = std::chrono::duration<double>(end - start).count();
    printf("\nSimulation completed in %g seconds\n", seconds);

    return 0;
}
```