In [2]:
import pandas as pd
# Read the cleaned training table, report its (rows, columns) shape, and
# preview the price column alongside the lat/long coordinates.
df = pd.read_csv('data/train_clean.csv')
print(df.shape)
df.loc[:, ['price', 'lat', 'long']].head()

(16209, 20)


Unnamed: 0,price,lat,long
0,268643,47.4362,-122.187
1,245000,47.4034,-122.187
2,200000,47.2704,-122.313
3,352499,47.5321,-122.073
4,232000,47.3715,-122.074


In [3]:
import requests

# Smoke test: fetch one World_Imagery tile and write the raw bytes to disk to
# confirm the endpoint is reachable before attempting a bulk download.
url = "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/17/33164/61262"

# Without a timeout, requests can block forever on a stalled connection.
response = requests.get(url, timeout=10)

# Fail loudly on 4xx/5xx instead of saving an error body under a .jpg name
# and reporting it as a success.
response.raise_for_status()

# Save raw bytes to file
with open('test_raw.jpg', 'wb') as f:
    f.write(response.content)

# NOTE(review): the recorded run returned only 2521 bytes -- open
# test_raw.jpg to confirm it is real imagery and not a blank/placeholder tile.
print("File saved as test_raw.jpg")
print(f"Response size: {len(response.content)} bytes")
print(f"Status code: {response.status_code}")


File saved as test_raw.jpg
Response size: 2521 bytes
Status code: 200


In [None]:
import requests
import time
import os
from tqdm import tqdm
import math

# --- Configuration -----------------------------------------------------------
IMAGE_DIR = 'satellite_images'   # one JPEG per dataframe row, named by row position
ZOOM = 17                        # tile zoom level requested from the service
REQUEST_TIMEOUT = 10             # seconds before a single request is abandoned
POLITE_DELAY = 0.1               # seconds to pause after each request
TILE_URL = ("https://server.arcgisonline.com/ArcGIS/rest/services/"
            "World_Imagery/MapServer/tile/{z}/{y}/{x}")


def _latlon_to_tile(lat, lon, zoom):
    """Return the (x, y) tile indices that contain the point (lat, lon).

    Uses the standard slippy-map (Web Mercator) tiling formula with
    ``2**zoom`` tiles per axis. Indices are clamped to ``[0, 2**zoom - 1]``
    so a longitude of exactly 180 cannot produce an out-of-range column.

    Args:
        lat: Latitude in degrees; must satisfy ``abs(lat) < 90``.
        lon: Longitude in degrees.
        zoom: Integer zoom level.

    Returns:
        Tuple ``(x, y)`` of integer tile column and row.
    """
    n = 2 ** zoom
    lat_rad = math.radians(lat)
    x = int((lon + 180) / 360 * n)
    y = int((1 - math.log(math.tan(lat_rad) + 1 / math.cos(lat_rad)) / math.pi) / 2 * n)
    return min(max(x, 0), n - 1), min(max(y, 0), n - 1)


# Create folder
os.makedirs(IMAGE_DIR, exist_ok=True)

# Download one tile per row. The cell is resumable: rows whose image already
# exists are skipped, so re-running it only retries what is still missing.
start_time = time.time()
count = 0      # images fetched during this run
skipped = 0    # images already on disk from an earlier run
failed = []    # row positions that could not be downloaded this run

# Iterating the two columns directly avoids materialising a full row Series
# twice per iteration, which is what df.iloc[i][...] does.
coords = zip(df['lat'], df['long'])

# A Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    for i, (lat, lon) in enumerate(tqdm(coords, total=len(df), desc="Downloading")):
        filename = f'{IMAGE_DIR}/{i:05d}.jpg'

        if os.path.exists(filename):
            skipped += 1
            continue

        # Missing or out-of-range coordinates cannot be mapped to a tile.
        if not (math.isfinite(lat) and math.isfinite(lon) and abs(lat) < 90):
            failed.append(i)
            continue

        x, y = _latlon_to_tile(lat, lon, ZOOM)
        url = TILE_URL.format(z=ZOOM, y=y, x=x)

        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            # One timeout or connection error must not abort the whole run.
            failed.append(i)
        else:
            if response.status_code == 200:
                # Write to a temp name and rename, so an interrupted write
                # never leaves a truncated .jpg that the resume check would
                # mistake for a finished download.
                tmp_name = filename + '.part'
                with open(tmp_name, 'wb') as f:
                    f.write(response.content)
                os.replace(tmp_name, filename)
                count += 1
            else:
                failed.append(i)

        time.sleep(POLITE_DELAY)  # Polite delay

total_time = time.time() - start_time
print(f"✅ Downloaded {count}/{len(df)} images in {total_time/3600:.1f} hours")
print(f"Already on disk: {skipped} | failed: {len(failed)} (re-run this cell to retry)")
