In [None]:
import os
import shutil
from google.colab import drive

print("Cleaning up broken connection points...")

!umount -f /content/drive

if os.path.exists('/content/drive'):
    shutil.rmtree('/content/drive')
    print("Deleted blocked directory.")

os.makedirs('/content/drive')
print("Created fresh directory.")

print("Attempting to mount...")
drive.mount('/content/drive')

if os.path.exists("/content/drive/MyDrive/housing_project/train_images"):
    count = len(os.listdir("/content/drive/MyDrive/housing_project/train_images"))
    print(f"\nSUCCESS: Drive connected! You have {count} images ready.")
else:
    print("\nDrive connected, but 'housing_project' folder not found yet.")

Cleaning up broken connection points...
umount: /content/drive: not mounted.
Deleted blocked directory.
Created fresh directory.
Attempting to mount...
Mounted at /content/drive

SUCCESS: Drive connected! You have 10884 images ready.


In [None]:
# pandas is imported here so this cell runs on a fresh kernel; the notebook's
# main import cell only appears further down.
import pandas as pd

# Training table stored at the root of the mounted Drive.
file_path = "/content/drive/MyDrive/train.csv"
df = pd.read_csv(file_path)

In [None]:
!pip install sentinelhub --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/243.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m235.5/243.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.4/243.4 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/165.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.6/165.6 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.1/250.1 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import requests
import pandas as pd
import os
import time
from io import BytesIO
from PIL import Image


# Sentinel Hub OAuth client credentials.
# They are read from the environment (SH_CLIENT_ID / SH_CLIENT_SECRET) and, when
# unset, requested interactively so they never live in the notebook source.
# NOTE(review): the credential pair that used to be hardcoded here was exposed
# in the notebook and should be revoked and re-issued.
import getpass

CLIENT_ID = os.environ.get("SH_CLIENT_ID") or getpass.getpass("Sentinel Hub client ID: ")
CLIENT_SECRET = os.environ.get("SH_CLIENT_SECRET") or getpass.getpass("Sentinel Hub client secret: ")

# Project root on the mounted Drive and the per-split image folders beneath it.
BASE_DIR = "/content/drive/MyDrive/housing_project"
TRAIN_IMG_DIR, TEST_IMG_DIR = (
    os.path.join(BASE_DIR, leaf) for leaf in ("train_images", "test_images")
)

# Create both output folders up front; folders that already exist are kept.
for _img_dir in (TRAIN_IMG_DIR, TEST_IMG_DIR):
    os.makedirs(_img_dir, exist_ok=True)

def get_auth_token():
    """Request an OAuth2 access token from Sentinel Hub.

    Performs a client-credentials grant using the module-level CLIENT_ID and
    CLIENT_SECRET.

    Returns:
        The access token string, or None when the request fails, the server
        answers with an error status, or the response carries no usable
        'access_token' field. Failures are reported on stdout.
    """
    token_url = "https://services.sentinel-hub.com/oauth/token"
    payload = {
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET
    }
    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(token_url, data=payload, timeout=30)
        response.raise_for_status()
        return response.json()['access_token']
    except (requests.RequestException, KeyError, ValueError, TypeError) as e:
        # Network/HTTP errors, a non-JSON body, or an unexpected JSON shape.
        print(f"Auth Error: {e}")
        return None

def download_image_sentinel(lat, lon, house_id, save_folder, token):
    """Download one Sentinel-2 true-colour tile around a point and save it as PNG.

    The tile is a 224x224 image covering a box of +/- 0.005 degrees around
    (lat, lon), requested from the Sentinel Hub Process API with the
    least-cloudy mosaicking order over the 2023 time range.

    Args:
        lat: Latitude of the box centre (EPSG:4326).
        lon: Longitude of the box centre (EPSG:4326).
        house_id: Identifier used in the output file name ``house_<id>.png``.
        save_folder: Directory the PNG is written into.
        token: Bearer token sent in the Authorization header.

    Returns:
        A status string: "Skipped" when the target file already exists,
        "Success" after a successful save, "Failed: <status code>" for a
        non-200 response, or "Error: <exception>" when anything raises.
    """
    url = "https://services.sentinel-hub.com/api/v1/process"
    file_path = os.path.join(save_folder, f"house_{house_id}.png")

    # Resume support: an existing file is treated as already downloaded.
    if os.path.exists(file_path):
        return "Skipped"

    offset = 0.005
    bbox = [lon - offset, lat - offset, lon + offset, lat + offset]

    evalscript = """
    //VERSION=3
    function setup() {
      return {
        input: ["B04", "B03", "B02"],
        output: { bands: 3 }
      };
    }
    function evaluatePixel(sample) {
      // Multiply by 2.0 to brighten the image
      return [sample.B04 * 2.0, sample.B03 * 2.0, sample.B02 * 2.0];
    }
    """

    payload = {
        "input": {
            "bounds": {
                "bbox": bbox,
                "properties": {"crs": "http://www.opengis.net/def/crs/EPSG/0/4326"}
            },
            "data": [{
                "type": "sentinel-2-l1c",
                "dataFilter": {
                    "timeRange": {"from": "2023-01-01T00:00:00Z", "to": "2023-12-30T23:59:59Z"},
                    "mosaickingOrder": "leastCC" # Least Cloudy
                }
            }]
        },
        "output": {
            "width": 224,
            "height": 224,
            "responses": [{"identifier": "default", "format": {"type": "image/png"}}]
        },
        "evalscript": evalscript
    }

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }

    try:
        # A timeout keeps one stalled request from blocking the whole run.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        if response.status_code != 200:
            return f"Failed: {response.status_code}"

        # Write to a sibling temp file and rename it into place, so an
        # interrupted save never leaves a truncated PNG that the existence
        # check above would later mistake for a finished download.
        tmp_path = file_path + ".part"
        try:
            with Image.open(BytesIO(response.content)) as img:
                # The temp name has no .png suffix, so the format is explicit.
                img.save(tmp_path, format="PNG")
            os.replace(tmp_path, file_path)
        finally:
            # Only present if the save or rename failed part-way.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        return "Success"
    except Exception as e:
        # Reported to the caller as a status string rather than raised.
        return f"Error: {e}"

def process_dataset_in_batches(file_path, save_folder, batch_size=50, refresh_every=500):
    """Download one satellite image per CSV row, working through the file in batches.

    The CSV is read with pandas and must provide 'id', 'lat' and 'long'
    columns. Each row is passed to download_image_sentinel; successful
    downloads print a dot, failures print "[<id>: <status>]", and rows whose
    image already exists are silent.

    Args:
        file_path: Path of the CSV file to read.
        save_folder: Directory the images are written into.
        batch_size: Number of rows per progress batch.
        refresh_every: Request a new access token once at least this many rows
            have been processed since the last one was obtained. The count is
            checked at batch boundaries, so the cadence no longer depends on
            batch_size dividing 500.

    Returns:
        None. Stops early, with a message, if no access token can be obtained.
    """
    # Load Data
    print(f"Loading {file_path}...")
    df = pd.read_csv(file_path)
    total_rows = len(df)

    print(f"Total Images to Process: {total_rows}")

    token = get_auth_token()
    if token is None:
        # Every request would be sent as "Bearer None" and fail.
        print("Could not obtain an access token; aborting.")
        return
    rows_since_refresh = 0

    for start in range(0, total_rows, batch_size):
        end = min(start + batch_size, total_rows)
        print(f"\nProcessing batch {start} to {end}...")

        if rows_since_refresh >= refresh_every:
            new_token = get_auth_token()
            if new_token is not None:
                token = new_token
                rows_since_refresh = 0
                print("Token Refreshed.")
            else:
                # Keep the old token; the counter is left alone so the refresh
                # is attempted again at the next batch.
                print("Token refresh failed; continuing with the previous token.")

        subset = df.iloc[start:end]

        # NOTE(review): iterrows() converts each row to a single dtype, so if
        # every CSV column is numeric the id would render as a float
        # (e.g. "123.0") — confirm the file has a non-numeric column.
        for index, row in subset.iterrows():
            house_id = str(row['id'])

            status = download_image_sentinel(
                row['lat'],
                row['long'],
                house_id,
                save_folder,
                token
            )

            if status == "Success":
                print(".", end="")
            elif status != "Skipped":
                print(f"[{house_id}: {status}]", end=" ")

            # Small pause between requests.
            time.sleep(0.1)

        rows_since_refresh += end - start

# CSV inputs on the mounted Drive, one per dataset split.
train_file = "/content/drive/MyDrive/train.csv"
test_file = "/content/drive/MyDrive/test.csv"

# Run the downloader for the train split first, then the test split.
for banner, csv_path, image_dir in (
    ("--- STARTING TRAIN DATASET ---", train_file, TRAIN_IMG_DIR),
    ("\n--- STARTING TEST DATASET ---", test_file, TEST_IMG_DIR),
):
    print(banner)
    process_dataset_in_batches(csv_path, image_dir, batch_size=200)

--- STARTING TRAIN DATASET ---
Loading /content/drive/MyDrive/train.csv...
Total Images to Process: 16209

Processing batch 0 to 200...
Token Refreshed.

Processing batch 200 to 400...

Processing batch 400 to 600...

Processing batch 600 to 800...

Processing batch 800 to 1000...

Processing batch 1000 to 1200...
Token Refreshed.

Processing batch 1200 to 1400...

Processing batch 1400 to 1600...

Processing batch 1600 to 1800...

Processing batch 1800 to 2000...

Processing batch 2000 to 2200...
Token Refreshed.

Processing batch 2200 to 2400...

Processing batch 2400 to 2600...

Processing batch 2600 to 2800...

Processing batch 2800 to 3000...

Processing batch 3000 to 3200...
Token Refreshed.

Processing batch 3200 to 3400...

Processing batch 3400 to 3600...

Processing batch 3600 to 3800...

Processing batch 3800 to 4000...

Processing batch 4000 to 4200...
Token Refreshed.

Processing batch 4200 to 4400...

Processing batch 4400 to 4600...

Processing batch 4600 to 4800...

Pro