# Import Needed Modules

In [1]:
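#!pip install ee  # NOTE(review): likely unnecessary — the `ee` module is provided by earthengine-api; the PyPI package named `ee` appears to be an unrelated project
#!pip install earthengine-api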

In [2]:
import ee  # Import the Earth Engine library
import numpy as np  # Import NumPy for numerical operations
import pandas as pd  # Import Pandas for data handling

from sklearn.model_selection import train_test_split  # Import train-test split function
from sklearn.metrics import accuracy_score  # Import accuracy_score for model evaluation
from sklearn.ensemble import RandomForestClassifier  # Import RandomForestClassifier from scikit-learn
import random  # Import the random library for generating random values
import matplotlib.pyplot as plt  # Import Matplotlib for plotting

# Seed every random number generator in use (NumPy's global RNG and the
# standard-library `random` module) for reproducibility.
SEED = 2023
np.random.seed(SEED)
random.seed(SEED)

# Authenticate and initialize Earth Engine.

In [3]:
# Get an authentication token and sign in to Google Earth Engine.
# ee.Authenticate() runs the interactive sign-in (it prompts for a verification
# code, as the recorded output of this cell shows); ee.Initialize() is called
# afterwards so that later ee.* calls in the notebook can be made.
ee.Authenticate()
ee.Initialize()

Enter verification code: [REDACTED]

Successfully saved authorization token.


# Extract the bands

In [4]:
%%time
# Initialize the Earth Engine client (authentication itself is done by the
# ee.Authenticate() call in the earlier sign-in cell).
ee.Initialize()

# Cloud masking helper, intended to be mapped over an image collection
def mask_clouds(image):
    """Mask out the pixels of ``image`` that its QA60 band flags as cloudy.

    Args:
        image: An image object exposing a 'QA60' band via ``select`` and
            supporting ``updateMask`` (an ``ee.Image`` when used with
            ``ImageCollection.map``).

    Returns:
        The value of ``image.updateMask(mask)``, where ``mask`` is 1 for
        pixels whose QA60 bit 10 is clear and 0 for pixels where it is set.
    """
    # NOTE(review): only bit 10 of QA60 is tested here. The Sentinel-2 catalog
    # also defines a cirrus flag in bit 11, which this function leaves
    # unmasked — confirm that this is intended.
    cloud_bit = 1 << 10

    qa_band = image.select(['QA60'])
    is_clear = qa_band.bitwiseAnd(cloud_bit).eq(0)
    return image.updateMask(is_clear)

# Acquisition window per country. Keys must match those of country_files;
# 'start' and 'end' are date strings used to bound the image collection.
time_maps = {
    'Afghanistan': dict(start='2022-04-01', end='2022-04-30'),
    'Iran': dict(start='2019-07-01', end='2020-06-30'),
    'Sudan': dict(start='2019-07-01', end='2020-06-30'),
}

# Band names to extract; each becomes one output column, in this order.
BANDS = [
    'B1', 'B2', 'B3', 'B4', 'B5', 'B6',
    'B7', 'B8', 'B8A', 'B9', 'B11', 'B12',
]

# Input CSV files to process for each country (each needs 'lon' and 'lat'
# columns, which the extraction loop reads).
country_files = {
    'Afghanistan': [
        'train_afghan.csv',
        'test_afghan.csv',
        'additional.csv',
    ],
    'Iran': [
        'train_iran.csv',
        'test_iran.csv',
    ],
    'Sudan': [
        'train_sudan.csv',
        'test_sudan.csv',
    ],
}

# Iterate through countries and their corresponding files. For every point in
# each CSV, sample the mean (over the country's date window) of the
# cloud-masked value of each band in BANDS, append those values as new
# columns, and write the result to '<country>_<file>'.

# tileScale passed to reduceRegions; it does not vary per file, so set it once.
tile_scale = 2  # You can adjust this value as needed

for country, files in country_files.items():
    # The image stack depends only on the country, so build it once per
    # country instead of once per file.
    # filterDate() treats its end argument as exclusive. Advance the configured
    # end date by one day so that images acquired on that day are included.
    end_exclusive = ee.Date(time_maps[country]['end']).advance(1, 'day')
    collection = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
                  .filterDate(time_maps[country]['start'], end_exclusive))

    # Apply the cloud masking function to the collection
    masked_collection = collection.map(mask_clouds)

    # Iterate through the files for the current country
    for file in files:
        # Read the CSV table
        csv_data = pd.read_csv(file)

        # Build one point per CSV row (Earth Engine expects lon, lat order) and
        # wrap them in a single FeatureCollection reused for both the spatial
        # filter and the reduction.
        point_geometries = [ee.Geometry.Point(lon, lat)
                            for lon, lat in zip(csv_data['lon'], csv_data['lat'])]
        points = ee.FeatureCollection(point_geometries)

        # Mean composite of the selected bands, sampled at every point
        mean_values = (masked_collection
                       .select(BANDS)
                       .filterBounds(points)
                       .mean()
                       .reduceRegions(collection=points,
                                      reducer=ee.Reducer.mean(),
                                      scale=10,
                                      tileScale=tile_scale))

        sampled_features = mean_values.getInfo()['features']

        # Rows are matched to features by position, so a count mismatch would
        # silently misalign values; fail loudly instead.
        if len(sampled_features) != len(csv_data):
            raise ValueError(
                f'{file}: expected {len(csv_data)} sampled features, '
                f'got {len(sampled_features)}'
            )

        # A point with no unmasked pixel comes back without the band property;
        # record NaN for it rather than raising KeyError.
        results = [
            [feature['properties'].get(band, np.nan) for band in BANDS]
            for feature in sampled_features
        ]

        # Assign the extracted values to the CSV data, one column per band
        for i, band in enumerate(BANDS):
            csv_data[band] = [values[i] for values in results]

        # Save the updated CSV data with the corresponding file name
        output_file = f'{country}_{file}'
        csv_data.to_csv(output_file, index=False)
        print(f'{output_file} saved.')

# Preview the last table that was processed
csv_data.head()

Afghanistan_train_afghan.csv saved.
Afghanistan_test_afghan.csv saved.
Afghanistan_additional.csv saved.
Iran_train_iran.csv saved.
Iran_test_iran.csv saved.
Sudan_train_sudan.csv saved.
Sudan_test_sudan.csv saved.
Wall time: 2min 38s


Unnamed: 0,id,lat,lon,B1,B2,B3,B4,B5,B6,B7,B8,B8A,B9,B11,B12
0,ID_SOYSG7W04UH3,14.431884,33.399991,618.928571,778.793651,1072.063492,1291.865079,1565.880952,1985.888889,2175.746032,2072.095238,2302.619048,2702.007937,1959.325397,1551.007937
1,ID_EAP7EXXV8ZDE,14.281866,33.441224,541.193548,691.467742,957.580645,1101.645161,1381.83871,1869.854839,2032.129032,2118.274194,2150.822581,2553.629032,1784.435484,1408.66129
2,ID_QPRX1TUQVGHU,14.399365,33.109566,884.45082,1002.942623,1321.467213,1722.245902,2080.459016,2217.959016,2369.434426,2361.122951,2512.713115,2963.934426,2905.065574,2575.368852
3,ID_C78YQ32G1KO9,14.196346,33.50716,616.580645,703.83871,1006.983871,1080.596774,1430.758065,1800.725806,1995.725806,2262.096774,2000.935484,2179.258065,1345.370968,1013.806452
4,ID_M5X39UIEM64N,14.348251,33.252488,476.050847,613.661017,880.949153,1094.271186,1357.288136,1875.169492,2161.457627,2244.728814,2261.830508,2458.830508,1692.254237,1336.779661
