In [1]:
import pandas as pd
import time
import math
import numpy as np


df=pd.read_csv('processed_data.csv')


In [2]:
import geetools
import ee
import geemap.core as geemap
ee.Authenticate()
ee.Initialize(project="pm-predictions")

In [3]:
df['Date']=pd.to_datetime(df['Date'])

In [4]:
df.head()

Unnamed: 0,mp10,station,mp2.5,dirviento,tamb,so2,velviento,hrel,no2,o3,nox,no,co,region,Date,Year,Month,Day,lat,long
0,178.542,El_bosque,49.7829,221.195,,9.5,0.950428,40.5833,30.3333,16.9167,65.1666,34.8333,0.929165,1,1997-04-02,1997,4,2,-33.547016,-70.666163
1,178.542,El_bosque,49.7829,221.195,,9.5,0.950428,40.5833,30.3333,12.4583,65.1666,34.8333,0.929165,1,1997-04-03,1997,4,3,-33.547016,-70.666163
2,178.542,El_bosque,49.7829,221.195,,9.5,0.950428,40.5833,30.3333,23.2083,65.1666,34.8333,0.929165,1,1997-04-04,1997,4,4,-33.547016,-70.666163
3,178.542,El_bosque,49.7829,221.195,,9.5,0.950428,40.5833,30.3333,28.2917,65.1666,34.8333,0.929165,1,1997-04-05,1997,4,5,-33.547016,-70.666163
4,146.042,El_bosque,49.7829,221.195,,9.5,0.950428,40.5833,30.3333,21.2083,65.1666,34.8333,1.20416,1,1997-04-06,1997,4,6,-33.547016,-70.666163


In [7]:
import datetime
def image_task(image,point,folder_name, image_name,storage="Drive"):

    """
    function to download satellite images from a ee.imageCollection object.
    We first generate a bounding box of image_res*n_pixels meters around "point",
    then we clip that region from the image collection, take the mean image from the collection,
    and send that as a task to the Google Earth Engine.
    After that, we download the image Google Cloud Storage if storage=="Cloud",
    or to Google Drive if storage=="Drive".

    Inputs:
    -image= ee.ImageCollection object
    -point= ee.Geometry.Point object
    -image_res= resolution of the image in meters
    -n_pixels= number of pixels to extract on the images
    -storage= string indicating if we are storing the images in Google Cloud or Google Drive.
              Defaults to Google Cloud.
    -folder_name= string with Google Cloud bucket name if storage=="Cloud"
                  string with the name of a folder in the root of Google Drive if storage=="Drive"
    -image_name= string with the image_name for the TIFF image.

    Output:
     task= an EE task object. we can then use task.status() to check the status of the task.
     If the task is completed, we will see a TIFF image in "folder_name" with name "image_name.tif".
     The image has 3 dimensions, where the first 2 are n_pixels, and the 3rd is the number of bands of "image".
    """
    #generating the box around the point
    len=30*224 # for landsat, 30 meters * 224 pixels
    region= point.buffer(len/2).bounds().getInfo()['coordinates']
    #defining the rectangle
    coords=np.array(region)
    #taking min and maxs of coordinates to define the rectangle
    coords=[np.min(coords[:,:,0]), np.min(coords[:,:,1]), np.max(coords[:,:,0]), np.max(coords[:,:,1])]
    rectangle=ee.Geometry.Rectangle(coords)

    #generating the export task ( dimensions is "WIDTHxHEIGHT"  )
    if storage=="Cloud":
        task=ee.batch.Export.image.toCloudStorage(image=image.filterBounds(rectangle).mean(),
                            bucket=folder_name,
                            description=image_name,
                            region=str(region), dimensions=str(224)+"x"+str(224))
    if storage=="Drive":
        task=ee.batch.Export.image.toDrive(image=image.filterBounds(rectangle).mean(),
                            folder=folder_name,
                            description=image_name,
                            region=str(region), dimensions=str(224)+"x"+str(224))


    #starting the task
    task.start()
    return task

def imagery(date):
    ''' function to decide what imagery we are using depending on the year, and
    to take composites from the relevant year for our ground truth '''
    year=date.year
    end_date = date + datetime.timedelta(days=10)
    if year>2013: #in this case we use landsat 8
        #getting collection
        landsat = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")

        #filtering dates
        landsat=landsat.filterDate(str(year)+'-'+str(date.month)+'-'+str(date.day), str(end_date.year)+'-'+str(end_date.month)+'-'+str(end_date.day))

        #applying cloud masking


        # selecting bands for both versions

        landsat = landsat.select(["B2","B3","B4","B5","B6","B7"])

    elif year>1999:
        landsat = ee.ImageCollection("LANDSAT/LE07/C02/T1")
        #filtering dates
        landsat=landsat.filterDate(str(year)+'-'+str(date.month)+'-'+str(date.day), str(end_date.year)+'-'+str(end_date.month)+'-'+str(end_date.day))

        #applying cloud masking


        # selecting bands for both versions

        landsat = landsat.select(["B1","B2","B3","B4","B5","B7"])
    else:
        landsat = ee.ImageCollection("LANDSAT/LT05/C02/T1")
        #filtering dates
        landsat=landsat.filterDate(str(year)+'-'+str(date.month)+'-'+str(date.day), str(end_date.year)+'-'+str(end_date.month)+'-'+str(end_date.day))



        # selecting bands for both versions

        landsat = landsat.select(["B1","B2","B3","B4","B5","B7"])


    # filtering date (taking a full year)

    return landsat


df['original_index'] = df.index

# column for task status
df['task_status']='_'

#list to store the task objects
tasks=[]

#running the tasks to get the imagery
batch_size=500
starting_index=21700
N=len(df)
for j in range( math.ceil(N/batch_size) ):
    #determining batch lower and upper indexes, given batch size
    #lower is always fixed
    lower_i=starting_index+batch_size*j
    #upper can vary at the end of the list
    if batch_size*(j+1)>N:
        upper_i=starting_index+N
    else:
        upper_i=starting_index+batch_size*(j+1)


    #generating the tasks for all the images in the batch
    for i in range(lower_i,upper_i):
        tasks.append(image_task(image=imagery(df['Date'][i]),
                                point=ee.Geometry.Point(df['long'][i],df['lat'][i] ),
                                folder_name='GEE_Images',
                                image_name=str(df['station'][i]+'_'+str(df['Date'][i])),
                                storage="Drive"))

    #printing message:
    print('Batch '+str(j+1)+': Retrieving images '+str(lower_i+1)+' to '+str(upper_i)+' of a total of '+str(starting_index+N))

    #checking status of the mentioned tasks
    batch_status=df.loc[lower_i:upper_i-1,'task_status'].value_counts() #counting status of the tasks

    while batch_status.get('COMPLETED',0) + batch_status.get('FAILED',0)< upper_i - lower_i: #checking that not all tasks are done
        time.sleep(10) #running the code every 10 seconds
        for i in range(lower_i,upper_i):
            #checking status of each task
            if df.loc[i,'task_status']=='_' or df.loc[i,'task_status']=='READY' or df.loc[i,'task_status']=='RUNNING':
                df.loc[i,'task_status']=tasks[i-starting_index].status()['state'] #use when restarting list of tasks
                #dataset.loc[i,'task_status']=tasks[i].status()['state']

        #updating batch status
        batch_status=df.loc[lower_i:upper_i-1,'task_status'].value_counts()
        #reporting them back
        print('Status of batch '+str(j+1)+':')
        print('completed images= '+str(batch_status.get('COMPLETED',0)))
        print('failed images= '+str(batch_status.get('FAILED',0)))
        print('pending images= '+str(upper_i-lower_i -batch_status.get('COMPLETED',0)-batch_status.get('FAILED',0)))
        print('------------------')

    #updating dataset after every batch

    df.to_csv('Status.csv',index=False)

print('The Landsat download code has finished')

Batch 1: Retrieving images 14201 to 14700 of a total of 166117
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------------------
Status of batch 1:
completed images= 0
failed images= 0
pending images= 500
------

KeyboardInterrupt: 

In [None]:
from keras.models import Model
from keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Dropout, Bidirectional, Input, concatenate,Reshape
from keras.utils import plot_model

# Definición del modelo CNN
input1 = Input(shape=(None, 100, 100, 8)) # Asume que tus imágenes son de 100x100 con 8 canales
cnn = TimeDistributed(Conv2D(64, kernel_size=3, activation='relu'))(input1)
cnn = TimeDistributed(MaxPooling2D(2))(cnn)
cnn = TimeDistributed(Conv2D(128, kernel_size=3, activation='relu'))(cnn)
cnn = TimeDistributed(MaxPooling2D(2))(cnn)
cnn = TimeDistributed(Conv2D(64, kernel_size=3, activation='relu'))(cnn)
cnn = TimeDistributed(MaxPooling2D(2))(cnn)
cnn = TimeDistributed(Flatten())(cnn)

# Definición del modelo LSTM
input2 = Input(shape=(None, 14)) # Asume que tienes 14 características tabuladas
dense = TimeDistributed(Dense(128, activation='relu'))(input2)
dense = TimeDistributed(Dropout(0.5))(dense)
dense = TimeDistributed(Dense(64, activation='relu'))(dense)
dense = TimeDistributed(Dropout(0.5))(dense)
dense = TimeDistributed(Dense(32, activation='relu'))(dense)

# Combinación de los modelos
merge = concatenate([cnn, dense])
lstm = Bidirectional(LSTM(100, return_sequences=True))(merge)
lstm = Dropout(0.5)(lstm)
lstm = Bidirectional(LSTM(100, return_sequences=False))(lstm)
lstm = Dropout(0.5)(lstm)

# Capa de salida
output = Dense(1, activation='linear')(lstm) # Asume que estás haciendo una regresión

# Creación del modelo
model = Model([input1, input2], output)
model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])



In [None]:
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)