# Pre-preprocessing Sentinel 1

Builds the list of `cell_id`, `date`, `lat`, `lon` rows (one per grid cell) that is used for downloading the Sentinel-1 data.

In [13]:
import pandas as pd
import geojson as gsn
from pyproj import Proj
from osgeo import gdal
from osgeo import gdalconst

import tempfile
import wget
import math
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import os
import pickle
from collections import defaultdict
from datetime import datetime, timedelta

In [14]:
def daynum_gen(date_time):
    '''Build the ``YYYYDDD`` (year + zero-padded day-of-year) string for a date.

    Parameters
    ----------
    date_time : str, datetime.date or datetime.datetime
        Either an ISO-format date string (parsed with
        ``datetime.fromisoformat``) or an already-parsed date/datetime object.

    Returns
    -------
    str
        The year followed by the three-digit day of year,
        e.g. ``"2022-02-10"`` -> ``"2022041"``.

    Raises
    ------
    ValueError
        If a string is passed that ``datetime.fromisoformat`` cannot parse.
    '''
    # Only strings need parsing; date/datetime objects already expose
    # ``timetuple()`` and ``year``, and ``fromisoformat`` would reject them.
    if isinstance(date_time, str):
        date_time = datetime.fromisoformat(date_time)
    doy = date_time.timetuple().tm_yday
    return str(date_time.year) + '{:03d}'.format(doy)



In [15]:
# Load the submission template and give the column pandas labelled
# "Unnamed: 0" an explicit name (presumably the grid-cell ids -- it is the
# column date_loc_gen later reads as 'cell_id').
submission = pd.read_csv(
    "C:/Users/Matt/Documents/Python Scripts/SnowComp/dat/submission_format_2b.csv"
).rename(columns={"Unnamed: 0": "cell_id"})

# Date written into every row produced by date_loc_gen.
DATE = "2022-02-10"

In [16]:
# Read the grid-cell GeoJSON and report how many features it contains.
path = "C:/Users/Matt/Documents/Python Scripts/SnowComp/dat/grid_cells_2b.geojson"
with open(path) as geojson_file:
    gj = gsn.load(geojson_file)

n_features = len(gj['features'])
print(n_features)

20759


In [17]:
# cell_id -> centroid, where the centroid is the plain average of the first
# four points of the feature's first coordinate ring ("lazy" centroid).
centroids = {}

for feature in gj['features']:
    ring = feature['geometry']['coordinates'][0]
    # Every ring is expected to carry five points, the fifth repeating an
    # earlier one; fail loudly if any feature breaks that assumption.
    assert len(ring) == 5

    cell_id = feature['properties']['cell_id']
    centroids[cell_id] = list(np.mean(ring[0:4], axis=0))

In [29]:
def date_loc_gen(cell_ids, metadata= None, centroids=centroids, date=None):
    '''Build the (cell_id, date, lat, lon, date_long) table for a set of cells.

    Parameters
    ----------
    cell_ids : mapping/DataFrame with a ``'cell_id'`` entry
        Only ``cell_ids['cell_id']`` is read; it is iterated to get the ids.
    metadata : optional
        Unused; kept so existing calls that pass it keep working.
    centroids : dict
        Maps each cell id to a two-element sequence. Element 0 is written to
        the ``'lon'`` column and element 1 to the ``'lat'`` column.
    date : str, optional
        Date written into every row. Defaults to the notebook-level ``DATE``
        constant when omitted. It is passed through ``daynum_gen`` to fill the
        ``'date_long'`` column.

    Returns
    -------
    pandas.DataFrame
        One row per cell id with columns
        ``['cell_id', 'date', 'lat', 'lon', 'date_long']``.

    Raises
    ------
    KeyError
        If a cell id is not present in ``centroids``.
    '''
    if date is None:
        date = DATE

    # tqdm only wraps the iterable to show progress; rows are built in order.
    rows = [
        [cell, date, centroids[cell][1], centroids[cell][0]]
        for cell in tqdm(cell_ids['cell_id'])
    ]

    date_locs = pd.DataFrame(rows, columns = ['cell_id', 'date', 'lat', 'lon'])
    date_locs['date_long'] = date_locs['date'].map(daynum_gen)

    return date_locs

## Submission dataset

In [30]:
# Cut the submission rows into four consecutive chunks at the 1/4, 1/2 and
# 3/4 row positions, and print the shapes as a sanity check.
n_rows = submission.shape[0]
q1 = int(n_rows/4)
q2 = int(n_rows/2)
q3 = int(n_rows/4*3)

# Consecutive (start, stop) pairs; a stop of None means "to the last row".
bounds = [0, q1, q2, q3, None]
sub_all = [submission.iloc[start:stop]
           for start, stop in zip(bounds[:-1], bounds[1:])]

print(submission.shape)
for chunk in sub_all:
    print(chunk.shape)

(20759, 26)
(5189, 26)
(5190, 26)
(5190, 26)
(5190, 26)


In [31]:
# Write one date/location CSV per submission chunk, named
# "sub_dateloc<k>of<n>_.csv" inside sent_path.
sent_path = "C:/Users/Matt/Dropbox/SnowComp/SentinelHelper/realtime/"
n_parts = len(sub_all)

for i, ds in enumerate(sub_all):
    date_loc_sub = date_loc_gen(ds).sort_values("date_long")
    print(date_loc_sub.shape)

    #note no filtering is needed because these are later dates

    out_name = "sub_dateloc" + "{}of{}_".format(i+1, n_parts) + ".csv"
    date_loc_sub.to_csv(sent_path + out_name, index= False)

  0%|          | 0/5189 [00:00<?, ?it/s]

(5189, 5)


  0%|          | 0/5190 [00:00<?, ?it/s]

(5190, 5)


  0%|          | 0/5190 [00:00<?, ?it/s]

(5190, 5)


  0%|          | 0/5190 [00:00<?, ?it/s]

(5190, 5)
