# Installing Dependencies

In [None]:
!pip install netCDF4




# Importing Libraries

In [None]:
import os
import gzip
import netCDF4
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

# Colab-only step: mount Google Drive at /content/drive so the data
# directories referenced later in the notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Downloading Lat-Long File From The Cloud


In [None]:
# Data locations on the mounted Google Drive. Everything else in the notebook
# derives its paths from `root_dir`.
root_dir = "/content/drive/My Drive/Graduate - Academic Work/Semester 3/EEE598 Deep Learning/Project Proposal/Data"
raw_data_dir = root_dir + "/Raw"
processed_data_dir = root_dir + "/Processed"
lat_long_file = root_dir + "/" + "lat_long.gz"

# The grid file is a gzip-compressed netCDF file. It is decompressed fully in
# memory and handed to netCDF4 through the `memory` argument; 'dummy' is only
# a placeholder name because no file on disk is opened.
with gzip.open(lat_long_file) as gz:
    grid_dataset = netCDF4.Dataset('dummy', mode='r', memory=gz.read())

try:
    print(grid_dataset.variables)
    # Slicing with [:] copies the variable's data into in-memory arrays, so
    # they remain usable after the dataset is closed below.
    lat_grid_raw = grid_dataset['latitude'][:]
    lon_grid_raw = grid_dataset['longitude'][:]
finally:
    # Release the in-memory dataset even if reading a variable fails.
    grid_dataset.close()

{'latitude': <class 'netCDF4.Variable'>
float32 latitude(lines, elems)
    long_name: latitude of GHE (positive North)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-65.  65.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on, 'longitude': <class 'netCDF4.Variable'>
float32 longitude(lines, elems)
    long_name: longitude of GHE (positive East)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-180.  180.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on}


## Retrieving Samples For a Specific Date

In [8]:
# Calendar components used to build the raw-data directory paths
# (<raw_data_dir>/<year>/<month>/<day>). Every month is scanned for days
# 01..31; presumably dates that do not exist (e.g. 09/31) simply have no
# directory and are skipped by the scanning loop.
days = [f"{day_number:02d}" for day_number in range(1, 32)]
months = ['07', '08', '09']
years = ["2019"]
count = 0  # not used in the visible cells; kept in case other cells read it
rainy_days = []

# Defining region of interest (bounding box, same units as the grid file)
min_lat, max_lat = 23.65101718199114, 28.571809429628
min_lon, max_lon = 66.40570152424772, 71.273030867477

# Convert the grids read from the lat/long file to plain ndarrays.
lat_grid_raw = np.array(lat_grid_raw)
lon_grid_raw = np.array(lon_grid_raw)

# Row indices whose latitude and column indices whose longitude fall inside
# the bounding box. Only the first column of the latitude grid and the first
# row of the longitude grid are inspected.
# NOTE(review): this assumes a regular grid (latitude constant along a row,
# longitude constant along a column) — confirm against the grid file.
lat_indices = np.where((lat_grid_raw[:, 0] >= min_lat) & (lat_grid_raw[:, 0] <= max_lat))[0]
lon_indices = np.where((lon_grid_raw[0, :] >= min_lon) & (lon_grid_raw[0, :] <= max_lon))[0]

# Fail fast: with empty index arrays every later `.min()` / `.max()` call
# raises, which would otherwise only surface deep inside the processing loop.
if lat_indices.size == 0 or lon_indices.size == 0:
    raise ValueError(
        "Region of interest does not overlap the lat/long grid: "
        f"lat [{min_lat}, {max_lat}], lon [{min_lon}, {max_lon}]"
    )

def process_day(dir, processed_data_dir, year=None):
    """Crop every raw file of one day to the region of interest and save it.

    Each file in ``dir`` is opened as a gzip-compressed netCDF file, its
    ``rain`` variable is read, negative values are set to 0, and the
    sub-array selected by the notebook-level ``lat_indices`` / ``lon_indices``
    is written to ``<processed_data_dir>/<year>/<filename>.npy``.

    Args:
        dir: Directory containing the raw files of a single day.
        processed_data_dir: Root directory for the processed ``.npy`` files.
        year: Name of the per-year output sub-folder. When omitted, the
            notebook-level variable ``year`` is used, which is how the
            original two-argument call behaved.

    Returns:
        None. Nothing is created when ``dir`` contains no files.

    Raises:
        KeyError: If ``year`` is omitted and no notebook-level ``year`` exists.
        OSError: If ``dir`` cannot be listed or a file cannot be read/written.
    """
    if year is None:
        # Backward compatibility: older callers relied on the loop variable
        # `year` leaking in from the notebook namespace.
        year = globals()["year"]

    filenames = os.listdir(dir)
    if not filenames:
        return

    # Loop-invariant work: output folder and crop window are the same for
    # every file of the day.
    output_dir = processed_data_dir + f'/{year}/'
    os.makedirs(output_dir, exist_ok=True)
    row_slice = slice(lat_indices.min(), lat_indices.max() + 1)
    col_slice = slice(lon_indices.min(), lon_indices.max() + 1)

    for filename in filenames:
        file_path = dir + "/" + filename

        with gzip.open(file_path) as gz:
            raw_bytes = gz.read()

        # 'dummy' is a placeholder name; the data comes from `memory`.
        dataset = netCDF4.Dataset('dummy', mode='r', memory=raw_bytes)
        try:
            precipitation = np.array(dataset['rain'][:])
        finally:
            # Always release the dataset, even if reading 'rain' fails.
            dataset.close()

        precipitation[precipitation < 0] = 0  # Remove fill values
        precipitation_subset = precipitation[row_slice, col_slice]
        np.save(output_dir + filename + '.npy', precipitation_subset)


# A day is flagged as rainy as soon as one of its files has a value above this
# threshold inside the region of interest.
# NOTE(review): units are those of the 'rain' variable in the raw files — confirm.
RAIN_THRESHOLD = 25

for year in years:
    for month in months:
        for day in days:
            date_label = year + '/' + month + '/' + day
            print('Processing: ' + date_label)
            day_dir = raw_data_dir + "/" + date_label

            try:
                for filename in os.listdir(day_dir):
                    file_path = day_dir + "/" + filename

                    with gzip.open(file_path) as gz:
                        raw_bytes = gz.read()

                    # 'dummy' is a placeholder name; the data comes from `memory`.
                    dataset = netCDF4.Dataset('dummy', mode='r', memory=raw_bytes)
                    try:
                        precipitation = np.array(dataset['rain'][:])
                    finally:
                        # Release the dataset on every path, including errors.
                        dataset.close()

                    precipitation[precipitation < 0] = 0  # Remove fill values
                    precipitation_subset = precipitation[
                        lat_indices.min():lat_indices.max() + 1,
                        lon_indices.min():lon_indices.max() + 1,
                    ]

                    if precipitation_subset.max() > RAIN_THRESHOLD:
                        print('Rainy Day: ' + date_label)
                        rainy_days.append(date_label)
                        # Export every file of this day, then move on to the
                        # next day; the remaining files need no further checks.
                        process_day(day_dir, processed_data_dir)
                        break
            except FileNotFoundError:
                # No directory for this date (e.g. a day that does not exist
                # in the month, or data that was never downloaded): skip it.
                continue
            except Exception as exc:
                # Stay best-effort across days, but report the failure instead
                # of hiding it. KeyboardInterrupt is deliberately not caught.
                print('Skipping ' + date_label + ' after error: ' + repr(exc))







Processing: 2019/07/01
Rainy Day: 2019/07/01
Processing: 2019/07/02
Rainy Day: 2019/07/02
Processing: 2019/07/03
Rainy Day: 2019/07/03
Processing: 2019/07/04
Rainy Day: 2019/07/04
Processing: 2019/07/05
Processing: 2019/07/06
Processing: 2019/07/07
Processing: 2019/07/08
Processing: 2019/07/09
Processing: 2019/07/10
Processing: 2019/07/11
Processing: 2019/07/12
Rainy Day: 2019/07/12
Processing: 2019/07/13
Rainy Day: 2019/07/13
Processing: 2019/07/14
Processing: 2019/07/15
Processing: 2019/07/16
Rainy Day: 2019/07/16
Processing: 2019/07/17
Rainy Day: 2019/07/17
Processing: 2019/07/18
Rainy Day: 2019/07/18
Processing: 2019/07/19
Rainy Day: 2019/07/19
Processing: 2019/07/20
Rainy Day: 2019/07/20
Processing: 2019/07/21
Rainy Day: 2019/07/21
Processing: 2019/07/22
Rainy Day: 2019/07/22
Processing: 2019/07/23
Rainy Day: 2019/07/23
Processing: 2019/07/24
Rainy Day: 2019/07/24
Processing: 2019/07/25
Rainy Day: 2019/07/25
Processing: 2019/07/26
Rainy Day: 2019/07/26
Processing: 2019/07/27
Rainy

In [None]:
# Summary of the scan: number of flagged days, then the dates themselves.
# Two separate statements avoid building a (None, None) tuple that the
# notebook would otherwise echo as the cell's result.
print(len(rainy_days))
print(rainy_days)

67
['2022/07/01', '2022/07/02', '2022/07/03', '2022/07/04', '2022/07/05', '2022/07/06', '2022/07/07', '2022/07/08', '2022/07/09', '2022/07/10', '2022/07/11', '2022/07/12', '2022/07/13', '2022/07/14', '2022/07/15', '2022/07/16', '2022/07/17', '2022/07/18', '2022/07/19', '2022/07/20', '2022/07/21', '2022/07/22', '2022/07/23', '2022/07/24', '2022/07/25', '2022/07/26', '2022/07/27', '2022/07/28', '2022/07/29', '2022/07/30', '2022/07/31', '2022/08/01', '2022/08/02', '2022/08/03', '2022/08/04', '2022/08/05', '2022/08/06', '2022/08/07', '2022/08/08', '2022/08/09', '2022/08/10', '2022/08/11', '2022/08/12', '2022/08/13', '2022/08/14', '2022/08/15', '2022/08/16', '2022/08/17', '2022/08/18', '2022/08/19', '2022/08/20', '2022/08/21', '2022/08/22', '2022/08/23', '2022/08/24', '2022/08/25', '2022/08/30', '2022/08/31', '2022/09/01', '2022/09/02', '2022/09/09', '2022/09/10', '2022/09/11', '2022/09/12', '2022/09/13', '2022/09/14', '2022/09/15']


(None, None)