# Installing Dependencies

In [None]:
!pip install netCDF4


Collecting netCDF4
  Downloading netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m59.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


# Importing Libraries

In [None]:
import os
import gzip
import netCDF4
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

# Mount Google Drive at /content/drive so the data files stored there can be
# opened with ordinary file paths in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Loading the Lat-Long Grid File From Google Drive


In [None]:
# Locations on the mounted Drive: the project data root, the folder of raw
# per-day files, and the gzip-compressed NetCDF file holding the lat/long grids.
root_dir = "/content/drive/My Drive/Graduate - Academic Work/Semester 3/EEE598 Deep Learning/Project/Data"
raw_data_dir = os.path.join(root_dir, "Raw")
lat_long_file = os.path.join(root_dir, "lat_long.gz")

# Decompress the whole file into memory; netCDF4 then parses it from the bytes
# buffer, so 'dummy' is just a placeholder name, not a path on disk.
with gzip.open(lat_long_file) as gz:
    grid_bytes = gz.read()

# The context manager closes the dataset as soon as both grids have been
# copied out, instead of leaving it open for the rest of the session.
with netCDF4.Dataset('dummy', mode='r', memory=grid_bytes) as grid_dataset:
    print(grid_dataset.variables)
    lat_grid_raw = grid_dataset['latitude'][:]
    lon_grid_raw = grid_dataset['longitude'][:]

# The decompressed buffer is no longer needed once the grids are extracted.
del grid_bytes

{'latitude': <class 'netCDF4.Variable'>
float32 latitude(lines, elems)
    long_name: latitude of GHE (positive North)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-65.  65.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on, 'longitude': <class 'netCDF4.Variable'>
float32 longitude(lines, elems)
    long_name: longitude of GHE (positive East)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-180.  180.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on}


## Finding Rainy Days Within the Region of Interest

In [6]:
# Scan the per-day folders under raw_data_dir and record every date on which at
# least one file shows rain above RAIN_THRESHOLD inside the region of interest.
# Results are collected in rainy_days as 'YYYY/MM/DD' strings.
days = [f"{d:02d}" for d in range(1, 32)]  # '01'..'31'; dates with no folder are skipped below
months = ['07', '08', '09']
years = ["2023"]
count = 0  # not used in this cell; kept in case other cells read it
rainy_days = []

# A day counts as rainy when any grid cell in the region exceeds this value
# (expressed in whatever units the files' 'rain' variable uses).
RAIN_THRESHOLD = 25

# Defining region of interest
min_lat, max_lat = 23.65101718199114, 28.571809429628
min_lon, max_lon = 66.40570152424772, 71.273030867477
lat_grid_raw = np.array(lat_grid_raw)
lon_grid_raw = np.array(lon_grid_raw)

# Row/column indices whose coordinates fall inside the bounds. Only the first
# column of the latitude grid and the first row of the longitude grid are
# inspected, i.e. the grid is treated as if latitude varies by row and
# longitude by column.
lat_indices = np.where((lat_grid_raw[:, 0] >= min_lat) & (lat_grid_raw[:, 0] <= max_lat))[0]
lon_indices = np.where((lon_grid_raw[0, :] >= min_lon) & (lon_grid_raw[0, :] <= max_lon))[0]

# Fail loudly here rather than letting every file fail later on an empty selection.
if lat_indices.size == 0 or lon_indices.size == 0:
    raise ValueError("Region of interest does not overlap the latitude/longitude grid")

# The bounding window is the same for every file, so compute it once.
lat_slice = slice(int(lat_indices.min()), int(lat_indices.max()) + 1)
lon_slice = slice(int(lon_indices.min()), int(lon_indices.max()) + 1)

for year in years:
    for month in months:
        for day in days:
            print('Processing: ' + year + '/' + month + '/' + day)
            day_dir = os.path.join(raw_data_dir, year, month, day)

            try:
                # Sorted so the file reported for a rainy day is deterministic;
                # os.listdir returns entries in arbitrary order.
                filenames = sorted(os.listdir(day_dir))
            except (FileNotFoundError, NotADirectoryError):
                # No folder for this date (e.g. 09/31, or a day with no data).
                continue
            except OSError as exc:
                print('  Could not list ' + day_dir + ': ' + repr(exc))
                continue

            for filename in filenames:
                file_path = os.path.join(day_dir, filename)

                try:
                    with gzip.open(file_path) as gz:
                        file_bytes = gz.read()
                    # Read only the region window; the context manager closes
                    # the dataset even if the read fails.
                    with netCDF4.Dataset('dummy', mode='r', memory=file_bytes) as dataset:
                        precipitation_subset = np.array(dataset['rain'][lat_slice, lon_slice])
                except Exception as exc:
                    # Best effort: report the bad file and keep scanning the
                    # rest of the day instead of silently abandoning it.
                    # KeyboardInterrupt is deliberately not caught.
                    print('  Skipping unreadable file ' + file_path + ': ' + repr(exc))
                    continue

                # Negative values are treated as fill/no-data and zeroed.
                precipitation_subset[precipitation_subset < 0] = 0

                if precipitation_subset.size and precipitation_subset.max() > RAIN_THRESHOLD:
                    print('Found rainy day: ' + year + '/' + month + '/' + day + ' ' + filename)
                    rainy_days.append(year + '/' + month + '/' + day)
                    # One qualifying file is enough; move on to the next day.
                    break






Processing: 2023/07/01
Found rainy day: 2023/07/01 NPR.GEO.GHE.v1.S202307010015.nc.gz
Processing: 2023/07/02
Processing: 2023/07/03
Found rainy day: 2023/07/03 NPR.GEO.GHE.v1.S202307031215.nc.gz
Processing: 2023/07/04
Processing: 2023/07/05
Found rainy day: 2023/07/05 NPR.GEO.GHE.v1.S202307051345.nc.gz
Processing: 2023/07/06
Found rainy day: 2023/07/06 NPR.GEO.GHE.v1.S202307061615.nc.gz
Processing: 2023/07/07
Found rainy day: 2023/07/07 NPR.GEO.GHE.v1.S202307070800.nc.gz
Processing: 2023/07/08
Found rainy day: 2023/07/08 NPR.GEO.GHE.v1.S202307082345.nc.gz
Processing: 2023/07/09
Found rainy day: 2023/07/09 NPR.GEO.GHE.v1.S202307090015.nc.gz
Processing: 2023/07/10
Found rainy day: 2023/07/10 NPR.GEO.GHE.v1.S202307101430.nc.gz
Processing: 2023/07/11
Processing: 2023/07/12
Found rainy day: 2023/07/12 NPR.GEO.GHE.v1.S202307121345.nc.gz
Processing: 2023/07/13
Processing: 2023/07/14
Found rainy day: 2023/07/14 NPR.GEO.GHE.v1.S202307142000.nc.gz
Processing: 2023/07/15
Processing: 2023/07/16
Fo

In [8]:
# Report how many rainy days were found, then list them. Two separate
# statements: joining the calls with a comma builds a tuple of their return
# values, which the notebook then echoes as "(None, None)".
print(len(rainy_days))
print(rainy_days)

48
['2023/07/01', '2023/07/03', '2023/07/05', '2023/07/06', '2023/07/07', '2023/07/08', '2023/07/09', '2023/07/10', '2023/07/12', '2023/07/14', '2023/07/16', '2023/07/17', '2023/07/18', '2023/07/19', '2023/07/20', '2023/07/21', '2023/07/22', '2023/07/23', '2023/07/24', '2023/07/25', '2023/07/26', '2023/07/27', '2023/07/28', '2023/07/29', '2023/07/30', '2023/07/31', '2023/08/05', '2023/08/15', '2023/08/16', '2023/08/19', '2023/08/20', '2023/08/21', '2023/08/29', '2023/08/31', '2023/09/02', '2023/09/03', '2023/09/05', '2023/09/09', '2023/09/15', '2023/09/16', '2023/09/17', '2023/09/18', '2023/09/19', '2023/09/20', '2023/09/21', '2023/09/23', '2023/09/24', '2023/09/26']


(None, None)