# Installing Dependencies

In [None]:
!pip install netCDF4


Collecting netCDF4
  Downloading netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m79.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


# Importing Libraries

In [None]:
import os
import gzip
import netCDF4
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed

# Mount Google Drive into the Colab filesystem; the data paths used below
# live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Loading the Lat-Long Grid File From Google Drive


In [None]:
# Project data locations on the mounted Google Drive.
root_dir = "/content/drive/My Drive/Graduate - Academic Work/Semester 3/EEE598 Deep Learning/Project/Data"
raw_data_dir = os.path.join(root_dir, "Raw")
lat_long_file = os.path.join(root_dir, "lat_long.gz")

# The grid file is a gzip-compressed netCDF file: decompress it in memory and
# hand the bytes to netCDF4. 'dummy' is only a placeholder name, because the
# dataset is backed by the in-memory buffer rather than a file on disk.
with gzip.open(lat_long_file) as gz:
    grid_bytes = gz.read()

# The context manager closes the dataset once the coordinate arrays have been
# copied out, instead of leaving it open for the life of the kernel.
with netCDF4.Dataset('dummy', mode='r', memory=grid_bytes) as grid_dataset:
    print(grid_dataset.variables)
    # [:] reads each variable fully into an in-memory array, so the results
    # remain usable after the dataset is closed.
    lat_grid_raw = grid_dataset['latitude'][:]
    lon_grid_raw = grid_dataset['longitude'][:]

# The decompressed file contents are no longer needed once the arrays exist.
del grid_bytes

{'latitude': <class 'netCDF4.Variable'>
float32 latitude(lines, elems)
    long_name: latitude of GHE (positive North)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-65.  65.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on, 'longitude': <class 'netCDF4.Variable'>
float32 longitude(lines, elems)
    long_name: longitude of GHE (positive East)
    units: degrees
    parameter_type: GHE rain
    valid_range: [-180.  180.]
    _FillValue: -9999.0
unlimited dimensions: 
current shape = (4800, 10020)
filling on}


## Retrieving Samples For a Specific Date

In [None]:
# --- Search configuration ---------------------------------------------------
# Day-of-month strings '01'..'31'. Dates with no directory on disk
# (e.g. 09/31) are skipped explicitly in the loop below.
days = [f"{day_number:02d}" for day_number in range(1, 32)]
months = ['07', '08', '09']
years = ["2024"]
count = 0  # not used in this cell; kept in case another cell relies on it
rainy_days = []

# A date is flagged as rainy when any grid cell inside the region of interest
# exceeds this value in at least one file for that date.
# NOTE(review): the unit is whatever the 'rain' variable is stored in — confirm.
RAIN_THRESHOLD = 25

# Defining region of interest
min_lat, max_lat = 23.65101718199114, 28.571809429628
min_lon, max_lon = 66.40570152424772, 71.273030867477
lat_grid_raw = np.array(lat_grid_raw)
lon_grid_raw = np.array(lon_grid_raw)

# Row indices come from the first column of the latitude grid and column
# indices from the first row of the longitude grid.
lat_indices = np.where((lat_grid_raw[:, 0] >= min_lat) & (lat_grid_raw[:, 0] <= max_lat))[0]
lon_indices = np.where((lon_grid_raw[0, :] >= min_lon) & (lon_grid_raw[0, :] <= max_lon))[0]

# Fail loudly if the bounding box misses the grid; otherwise every file would
# error out and the search would quietly return no rainy days.
if lat_indices.size == 0 or lon_indices.size == 0:
    raise ValueError("Region of interest does not overlap the latitude/longitude grid")

# Contiguous window covering the region (same bounds as min():max()+1).
lat_slice = slice(int(lat_indices.min()), int(lat_indices.max()) + 1)
lon_slice = slice(int(lon_indices.min()), int(lon_indices.max()) + 1)


def _max_rain_in_region(file_path, lat_slice, lon_slice):
    """Return the maximum 'rain' value inside the given window of one file.

    Args:
        file_path: Path to a gzip-compressed netCDF file containing a 'rain'
            variable.
        lat_slice: Slice applied to the first axis of 'rain'.
        lon_slice: Slice applied to the second axis of 'rain'.

    Returns:
        The largest value in the window after negative values (fill values)
        have been replaced by 0.

    Raises:
        Whatever gzip / netCDF4 raise for an unreadable or malformed file;
        the caller decides how to handle that.
    """
    with gzip.open(file_path) as gz:
        raw_bytes = gz.read()

    # The context manager guarantees the dataset is closed even if reading
    # the variable fails part-way through.
    with netCDF4.Dataset('dummy', mode='r', memory=raw_bytes) as dataset:
        # Read only the region of interest instead of the full global grid.
        precipitation_subset = np.array(dataset['rain'][lat_slice, lon_slice])

    precipitation_subset[precipitation_subset < 0] = 0  # Remove fill values
    return precipitation_subset.max()


for year in years:
    for month in months:
        for day in days:
            date_label = year + '/' + month + '/' + day
            print('Processing: ' + date_label)

            day_dir = os.path.join(raw_data_dir, year, month, day)
            if not os.path.isdir(day_dir):
                # No data for this date (including calendar dates that do not exist).
                continue

            # os.listdir order is arbitrary; sort so the scan is deterministic.
            for filename in sorted(os.listdir(day_dir)):
                file_path = os.path.join(day_dir, filename)
                try:
                    max_rain = _max_rain_in_region(file_path, lat_slice, lon_slice)
                except Exception as exc:
                    # Best effort: report the bad file and keep checking the
                    # remaining files for this date. KeyboardInterrupt is not
                    # an Exception subclass, so the cell can still be stopped.
                    print('Skipping unreadable file ' + file_path + ': ' + repr(exc))
                    continue

                if max_rain > RAIN_THRESHOLD:
                    print('Found rainy day: ' + date_label + ' ' + filename)
                    rainy_days.append(date_label)
                    break  # one qualifying file is enough to flag the date






Processing: 2024/07/01
Found rainy day: 2024/07/01 NPR.GEO.GHE.v1.S202407012130.nc.gz
Processing: 2024/07/02
Found rainy day: 2024/07/02 NPR.GEO.GHE.v1.S202407021815.nc.gz
Processing: 2024/07/03
Found rainy day: 2024/07/03 NPR.GEO.GHE.v1.S202407032215.nc.gz
Processing: 2024/07/04
Found rainy day: 2024/07/04 NPR.GEO.GHE.v1.S202407041245.nc.gz
Processing: 2024/07/05
Found rainy day: 2024/07/05 NPR.GEO.GHE.v1.S202407051445.nc.gz
Processing: 2024/07/06
Processing: 2024/07/07
Found rainy day: 2024/07/07 NPR.GEO.GHE.v1.S202407071400.nc.gz
Processing: 2024/07/08
Found rainy day: 2024/07/08 NPR.GEO.GHE.v1.S202407081000.nc.gz
Processing: 2024/07/09
Found rainy day: 2024/07/09 NPR.GEO.GHE.v1.S202407090715.nc.gz
Processing: 2024/07/10
Found rainy day: 2024/07/10 NPR.GEO.GHE.v1.S202407101530.nc.gz
Processing: 2024/07/11
Found rainy day: 2024/07/11 NPR.GEO.GHE.v1.S202407111315.nc.gz
Processing: 2024/07/12
Found rainy day: 2024/07/12 NPR.GEO.GHE.v1.S202407121745.nc.gz
Processing: 2024/07/13
Found ra

In [7]:
# Report how many rainy dates were found, then list them. Two separate
# statements avoid building a (None, None) tuple that the notebook would
# echo as the cell's result.
print(len(rainy_days))
print(rainy_days)

67
['2024/07/01', '2024/07/02', '2024/07/03', '2024/07/04', '2024/07/05', '2024/07/07', '2024/07/08', '2024/07/09', '2024/07/10', '2024/07/11', '2024/07/12', '2024/07/13', '2024/07/14', '2024/07/15', '2024/07/16', '2024/07/17', '2024/07/18', '2024/07/19', '2024/07/20', '2024/07/21', '2024/07/22', '2024/07/23', '2024/07/24', '2024/07/25', '2024/07/27', '2024/07/28', '2024/07/29', '2024/07/30', '2024/07/31', '2024/08/01', '2024/08/02', '2024/08/03', '2024/08/04', '2024/08/05', '2024/08/06', '2024/08/07', '2024/08/08', '2024/08/09', '2024/08/10', '2024/08/12', '2024/08/13', '2024/08/14', '2024/08/15', '2024/08/16', '2024/08/17', '2024/08/18', '2024/08/20', '2024/08/21', '2024/08/22', '2024/08/23', '2024/08/24', '2024/08/25', '2024/08/26', '2024/08/27', '2024/08/28', '2024/08/29', '2024/08/30', '2024/08/31', '2024/09/01', '2024/09/02', '2024/09/03', '2024/09/04', '2024/09/05', '2024/09/07', '2024/09/08', '2024/09/26', '2024/09/27']


(None, None)