In [None]:
#Mount Google Drive so that output files can be saved there.
#Clone the VIIRS_NOAA repository so that its files can be accessed from this notebook.
#Then pull the latest changes into the clone.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
! git clone https://github.com/NASAARSET/VIIRS_NOAA.git
! git -C VIIRS_NOAA/ pull

In [None]:
! pip install netCDF4
from netCDF4 import Dataset
import numpy as np
import sys
import time
import calendar
import datetime as dt
import pandas as pd

In [None]:

#!/usr/bin/python      

#Convert each VIIRS NOAA AOD netCDF granule listed in VIIRS_NOAA/fileList.txt
#into a CSV file with one row per pixel. The user is asked, file by file,
#whether the granule should be processed.

#Read the list of granule names (one per line) from the cloned repository.
#All names are read up front so the list file is closed before prompting;
#blank lines are ignored.
try:
    with open('VIIRS_NOAA/fileList.txt', 'r') as fileList:
        file_names = [line.strip() for line in fileList if line.strip()]
except OSError:
    print('Did not find a text file containing file names (perhaps name does not match)')
    sys.exit()

#Names of the variables (SDS) that are copied into the CSV when present.
sds_lst = ['AOD550',
           'QCAll',
           'AerMdl']

#loops through all files listed in the text file
for FILE_NAME in file_names:
    user_input = input('\nWould you like to process\n' + FILE_NAME + '\n\n(Y/N)')
    if user_input in ('N', 'n'):
        print('Skipping...')
        continue

    #Only AOD granules carry the variables listed in sds_lst, so anything
    #else is skipped instead of failing further down.
    if 'AOD' not in FILE_NAME:
        print('Skipping ' + FILE_NAME + ' (file name does not contain "AOD").')
        continue
    print('This is a VIIRS Deep Blue file.')

    #The fourth underscore-separated field of the file name is the granule
    #start stamp. It is assumed to be 's' followed by YYYYMMDDHHMM...
    #(e.g. s201808091955538); the hour/minute offsets 9:11 and 11:13 used by
    #earlier versions of this script already relied on that layout. Parsing
    #characters 5:8 as a day-of-year produced a wrong month and day.
    fileparts = FILE_NAME.split('_')
    t2 = dt.datetime.strptime(fileparts[3][1:13], '%Y%m%d%H%M')

    #The with-block closes the netCDF file as soon as the data is in memory.
    with Dataset('VIIRS_NOAA/' + FILE_NAME, 'r') as ds:
        lat = ds.variables['Latitude'][:]
        lon = ds.variables['Longitude'][:]
        #NOTE(review): lat/lon are read but never written to the CSV, so the
        #rows cannot be geolocated - confirm whether Latitude/Longitude
        #columns should be added. Only the size of lat is used below.

        #The date and time are identical for every pixel of a granule, so
        #each column is a constant array with one entry per pixel. Floats
        #are used so the CSV formatting matches earlier output.
        n_pixels = lat.size
        df = pd.DataFrame({
            'Year': np.full(n_pixels, float(t2.year)),
            'Month': np.full(n_pixels, float(t2.month)),
            'Day': np.full(n_pixels, float(t2.day)),
            'Hour': np.full(n_pixels, float(t2.hour)),
            'Minute': np.full(n_pixels, float(t2.minute)),
        })

        #Walk over every variable in the file (in file order) instead of a
        #fixed index range, so short files do not fail and no wanted
        #variable is missed.
        for SDS_NAME in ds.variables:
            if SDS_NAME not in sds_lst:
                continue
            print('SDS_NAME', SDS_NAME)
            sds = ds.variables[SDS_NAME]
            fv = getattr(sds, '_FillValue', None)

            #netCDF4 normally returns a masked array with fill values masked;
            #filled() turns those masked entries into NaN. Converting with
            #np.array() alone would drop the mask and keep the raw values.
            data = np.ma.filled(sds[:].astype(float), np.nan).ravel()
            #Fallback for the case where auto-masking is off and raw fill
            #values come through unmasked.
            if fv is not None:
                data[data == float(fv)] = np.nan
            df[SDS_NAME] = data

    #Write one CSV per granule, named after the granule ('.nc' -> '.csv').
    outfilename = FILE_NAME[:-3] + '.csv'
    df.to_csv("drive/My Drive/Colab Notebooks/" + outfilename, index=False)
    print('\nAll files have been saved successfully.')