## Script to read the FTP server and copy data files locally

This script connects to the "ftp.cmdl.noaa.gov" FTP server and copies the gzipped (.gz) aerosol data files to a local directory.

In [2]:
#import all dependencies
from ftplib import FTP
import ftplib
import gzip
import os
import pandas as pd
import numpy as np
from calendar import monthrange

In [3]:
# Configuration shared by the cells below.

# Host name handed to ftplib.FTP when opening a connection.
ftp_server = "ftp.cmdl.noaa.gov"

# Station codes. Each code is used as a sub-directory name under "aerosol/"
# on the server and, prefixed with "_", as the local directory name.
stations = [
    'bnd',
    'brw',
    'mlo',
    'smo',
    'spo',
    'sum',
    'thd',
]

In [4]:
 # Create a local directory with Station Code as the dirname

# Build the path with os.path.join instead of a backslash literal: in a
# non-raw string "\r" (as in "..\data\rawdata") is a carriage return, so the
# original literal did not name the intended directory, and "\" is only a
# path separator on Windows.
raw_dir = os.path.join("..", "data", "rawdata", "_aerosol")

for nm in stations:
    station_dir = os.path.join(raw_dir, f"_{nm}")
    if os.path.isdir(station_dir):
        print(f"Directory _{nm} already exists")
    else:
        # makedirs also creates any missing parent directories, so this cell
        # works on a fresh checkout where ../data/rawdata does not exist yet.
        os.makedirs(station_dir)
        print(f"Directory _{nm} created")

Directory _bnd already exists
Directory _brw already exists
Directory _mlo already exists
Directory _smo already exists
Directory _spo created
Directory _sum created
Directory _thd already exists


In [32]:
# Download the particle-number-concentration archives for every station.
#
# Server layout, as navigated below: aerosol/<station>/<year>/<files>
# Local layout: ../data/rawdata/_aerosol/_<station>/<year><station>_conc.gz
# for the first matching file of a year. A year directory can hold several
# matching files; the additional ones are stored next to the first as
# <year><station>_conc_part2.gz, _part3.gz, ... instead of being dropped.
#
# The path is built with os.path.join because a backslash literal such as
# "..\data\rawdata" contains "\r", which Python reads as a carriage return.
raw_dir = os.path.join("..", "data", "rawdata", "_aerosol")
wanted = "particle_number_concentration"


def _fetch_file(ftp, remote_name, local_path):
    """Download `remote_name` from the current FTP directory to `local_path`.

    The data is written to "<local_path>.part" first and moved into place
    only after the transfer finished, so an interrupted download never
    leaves a truncated archive under the final name.

    Raises whatever `ftp.retrbinary` or the local file operations raise.
    """
    tmp_path = local_path + ".part"
    try:
        with open(tmp_path, "wb") as fh:
            # retrbinary calls fh.write once per received chunk
            ftp.retrbinary(f"RETR {remote_name}", fh.write)
        os.replace(tmp_path, local_path)
    except BaseException:
        # remove the partial file, then let the caller see the error
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


for s in stations:
    station_dir = os.path.join(raw_dir, f"_{s}")
    # Do not rely on the directory cell having been run beforehand.
    os.makedirs(station_dir, exist_ok=True)

    # One session per station; leaving the `with` block sends QUIT and closes
    # the socket, so no explicit ftp.quit() is needed.
    with FTP(ftp_server) as ftp:
        try:
            ftp.login()  # no credentials given: anonymous login
            ftp.cwd("aerosol/" + s)  # the station's directory

            # Remember where the year directories live so we can come back
            # after visiting each of them.
            parent_dir = ftp.pwd()

            # Entries of the station directory; each one is entered with cwd
            # below, i.e. treated as a per-year directory.
            years = ftp.nlst()

            for y in years:
                try:
                    ftp.cwd(y)
                    print(ftp.pwd())  # progress: directory being scanned

                    # keep only the particle concentration files
                    filenm = [f for f in ftp.nlst() if wanted in f]
                    print(filenm)  # empty list if the year has none

                    for part, remote_name in enumerate(filenm, start=1):
                        suffix = "" if part == 1 else f"_part{part}"
                        local_path = os.path.join(
                            station_dir, f"{y}{s}_conc{suffix}.gz"
                        )
                        _fetch_file(ftp, remote_name, local_path)
                except ftplib.error_perm as e:
                    # A permanent (5xx) reply for this entry, e.g. it is not
                    # a directory: skip it and carry on with the next year
                    # rather than abandoning the whole station.
                    print(f"Skipping {s}/{y}:", e)
                finally:
                    # back to the station directory for the next year
                    ftp.cwd(parent_dir)

        except ftplib.all_errors as e:
            print('FTP error:', e)


/data/aer/thd/0002
[]
/data/aer/thd/2002
['US6005G.20020410000000.20171020004835.particle_number_concentration.aerosol.266d.1h.lev2.nas.gz']
/data/aer/thd/2003
['US6005G.20030101000000.20171020004930.particle_number_concentration.aerosol.219d.1h.lev2.nas.gz', 'US6005G.20030808000000.20171020004930.particle_number_concentration.aerosol.129d.1h.lev2.nas.gz', 'US6005G.20031215000000.20171020004930.particle_number_concentration.aerosol.17d.1h.lev2.nas.gz']
/data/aer/thd/2004
['US6005G.20040101000000.20171020005039.particle_number_concentration.aerosol.366d.1h.lev2.nas.gz']
/data/aer/thd/2005
['US6005G.20050101000000.20171020005151.particle_number_concentration.aerosol.103d.1h.lev2.nas.gz', 'US6005G.20050414000000.20171020005151.particle_number_concentration.aerosol.262d.1h.lev2.nas.gz']
/data/aer/thd/2006
['US6005G.20060101000000.20171020005305.particle_number_concentration.aerosol.219d.1h.lev2.nas.gz', 'US6005G.20060808000000.20171020005305.particle_number_concentration.aerosol.146d.1h.le