In [1]:
import ftplib
import gzip
import pandas as pd

In [2]:
def get_file(ftp, fname):
    '''
    Download a file over FTP in binary mode and return its contents.

    ftp:   a connected object exposing retrbinary(cmd, callback), such as
           an ftplib.FTP instance.
    fname: name of the remote file, relative to the current FTP directory.

    Returns the complete file contents as a bytes object.
    '''
    # A bytearray grows in place; repeatedly concatenating immutable bytes
    # would copy everything received so far on every chunk.
    buffer = bytearray()
    ftp.retrbinary('RETR {}'.format(fname), buffer.extend)
    return bytes(buffer)

In [3]:
# Open a connection to the NOAA NCDC FTP server.
ftp = ftplib.FTP('ftp.ncdc.noaa.gov')
# No user/password is passed, so ftplib falls back to its default
# (anonymous) login.
ftp.login()



In [4]:
# Switch to the 2020 directory and list it with MLSD.
# mlsd() yields (name, facts) pairs; they are materialised into a list so
# the listing can be indexed and re-inspected in later cells.
ftp.cwd('pub/data/noaa/2020/')
files = list(ftp.mlsd())

In [5]:
files

[('.',
  {'modify': '20200131154047',
   'perm': 'fle',
   'type': 'cdir',
   'unique': '3CU17328E50',
   'unix.group': '4021',
   'unix.mode': '0775',
   'unix.owner': '4021'}),
 ('..',
  {'modify': '20200131154120',
   'perm': 'fle',
   'type': 'pdir',
   'unique': '3CU2F87CC5',
   'unix.group': '4021',
   'unix.mode': '0775',
   'unix.owner': '4021'}),
 ('359270-99999-2020.gz',
  {'modify': '20200131152513',
   'perm': 'adfr',
   'size': '8374',
   'type': 'file',
   'unique': '3CU17BF18D8',
   'unix.group': '4021',
   'unix.mode': '0664',
   'unix.owner': '4021'}),
 ('715820-99999-2020.gz',
  {'modify': '20200131152939',
   'perm': 'adfr',
   'size': '131991',
   'type': 'file',
   'unique': '3CU17CF2A43',
   'unix.group': '4021',
   'unix.mode': '0664',
   'unix.owner': '4021'}),
 ('715901-99999-2020.gz',
  {'modify': '20200131152939',
   'perm': 'adfr',
   'size': '9696',
   'type': 'file',
   'unique': '3CU17CF2A4C',
   'unix.group': '4021',
   'unix.mode': '0664',
   'unix.owne

In [6]:
# Download the first regular file in the listing. Entries are selected by
# their MLSD 'type' fact rather than by position, so this does not depend
# on '.' and '..' being the first two entries returned by the server.
fname = next(name for name, facts in files if facts.get('type') == 'file')
file = get_file(ftp, fname)

In [7]:
# Replace the downloaded gzip bytes with the decompressed payload.
# NOTE(review): `file` is rebound in place, so this cell is not idempotent:
# running it a second time without re-running the download cell would pass
# already-decompressed data to gzip.decompress.
file = gzip.decompress(file)

In [30]:
import datetime
import pprint

def value_or_missing(val):
    '''
    Return None when an ISD field holds its "missing" marker, else the string.

    ISD marks missing data by filling a field with 9s. Signed numeric fields
    keep their leading "+" in that marker (for example "+9999" for a missing
    air temperature), so a leading "+" is ignored when testing for all 9s.

    val: raw field text sliced from an ISD record.

    Returns None if the field is missing (an empty string also counts as
    missing), otherwise val unchanged.
    '''
    # Only drop the sign when digits follow it, so a lone "+" is returned as-is.
    if len(val) > 1 and val.startswith("+"):
        digits = val[1:]
    else:
        digits = val
    return None if digits == "9" * len(digits) else val

def parse_isd_line(line):
    '''
    Decode one fixed-width NOAA ISD record into a dictionary.

    Every field is cut from a fixed character range of `line` and passed
    through value_or_missing, so fields it reports as missing come back as
    None. On top of that:
      - "var_data_len" is the first four characters converted to int;
      - "date" is parsed with the format "%Y%m%d%H%M" into a datetime;
      - "latitude" and "longitude" are converted to float and divided by 1000;
      - "elevation_meters" is converted to int;
      - "air_temp_c" is converted to float and divided by 10.
    The remaining fields are returned as the raw strings.

    Raises ValueError if one of the numeric or date conversions fails
    (for instance on an empty line).
    '''
    def field(start, stop):
        # Slice the record and map the missing-data marker to None.
        return value_or_missing(line[start:stop])

    def scaled(text, divisor):
        # A missing field stays None; otherwise convert and apply the scale.
        return float(text) / divisor if text else None

    # Station data
    record = {"var_data_len": int(line[0:4])}
    record["usaf_station_id"] = field(4, 10)
    record["wban_station_id"] = field(10, 15)

    timestamp = field(15, 27)
    if timestamp is not None:
        timestamp = datetime.datetime.strptime(timestamp, "%Y%m%d%H%M")
    record["date"] = timestamp

    record["latitude"] = scaled(field(28, 34), 1000.0)
    record["longitude"] = scaled(field(34, 41), 1000.0)
    record["report_type"] = field(41, 46)

    elevation_text = field(46, 51)
    record["elevation_meters"] = int(elevation_text) if elevation_text else None

    record["call_letters"] = field(51, 56)
    record["qc_process"] = field(56, 59)

    # Air temp
    record["air_temp_c"] = scaled(field(87, 92), 10.0)

    return record

# Preview the first 20 records: the parsed fields, then the raw line for
# comparison. Blank lines are dropped first: splitting on "\n" leaves an
# empty string after the file's final newline, and parse_isd_line raises
# ValueError on an empty line.
for line in [raw for raw in file.decode().split("\n") if raw][:20]:
    data = parse_isd_line(line)
    pprint.pprint(data)
    print(line, "\n")

{'air_temp_c': 2.1,
 'call_letters': None,
 'date': datetime.datetime(2020, 1, 1, 0, 0),
 'elevation_meters': 74,
 'latitude': 45.25,
 'longitude': 55.083,
 'qc_process': 'V02',
 'report_type': 'FM-12',
 'usaf_station_id': '359270',
 'var_data_len': 153,
 'wban_station_id': None}
0153359270999992020010100004+45250+055083FM-12+007499999V0201801N002019999999N999999999+00211-00011999999ADDGA1081+008001061GE19MSL   +99999+99999GF108991081999008001999999MA1999999101491MD1710111+9999REMSYN04835927 425// 81802 10021 21001 30149 57011 885//= 

{'air_temp_c': 1.9,
 'call_letters': None,
 'date': datetime.datetime(2020, 1, 1, 3, 0),
 'elevation_meters': 74,
 'latitude': 45.25,
 'longitude': 55.083,
 'qc_process': 'V02',
 'report_type': 'FM-12',
 'usaf_station_id': '359270',
 'var_data_len': 169,
 'wban_station_id': None}
0169359270999992020010103004+45250+055083FM-12+007499999V0201601N002019999999N999999999+00191-00061999999ADDGA1081+008001061GE19MSL   +99999+99999GF108991081999008001999999MA199