In [26]:
%%file wind_temperature.py

import re
import json

from mrjob.job import MRJob

# Pre-compiled pattern that matches a single character out of 0, 1, 4, 5 or 9.
# The mapper applies it to the one-character wind-quality field and keeps a
# record only when the field matches.
# NOTE(review): presumably these are the "acceptable" quality codes of the
# input record format -- confirm against the dataset documentation.
QUALITY_RE = re.compile(r"[01459]")

class WindTemperature(MRJob):
    """Summarise temperature readings per wind heading.

    For every distinct wind-heading field found in the input, the job emits
    the lowest and highest temperature value seen together with the number
    of readings that contributed to them.
    """

    # The mapper reads characters up to (but excluding) index 92, so a
    # stripped line shorter than this cannot contain all three fields.
    MIN_RECORD_LENGTH = 92

    # Counter group under which skipped records are reported.
    COUNTER_GROUP = "WindTemperature"

    def mapper(self, _, line):
        """Extract a (wind heading, reading) pair from one fixed-width line.

        Args:
            _: Input key; unused.
            line: One raw input line.

        Yields:
            ``(wind_heading, {"temp": int, "count": 1})`` when the record is
            usable.  A record is dropped when the temperature field is
            ``"+9999"``, the wind heading is ``"999"`` (presumably
            missing-value markers -- confirm against the record format), or
            the wind-quality character does not match ``QUALITY_RE``.

        Lines that are too short to hold every field, or whose temperature
        field is not an integer, are skipped and tallied in job counters
        instead of aborting the whole job with IndexError / ValueError.
        """
        val = line.strip()
        if len(val) < self.MIN_RECORD_LENGTH:
            # Blank or truncated line: val[63] / val[87:92] would be missing.
            self.increment_counter(self.COUNTER_GROUP, "short_records", 1)
            return

        wind_heading, wind_quality, temp = val[60:63], val[63], val[87:92]
        if (temp == "+9999"
                or wind_heading == "999"
                or not QUALITY_RE.match(wind_quality)):
            return

        try:
            temp_value = int(temp)
        except ValueError:
            # Corrupt temperature field; make the skip visible in counters.
            self.increment_counter(
                self.COUNTER_GROUP, "unparseable_temperature", 1)
            return

        yield wind_heading, {"temp": temp_value, "count": 1}

    def reducer(self, key, values):
        """Fold all readings of one wind heading into low / high / count.

        Args:
            key: Wind heading emitted by the mapper.
            values: Iterable of ``{"temp": int, "count": int}`` dicts.

        Yields:
            ``(key, {"low": min_temp, "high": max_temp, "count": count})``.
            ``low`` and ``high`` stay ``None`` if ``values`` is empty.
        """
        count = 0
        min_temp = None
        max_temp = None
        for reading in values:
            reading_temp = reading["temp"]
            # The first reading seeds both extremes; later ones narrow them.
            if min_temp is None or reading_temp < min_temp:
                min_temp = reading_temp
            if max_temp is None or reading_temp > max_temp:
                max_temp = reading_temp
            count += reading["count"]
        yield key, {"low": min_temp, "high": max_temp, "count": count}
            
    
# Start the job only when this file is executed as a script, not when it is
# imported; `run()` is inherited from MRJob.
if __name__ == '__main__':
    WindTemperature.run()

Overwriting wind_temperature.py


In [27]:
!python wind_temperature.py -r local 1901 1902

No configs found; falling back on auto-configuration
No configs specified for local runner
Creating temp directory /tmp/wind_temperature.oreomilkshake.20240929.082428.142748
Running step 1 of 1...
job output is in /tmp/wind_temperature.oreomilkshake.20240929.082428.142748/output
Streaming final output from /tmp/wind_temperature.oreomilkshake.20240929.082428.142748/output...
"110"	{"low":-239,"high":278,"count":296}
"140"	{"low":-328,"high":278,"count":1005}
"200"	{"low":-183,"high":300,"count":688}
"290"	{"low":-328,"high":306,"count":379}
"320"	{"low":-311,"high":306,"count":1152}
"090"	{"low":-267,"high":272,"count":567}
"160"	{"low":-239,"high":289,"count":647}
"270"	{"low":-211,"high":278,"count":931}
"340"	{"low":-300,"high":311,"count":427}
"020"	{"low":-272,"high":317,"count":582}
"250"	{"low":-222,"high":311,"count":604}
"180"	{"low":-250,"high":294,"count":879}
"070"	{"low":-333,"high":278,"count":502}
"360"	{"low":-267,"high":289,"count":888}
"230"	{"low":-228,"high":283,"cou