In [1]:
%%file weather_analyzer.py
"""weather_analyzer.py
  
Defines an mrjob job that reports the low/high temperature and record
count for each valid wind direction in the input weather records.
"""

import re
import json
from mrjob.job import MRJob


# Quality-code characters the mapper accepts; a wind or temperature reading
# whose quality character is not one of these is discarded.
QUALITY_RE = re.compile(r"[01459]")


class WeatherAnalyzer(MRJob):
  """
  MapReduce job summarizing temperature readings per wind direction.

  The mapper reads fixed-width weather records and emits one entry for
  each record whose wind and temperature readings are both usable. The
  reducer collapses the entries for one wind direction into a
  low/high/count summary.
  """

  # Shortest record that still contains every field the mapper reads; the
  # last one is the temperature quality character at index 92.
  _MIN_RECORD_LENGTH = 93

  def mapper(self, _, line):
    """
    Input: extracts wind direction, wind quality, temperature,
      and temperature quality from one fixed-width record.

    Args:
      _: Input key; ignored.
      line: One raw text record.

    Yields:
      (wind_direction, data) where wind_direction is the 3-character
      string at columns 60-62 and data is a dict with the keys
      "wind_quality", "temperature", and "temperature_quality" (all
      strings, exactly as they appear in the record). Nothing is yielded
      for records that are too short, carry the missing-value markers
      "999" / "+9999", or have a quality code not matched by QUALITY_RE.
    """
    # Clean data.
    val = line.strip()

    # Sad path: blank or truncated lines cannot hold all four fields.
    # Indexing them would raise IndexError and abort the whole job, so
    # skip them like any other unusable record.
    if len(val) < self._MIN_RECORD_LENGTH:
      return

    # Extract and map data.
    wind_direction = val[60:63]
    wind_quality = val[63]
    temperature = val[87:92]
    temperature_quality = val[92]

    # Sad path: do not keep unknown values/mismatched qualities.
    if wind_direction == "999" or not QUALITY_RE.match(wind_quality):
      return
    if temperature == "+9999" or not QUALITY_RE.match(temperature_quality):
      return

    # Yield the valid direction and dictionary of data.
    yield wind_direction, {
                            "wind_quality": wind_quality,
                            "temperature": temperature,
                            "temperature_quality": temperature_quality
                          }


  def reducer(self, key, values):
    """
    Output: shows the low/high temperature and count for each
      valid wind direction.

    Args:
      key: Wind direction string emitted by the mapper.
      values: Iterable of the data dicts emitted for that direction.

    Yields:
      (int(key), {"low": ..., "high": ..., "count": ...}) where low and
      high are the minimum and maximum of the temperatures converted with
      int(), and count is the number of values seen. Nothing is yielded
      if values is empty.
    """
    # Track the running extremes instead of collecting every reading, so
    # memory use does not grow with the number of records per direction.
    low = None
    high = None
    count = 0
    for data in values:
      temperature = int(data["temperature"])
      low = temperature if low is None else min(low, temperature)
      high = temperature if high is None else max(high, temperature)
      count += 1

    # No readings means there is nothing to summarize.
    if count == 0:
      return

    # Yields the key and a dictionary with the low, high, and count.
    yield int(key), {"low": low, "high": high, "count": count}



# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
  WeatherAnalyzer.run()


Overwriting weather_analyzer.py


In [2]:
!python weather_analyzer.py --no-bootstrap-mrjob 1901 1902

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/weather_analyzer.thomas.20231001.214501.070329
Running step 1 of 1...
job output is in /tmp/weather_analyzer.thomas.20231001.214501.070329/output
Streaming final output from /tmp/weather_analyzer.thomas.20231001.214501.070329/output...
250	{"low":-222,"high":311,"count":604}
160	{"low":-239,"high":289,"count":647}
20	{"low":-272,"high":317,"count":582}
180	{"low":-250,"high":294,"count":879}
230	{"low":-228,"high":283,"count":1488}
340	{"low":-300,"high":311,"count":427}
110	{"low":-239,"high":278,"count":296}
140	{"low":-328,"high":278,"count":1005}
70	{"low":-333,"high":278,"count":502}
90	{"low":-267,"high":272,"count":567}
200	{"low":-183,"high":300,"count":688}
320	{"low":-311,"high":306,"count":1152}
290	{"low":-328,"high":306,"count":379}
270	{"low":-211,"high":278,"count":931}
360	{"low":-267,"high":289,"count":888}
50	{"low":-322,"high":306,"count":1039}
Rem