In [6]:
import json
import requests
# Sample payload shaped like a typical weather-API JSON response
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Decode the JSON text into nested Python dicts and lists
data = json.loads(sample_json)

# Show one top-level field, one nested object, and the first list element
for label, value in (
    ("Station:", data["station"]),
    ("Location:", data["location"]),
    ("First observation:", data["observations"][0]),
):
    print(label, value)

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


In [10]:
# 1. Extract and print all dates and temperatures (8 points)
print("Date, Temperature")
for record in data['observations']:
    # Pull both fields out first, then print them space-separated on one row
    day, temp = record['date'], record['temperature']
    print(day, temp)

Date, Temperature
2023-01-01 32
2023-01-02 28
2023-01-03 35
2023-01-04 38
2023-01-05 41


In [13]:
# 2. Calculate average temperature (8 points)
# Collect the temperatures once, then derive the total and the count from that list.
temperatures = [obs['temperature'] for obs in data['observations']]
total_temp = sum(temperatures)
count = len(temperatures)

# Guard the division so an empty observation list reports 0 instead of raising
# ZeroDivisionError (the same convention the live-API average in this notebook uses).
avg_temp = total_temp / count if count else 0

print(f"Average temperature: {avg_temp}°F")

Average temperature: 34.8°F


In [14]:
# 3. Find days with precipitation (9 points)
print("\nDays with precipitation:")

# Keep only the observations that recorded some precipitation, then list them
wet_records = [rec for rec in data['observations'] if rec['precipitation'] > 0]
for rec in wet_records:
    print(rec['date'], rec['precipitation'])


Days with precipitation:
2023-01-02 0.5
2023-01-04 0.2


In [20]:
# Use a real weather API (you may need to sign up for a free API key)
# Example APIs: OpenWeatherMap, NOAA, Weather.gov
# This cell uses api.weather.gov and sends no API key.
import requests

# api.weather.gov asks clients to identify themselves with a User-Agent header.
# TODO: replace the placeholder contact with your own before sharing the notebook.
NWS_HEADERS = {"User-Agent": "(weather-notebook-assignment, contact@example.com)"}
NWS_TIMEOUT_SECONDS = 30  # never let a stalled connection hang the kernel


def fetch_json(url):
    """Fetch ``url`` and return its decoded JSON body.

    Raises:
        requests.HTTPError: if the server answers with an error status, so a
            failed call stops here instead of surfacing later as a KeyError.
        requests.Timeout: if no response arrives within NWS_TIMEOUT_SECONDS.
    """
    response = requests.get(url, headers=NWS_HEADERS, timeout=NWS_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()


def celsius_to_fahrenheit(temp_c):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    return temp_c * 9 / 5 + 32


def observation_value(properties, field):
    """Return ``properties[field]['value']``, or None if the field is missing or null."""
    measurement = properties.get(field) or {}
    return measurement.get('value')


points_url = "https://api.weather.gov/points/40.7789,-73.9692"
points_data = fetch_json(points_url)

stations_url = points_data['properties']['observationStations']
stations_data = fetch_json(stations_url)
if not stations_data['features']:
    raise RuntimeError(f"No observation stations listed at {stations_url}")
# The feature 'id' is used as a URL prefix below, i.e. it is the station's full URL.
station_id = stations_data['features'][0]['id']

# Stored under its own name so the sample `data` dict used by the earlier
# exercise cells is not overwritten (those cells can still be re-run afterwards).
nws_data = fetch_json(f"{station_id}/observations")

# Walk the observations once, collecting what all three exercises need.
readings = []        # (timestamp, temperature in °F) for observations that report one
precip_days = set()  # calendar dates with measurable precipitation in the last hour
for obs in nws_data['features']:
    props = obs['properties']

    # Values are converted on the assumption that the API reports degrees Celsius.
    temp = observation_value(props, 'temperature')
    if temp is not None:
        readings.append((props['timestamp'], celsius_to_fahrenheit(temp)))

    precip = observation_value(props, 'precipitationLastHour')
    if precip is not None and precip > 0:
        # The date is cut from the timestamp as-is; the timestamps in this cell's
        # output carry a +00:00 offset, so these are UTC dates, not local ones.
        precip_days.add(props['timestamp'].split("T")[0])


# 1. Extract and print all dates and temperatures
print("Date, Temperature")
for timestamp, temp_f in readings:
    print(timestamp, f"{temp_f:.1f}°F")


# 2. Calculate average temperature
total_temp = sum(temp_f for _timestamp, temp_f in readings)
count = len(readings)

# Report 0 rather than dividing by zero when no observation carried a temperature.
avg_temp = total_temp / count if count else 0
print(f"Average temperature: {avg_temp:.1f}°F")


# 3. Find days with precipitation
print("\nDays with precipitation:")

for day in sorted(precip_days):
    print(day)

Date, Temperature
2026-02-17T19:51:00+00:00 45.0°F
2026-02-17T18:51:00+00:00 44.1°F
2026-02-17T18:02:00+00:00 42.1°F
2026-02-17T17:51:00+00:00 41.0°F
2026-02-17T17:46:00+00:00 39.2°F
2026-02-17T17:24:00+00:00 39.9°F
2026-02-17T17:01:00+00:00 39.0°F
2026-02-17T16:51:00+00:00 39.0°F
2026-02-17T16:33:00+00:00 37.9°F
2026-02-17T15:51:00+00:00 37.9°F
2026-02-17T15:26:00+00:00 37.0°F
2026-02-17T14:51:00+00:00 37.0°F
2026-02-17T14:49:00+00:00 37.4°F
2026-02-17T14:02:00+00:00 37.0°F
2026-02-17T13:51:00+00:00 37.0°F
2026-02-17T12:51:00+00:00 36.0°F
2026-02-17T11:51:00+00:00 36.0°F
2026-02-17T11:11:00+00:00 36.0°F
2026-02-17T10:51:00+00:00 36.0°F
2026-02-17T09:51:00+00:00 36.0°F
2026-02-17T08:51:00+00:00 36.0°F
2026-02-17T07:51:00+00:00 36.0°F
2026-02-17T07:09:00+00:00 36.0°F
2026-02-17T06:51:00+00:00 36.0°F
2026-02-17T06:13:00+00:00 36.0°F
2026-02-17T05:51:00+00:00 36.0°F
2026-02-17T05:35:00+00:00 36.0°F
2026-02-17T04:51:00+00:00 36.0°F
2026-02-17T03:51:00+00:00 36.0°F
2026-02-17T02:51:00+00:00

In [2]:
import pooch
import os

# Set up Pooch to download a file
# This example downloads a small air quality dataset
air_quality_url = "https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv"

# No expected hash is supplied (known_hash=None), exactly as in the original call
file_path = pooch.retrieve(url=air_quality_url, known_hash=None)

print("File downloaded to:", file_path)
is_on_disk = os.path.exists(file_path)
print("File exists:", is_on_disk)

File downloaded to: /home/iw2303/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


In [16]:
# 1. Verify the file was downloaded (5 points)
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

# Count lines by streaming the file instead of stepping a manual counter
with open(file_path) as handle:
    line_count = sum(1 for _line in handle)

print(f"Number of lines: {line_count}")

File size: 3133 bytes
Number of lines: 57


In [17]:
# 2. Download another file (10 points)
# Find a climate dataset online using the sources we talked about in lecture
# Download it using Pooch

# This download gets its own variable names. Rebinding `file_path` here would make
# the air-quality verification cell silently inspect this file whenever that cell
# is re-run after this one.
climate_file_path = pooch.retrieve(
    url="https://data.ornldaac.earthdata.nasa.gov/public/eos_land_val/Maryland_Temperature_Humidity/comp/Collection_Log.csv",
    known_hash=None,
)

print("File downloaded to:", climate_file_path)
print("File exists:", os.path.exists(climate_file_path))

climate_file_size = os.path.getsize(climate_file_path)
print(f"File size: {climate_file_size} bytes")

# Stream the file to count its lines without loading it all into memory.
with open(climate_file_path) as handle:
    climate_line_count = sum(1 for _line in handle)

print(f"Number of lines: {climate_line_count}")

File downloaded to: /home/iw2303/.cache/pooch/c503dd74ae55f28196155f4b9cdce54b-Collection_Log.csv
File exists: True
File size: 3133 bytes
Number of lines: 57


In [4]:
# 3. Create a data inventory (5 points)
# List all the files you've downloaded in this assignment
inventory_entries = [
    "1. meteorites.csv - NASA meteorite landings",
    "2. air_quality_no2.csv - Air quality NO2 measurements",
    "3. Collection_Log.csv - Air temperature and humidity in Maryland",
]

# Heading first (preceded by a blank line), then one entry per row
print("\nData Inventory:")
for entry in inventory_entries:
    print(entry)


Data Inventory:
1. meteorites.csv - NASA meteorite landings
2. air_quality_no2.csv - Air quality NO2 measurements
3. Collection_Log.csv - Air temperature and humidity in Maryland


In [8]:
import requests

# OPeNDAP provides metadata in different formats
# We'll get basic info about a climate dataset

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

# Get DDS (Dataset Descriptor Structure) - describes the structure
dds_url = base_url + ".dds"

# Bound the wait and fail loudly on an HTTP error, so an error page is never
# printed below as if it were the dataset structure.
response = requests.get(dds_url, timeout=30)
response.raise_for_status()

print("Dataset Structure:")
print(response.text[:500])  # Print first 500 characters

Dataset Structure:
Dataset {
    Float32 T[T = 324];
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



# 1. Identify dimensions and variables (5 points)
# Look at the DDS output above and answer:
# - What are the dimension names? - T, Y, X (for Time, Latitude, Longitude)
# - What is the main variable name? - rain

In [11]:
# 2. Get data attributes (5 points)
# DAS (Dataset Attribute Structure) contains metadata
das_url = base_url + ".das"

# Bound the wait and fail loudly on an HTTP error, so an error page is never
# printed below as if it were the attribute listing.
response = requests.get(das_url, timeout=30)
response.raise_for_status()

print("Data Attributes:")
print(response.text[:1000])  # Print first 1000 characters

Data Attributes:
Attributes {
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



# 3. Document what you learned (5 points)
# - What does this dataset contain?
    monthly precipitation data
# - What time period does it cover?
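    27 years of monthly data (324 time steps) - utilized the DDS output as well here, since the DAS output only gives us String units "months since 1960-01-01" and String calendar "360", while the DDS adds Float32 T[T = 324]. Dividing the 324 months by 12 months/year gives 27 years. Note that "months since 1960-01-01" is only the reference date the T values are counted from, not necessarily the first month of data. If T starts at 0, the coverage is January 1960 through December 1986 (1960-1986, not 1960-1987, because 27 full years starting in 1960 end in 1986). The actual T values need to be read to confirm the start date.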
# - What geographic region does it cover?
    Global - here I again needed to utilize the DDS output in combination with the DAS output. From the DAS output we can take the 0.5 degree grid spacing noted as pointwidth, and I multiplied that by Y = 360 and X = 720 from the DDS output to get a 180 degree latitude range and a 360 degree longitude range, which is about 90 S to 90 N and 180 W to 180 E (or equivalently 0 to 360 E; the X values would need to be read to see which convention is used).
# - What are the units of the main variable?
    mm/day