# GHCNd station data

These are data from the Global Historical Climatology Network daily (GHCNd):

> https://www.ncei.noaa.gov/products/land-based-station/global-historical-climatology-network-daily
>
> The Global Historical Climatology Network daily (GHCNd) is an integrated database of daily climate summaries from land surface stations across the globe. GHCNd is made up of daily climate records from numerous sources that have been integrated and subjected to a common suite of quality assurance reviews.
>
> GHCNd contains records from more than 100,000 stations in 180 countries and territories. NCEI provides numerous daily variables, including maximum and minimum temperature, total daily precipitation, snowfall, and snow depth. About half the stations only report precipitation. Both record length and period of record vary by station and cover intervals ranging from less than a year to more than 175 years.


In [None]:
import sys
import time
from pathlib import Path

import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import yaml

# Official NOAA location of the GHCNd station inventory.
ghcnd_stations_remote_url = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/ghcnd-stations.txt'
# Previously downloaded copy of the same file; it only exists on the original author's machine.
ghcnd_stations_local_copy = '/home/javi/Research/cordex-fps-urb-rcc/git/cordex-fps-urb-rcc/ghcnd-stations.txt'
# Prefer the local copy (no download needed) but fall back to NOAA so the
# notebook still runs on machines where that path does not exist.
ghcnd_stations_url = (
  ghcnd_stations_local_copy
  if Path(ghcnd_stations_local_copy).is_file()
  else ghcnd_stations_remote_url
)
# Column names and character widths of the fixed-width inventory file; the two
# lists are paired positionally when handed to pd.read_fwf.
ghcnd_stations_column_names = ['code', 'lat', 'lon', 'elev', 'name', 'net', 'numcode']
ghcnd_stations_column_widths = [   11,     9,    10,      7,     34,     4,       10 ]

In [None]:
# The NOAA station inventory has no header row. With header=0 plus explicit
# names, pandas would treat the first record as a header and drop it, so
# header=None is required to keep the first station.
df = pd.read_fwf(
  ghcnd_stations_url,
  header = None,
  widths = ghcnd_stations_column_widths,
  names = ghcnd_stations_column_names,
)
# Turn the lon/lat columns into point geometries in geographic coordinates (EPSG:4326).
ghcnd_stations = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs = 'EPSG:4326')

In [None]:
# Quick-look scatter of every station in the inventory.
p = ghcnd_stations.plot(
  markersize=2,
  figsize=(20, 10),
)

## Read city databases

Read in OECC city info

In [None]:
# Lines starting with '#' in the CSV are skipped as comments; the `domain` and
# `ktype` columns are loaded as categoricals.
df = pd.read_csv(
  'city_info_out.csv',
  comment='#',
  dtype={'domain': 'category', 'ktype': 'category'},
)
# One point per city, built from its lon/lat columns (EPSG:4326).
city_info = gpd.GeoDataFrame(
  df,
  geometry=gpd.points_from_xy(df.lon, df.lat),
  crs='EPSG:4326',
)

and the names of the currently selected cities

In [None]:
# safe_load only builds plain Python containers and scalars, which is all a
# list of city names needs, and it cannot instantiate arbitrary objects.
# The explicit encoding keeps names such as "São Paulo" intact regardless of
# the platform default (assumes the file is UTF-8 -- confirm).
with open('selected_cities.yaml', encoding='utf-8') as fp:
  selected = yaml.safe_load(fp)['cordex_core_analysis']

## Plot nearby stations (functions)

By default, we select the stations within half a degree (measured in longitude/latitude degrees, not kilometres) of the city center

In [None]:
def nearby_stations(city, maxdis = 0.5):
  """Return the GHCNd stations located within `maxdis` of a city.

  The city is looked up by name in the module-level `city_info` table. The
  distance is computed on the EPSG:4326 geometries of `ghcnd_stations`, so
  `maxdis` is expressed in degrees. The matching stations are returned
  reprojected to EPSG:3857.
  """
  city_row = city_info.query(f'city == "{city}"').squeeze()
  within_reach = ghcnd_stations.distance(city_row.geometry) < maxdis
  selection = ghcnd_stations[within_reach]
  return selection.to_crs(epsg=3857)

def plot_nearby_stations(city, nearby_stations, maxdis = 0.5, highlight=None):
  """Map the stations around a city on top of a basemap and save the figure.

  Parameters:
    city: city name, looked up in the module-level `city_info` table.
    nearby_stations: GeoDataFrame of stations to draw and label in red
      (as returned by `nearby_stations()`).
    maxdis: half-width of the mapped box around the city, in degrees.
    highlight: optional GeoDataFrame of stations to draw and label again in
      blue; `None` or an empty frame draws no highlight.

  The figure is written to
  `results/pictures/Nearby_stations_cycle_<city>.png`.
  """
  citydf = city_info.query(f'city == "{city}"').squeeze()
  # ll=True: the bounding box is given in lon/lat. The default contextily tile
  # provider is used; pass `source=cx.providers...` here to choose another one.
  img, ext = cx.bounds2img(
    citydf.lon - maxdis, citydf.lat - maxdis, citydf.lon + maxdis, citydf.lat + maxdis, ll=True,
  )
  ax = nearby_stations.plot(figsize=(15,15), color = 'red')
  for x, y, label in zip(nearby_stations.geometry.x, nearby_stations.geometry.y, nearby_stations.name):
    ax.annotate(label, xy=(x, y), xytext=(10, -10), textcoords='offset points', color = 'red')
  # Both the markers and their labels are skipped when there is nothing to
  # highlight; a plain empty DataFrame has no `.geometry` to iterate over.
  if highlight is not None and not highlight.empty:
    highlight.plot(ax=ax, color='blue', edgecolor='white', zorder=100)
    for x, y, label in zip(highlight.geometry.x, highlight.geometry.y, highlight.name):
      ax.annotate(label, xy=(x, y), xytext=(10, -10), textcoords='offset points', color = 'blue', fontsize=20)
  ax.imshow(img, extent=ext)
  # Address the map's own axes/figure rather than pyplot's "current" ones.
  ax.set_title(city)
  # savefig does not create missing directories.
  Path('results/pictures').mkdir(parents=True, exist_ok=True)
  ax.figure.savefig('results/pictures/Nearby_stations_cycle_' + city + '.png', facecolor='white')

def get_ghcnd_df(code):
  """Fetch the records of one GHCNd station as a DataFrame.

  The gzipped CSV is read from `<baseurl>/<first character of code>/<code>.csv.gz`,
  with its `DATE` column parsed as the (datetime) index.

  Any failure to build the URL, download or parse the file is reported on
  stdout and an empty DataFrame is returned, so callers can skip the station.
  """
  baseurl = 'http://meteo.unican.es/work/chus/ghcnd/data'
  try:
    rval = pd.read_csv(f'{baseurl}/{code[0]}/{code}.csv.gz', compression='gzip', index_col='DATE', parse_dates=True)
  except Exception as err:
    # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate, and
    # the cause is printed instead of being silently discarded.
    print(f'Problem downloading {code}: {err!r}')
    rval = pd.DataFrame()
  return rval

def available_vars(station, variables=('TMIN',)):
  """Return the subset of `variables` present as columns of `station`.

  Parameters:
    station: DataFrame of station records (one column per variable).
    variables: iterable of column names of interest. It defaults to TMIN only;
      pass e.g. ('PRCP', 'TAVG', 'TMAX', 'TMIN', 'SNWD') to check more.

  Returns:
    A set with the requested names that are columns of `station`.
  """
  return set(station.columns).intersection(variables)

def plot_station_timeseries(city, stations, var = 'PRCP', v_records = 0.5, idate='1979-01-01', fdate='2014-12-31'):
  """Plot the time series of `var` for the stations with enough valid data.

  Each station in `stations` is downloaded with `get_ghcnd_df`. If `var` is
  among its `available_vars`, the fraction of non-missing records between
  `idate` and `fdate` (both included) is printed. Stations whose fraction
  exceeds `v_records` get a time-series figure.

  Parameters:
    city: city name, used only in the figure titles.
    stations: table of candidate stations with a `code` column.
    var: name of the variable (data column) to assess and plot.
    v_records: minimum fraction (0-1) of valid records required.
    idate, fdate: first and last date of the assessed period.

  Returns:
    The rows of `stations` whose series passed the threshold.
  """
  period = slice(idate, fdate)
  # Label-based slicing includes both end dates, so the period holds one more
  # day than the plain difference between them.
  ndays = (pd.to_datetime(fdate) - pd.to_datetime(idate)).days + 1
  valid_codes = []
  for stn_code in stations.code:
    stn_data = get_ghcnd_df(stn_code)
    if stn_data.empty:
      continue
    # Positional access: the frame is indexed by date, so `NAME[0]` would rely
    # on the deprecated integer-as-position fallback.
    stn_name = stn_data.NAME.iloc[0]
    availvars = available_vars(stn_data)
    print(f'{stn_name} {availvars}', end=' ')
    if var not in availvars:
      print(f'  No {var}')
      continue
    series = stn_data[var].loc[period]
    valid_records = series.notna().sum()/ndays
    print(f'  {var} has {100*valid_records:.1f}% valid records in {idate} to {fdate}')
    if valid_records > v_records:
      # New figure first: otherwise the series is drawn on the current axes.
      plt.figure()
      series.plot(
        figsize=(14,3),
        xlim=(idate,fdate),
        title = f'{city} -- {stn_name} ({100*valid_records:.0f}%)'
      )
      valid_codes.append(stn_code)
  return stations[stations.code.isin(valid_codes)]

In [None]:
def plot_city(city_name, maxdis = 0.5, v_records = 0.3):
  """Produce all the plots for one city.

  Stations within `maxdis` of the city are collected. Their TMIN series are
  plotted when the fraction of valid records exceeds `v_records`. A map then
  shows every nearby station, highlighting those that passed. A message is
  printed instead when no station lies within reach.
  """
  candidates = nearby_stations(city_name, maxdis = maxdis)
  if candidates.empty:
    print('No nearby stations in GHCNd')
    return
  stations_with_data = plot_station_timeseries(city_name, candidates, 'TMIN', v_records)
  plot_nearby_stations(city_name, candidates, maxdis= maxdis, highlight=stations_with_data)

In [None]:
import warnings
# Hide only the UserWarning alerting on the approximate distance in a geographic
# CRS; every other warning stays visible. The message is matched as a regex
# against the start of the warning text (wording as emitted by geopandas --
# adjust if it changes between versions).
warnings.filterwarnings('ignore', category=UserWarning, message='Geometry is in a geographic CRS')

# Selected cities

For each city, the surrounding GHCNd stations whose fraction of valid values exceeds the `v_records` threshold (0.3, i.e. 30%, unless a cell overrides it) are shown as time series. The city map displays all surrounding stations (in red) along with those meeting the valid-data criterion (in blue).

Next cell contents generated by:

```python
for city in selected:
  print(f'## {city}\nplot_city("{city}")')
```

```python
print(' · '.join([f'[{city}](#{city.replace(" ","-")})' for city in selected]))
```

[Cairo](#Cairo) · [Lagos](#Lagos) · [Johannesburg](#Johannesburg) · [Luanda](#Luanda) · [Khartoum](#Khartoum) · [Sydney](#Sydney) · [Melbourne](#Melbourne) · [Mexico City](#Mexico-City) · [Moscow](#Moscow) · [Tashkent](#Tashkent) · [Tehran](#Tehran) · [Dhaka](#Dhaka) · [Beijing](#Beijing) · [Tokyo](#Tokyo) · [Chengdu](#Chengdu) · [Seoul](#Seoul) · [Shanghai](#Shanghai) · [Paris](#Paris) · [London](#London) · [Istanbul](#Istanbul) · [Berlin](#Berlin) · [Helsinki](#Helsinki) · [New York](#New-York) · [Chicago](#Chicago) · [Los Angeles](#Los-Angeles) · [Montreal](#Montreal) · [Toronto](#Toronto) · [Buenos Aires](#Buenos-Aires) · [Lima](#Lima) · [São Paulo](#São-Paulo) · [Santiago](#Santiago) · [Jakarta](#Jakarta) · [Quezon City [Manila]](#Quezon-City-[Manila]) · [Singapore](#Singapore) · [Mumbai](#Mumbai) · [Riyadh](#Riyadh) · [Delhi [New Delhi]](#Delhi-[New-Delhi]) · [Baghdad](#Baghdad)

## Cairo

In [None]:
# Cairo is the first entry of the generated city list; London has its own cell further down.
plot_city("Cairo")

## Lagos

In [None]:
plot_city("Lagos")

## Johannesburg

In [None]:
plot_city("Johannesburg")

## Luanda

In [None]:
plot_city("Luanda")

## Khartoum

In [None]:
plot_city("Khartoum")

## Sydney

In [None]:
plot_city("Sydney")

## Melbourne

In [None]:
plot_city("Melbourne")

## Mexico City

In [None]:
plot_city("Mexico City")

## Moscow

In [None]:
plot_city("Moscow")

## Tashkent

In [None]:
plot_city("Tashkent")

## Tehran

In [None]:
plot_city("Tehran")

## Dhaka

In [None]:
plot_city("Dhaka")

## Beijing

In [None]:
# Wider search box than plot_city's 0.5-degree default.
# NOTE(review): presumably too few stations lie within 0.5 degrees of Beijing -- confirm.
plot_city("Beijing", maxdis = 2)

## Tokyo

In [None]:
plot_city("Tokyo")

## Chengdu

In [None]:
plot_city("Chengdu")

## Seoul

In [None]:
plot_city("Seoul")

## Shanghai

In [None]:
plot_city("Shanghai")

## Paris

In [None]:
plot_city("Paris")

## London

In [None]:
plot_city("London")

## Istanbul

In [None]:
plot_city("Istanbul")

## Berlin

In [None]:
plot_city("Berlin")

## Helsinki

In [None]:
plot_city("Helsinki")

## New York

In [None]:
plot_city("New York")

## Chicago

In [None]:
plot_city("Chicago")

## Los Angeles

In [None]:
plot_city("Los Angeles")

## Montreal

In [None]:
plot_city("Montreal")

## Toronto

In [None]:
plot_city("Toronto")

## Buenos Aires

In [None]:
plot_city("Buenos Aires")

## Lima

In [None]:
plot_city("Lima")

## São Paulo

In [None]:
plot_city("São Paulo")

## Santiago

In [None]:
plot_city("Santiago")

## Jakarta

In [None]:
plot_city("Jakarta")

## Quezon City [Manila]

In [None]:
plot_city("Quezon City [Manila]")

## Singapore

In [None]:
plot_city("Singapore")

## Mumbai

In [None]:
plot_city("Mumbai")

## Riyadh

In [None]:
plot_city("Riyadh")

## Delhi [New Delhi]

In [None]:
plot_city("Delhi [New Delhi]")

## Baghdad

In [None]:
plot_city("Baghdad")