Extracting HydroGFD data from the HydroGFD database for Europe 
==============================================================
Data is extracted in the same way as in Data Combination and Cleaning.ipynb. See that file for reference.


In [1]:
# Load Libraries
import numpy as np
import pandas as pd
import netCDF4 as nc
import glob
from datetime import datetime, timedelta

In [4]:
# Extract at 49.82 N and 7.6 E
# Pull from 2003 to 2007
precipFiles = glob.glob("HydroGFD/prAdjust*")[-3:]

# Loop through files and extract data and store in single dataframe
precip = pd.DataFrame(columns=["Lat", "Lon", "Time", "Precipitation"])

for file in precipFiles:
    ncid = nc.Dataset(file)

    #Pull out the time data and coordiante data
    time = ncid.variables["time"][:].filled(np.nan)
    lat = ncid.variables["lat"][:].filled(np.nan)
    lon = ncid.variables["lon"][:].filled(np.nan)

    # Finding nearest neighbor to 49.82 N and 7.6 E
    latIndex = np.abs(lat - 49.82).argmin()
    lonIndex = np.abs(lon - 7.6).argmin()

    # Pull out the precipitation data
    prAdjust = ncid.variables["prAdjust"][:, latIndex, lonIndex].filled(np.nan)

    # Place the lat, lon, time, and precipitation data into a dataframe
    df = pd.DataFrame({"Lat": 49.82, "Lon": 7.6, "Time": time, "Precipitation": prAdjust})
    precip = pd.concat([precip, df], ignore_index=True)
    
    print("Finished extracting data from " + file[-20:-3])
    ncid.close()

# Convert the time data to datetime format
precip["Time"] = precip["Time"].apply(lambda x: datetime(1850, 1, 1) + timedelta(days=x))

  precip = pd.concat([precip, df], ignore_index=True)


Finished extracting data from 20000101-20041231
Finished extracting data from 20050101-20051231
Finished extracting data from 20060101-20101231


In [6]:
# Do same thing for temperature data
tempFiles = glob.glob("HydroGFD/tasAdjust*")[-3:]

temp = pd.DataFrame(columns=["Lat", "Lon", "Time", "Temperature"])

for file in tempFiles:
    ncid = nc.Dataset(file)

    #Pull out the time data and coordiante data
    time = ncid.variables["time"][:].filled(np.nan)
    lat = ncid.variables["lat"][:].filled(np.nan)
    lon = ncid.variables["lon"][:].filled(np.nan)

    # Finding nearest neighbor to 49.82 N and 7.6 E
    latIndex = np.abs(lat - 49.82).argmin()
    lonIndex = np.abs(lon - 7.6).argmin()

    # Pull out the temperature data
    tasAdjust = ncid.variables["tasAdjust"][:, latIndex, lonIndex].filled(np.nan)

    # Place the lat, lon, time, and temperature data into a dataframe
    df = pd.DataFrame({"Lat": 49.82, "Lon": 7.6, "Time": time, "Temperature": tasAdjust})
    temp = pd.concat([temp, df], ignore_index=True)
    
    print("Finished extracting data from " + file[-20:-3])
    ncid.close()

temp["Time"] = temp["Time"].apply(lambda x: datetime(1850, 1, 1) + timedelta(days=x))

  temp = pd.concat([temp, df], ignore_index=True)


Finished extracting data from 20000101-20041231
Finished extracting data from 20050101-20051231
Finished extracting data from 20060101-20101231


In [7]:
# Combine the precipitation and temperature data into a single dataframe
data = pd.merge(precip, temp, on=["Lat", "Lon", "Time"])

# Save the data to a csv file
data.to_csv("Europe_Data.csv", index=False)