From 626fd244893184f9608a96207995a6bc737fdf30 Mon Sep 17 00:00:00 2001
From: Fabien Maussion <fabien.maussion@uibk.ac.at>
Date: Thu, 12 Nov 2015 21:52:42 +0100
Subject: [PATCH] getting started

---
 docs/Getting_started.ipynb | 716 +++++++++++++++++++++++++++++++++++++
 1 file changed, 716 insertions(+)
 create mode 100644 docs/Getting_started.ipynb
diff --git a/docs/Getting_started.ipynb b/docs/Getting_started.ipynb
new file mode 100644
index 000000000..70bbc1a3f
--- /dev/null
+++ b/docs/Getting_started.ipynb
@@ -0,0 +1,716 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"https://raw.githubusercontent.com/OGGM/oggm/master/files/logo.png\" width=\"40%\"  align=\"left\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Getting started with OGGM: Öztal case study"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The OGGM workflow is best explained with an example. We are going to use on the testcase that we use for integration testing in oggm. The test files are located in a dedicated online repository, [oggm-sample-data](https://github.com/OGGM/oggm-sample-data)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the `test-workflow` directory you can have a look at the various files we will need. oggm also needs them for testing, so they are automatically available thanks to a simple mechanism:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from oggm.utils import get_demo_file\n",
+    "srtm_f = get_demo_file('srtm_oeztal.tif')\n",
+    "rgi_f = get_demo_file('rgi_oeztal.shp')\n",
+    "print(srtm_f)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The very first time that you make a call to `get_demo_file()`, oggm will create a hidden `.oggm` directory in your home folder$^*$ and download the demo files in it.\n",
+    "\n",
+    "<sub>*: this path might vary depending on your platform, see python's [expanduser](https://docs.python.org/3.5/library/os.path.html#os.path.expanduser)</sub>. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### DEM and glacier outlines"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The data directory also contains a subset of the RGI shapefile for Öztal:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import salem  # https://github.com/fmaussion/salem\n",
+    "rgi_shp = salem.utils.read_shapefile(rgi_f).set_index('RGIID')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's have a look at it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import cleo  # https://github.com/fmaussion/cleo\n",
+    "import shapely.geometry as shpg\n",
+    "# Plot defaults\n",
+    "plt.rcParams['figure.figsize'] = (8, 8)  # Default plot size\n",
+    "import logging\n",
+    "logging.basicConfig(format='%(asctime)s: %(name)s: %(message)s',\n",
+    "                    datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)\n",
+    "logger = logging.getLogger()\n",
+    "logger.setLevel(logging.INFO)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Open the DEM file\n",
+    "dem = salem.GeoTiff(srtm_f)\n",
+    "# Center the map on the region of interest\n",
+    "dem.set_subset(margin=-80)\n",
+    "cmap = cleo.Map(dem.grid, countries=False)\n",
+    "cmap.set_topography(dem.get_vardata())\n",
+    "# Add a reference point for Vent\n",
+    "cmap.set_geometry(shpg.Point(10.913889, 46.859444), facecolor='k', text='Vent')\n",
+    "# Plot the RGI file\n",
+    "cmap.set_shapefile(rgi_f, edgecolor='crimson', linewidth=2, label='RGI')\n",
+    "cmap.visualize()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### WGMS and GlaThiDa "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These are 19 selected glaciers where we have either mass-balance data (WGMS) or volume information (GlaThiDa). These data are required for calibration/validation and are also available from the test directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "df_gtd = pd.read_csv(get_demo_file('RGI_GLATHIDA_oeztal.csv'))\n",
+    "df_wgms = pd.read_csv(get_demo_file('RGI_WGMS_oeztal.csv'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "x, y = rgi_shp.loc[df_gtd.RGI_ID].CENLON, rgi_shp.loc[df_gtd.RGI_ID].CENLAT\n",
+    "cmap.set_points(x, y, marker='x', c='blue', label='GlaThiDa')\n",
+    "x, y = rgi_shp.loc[df_wgms.RGI_ID].CENLON, rgi_shp.loc[df_wgms.RGI_ID].CENLAT\n",
+    "cmap.set_points(x, y, marker='o', s=100, facecolor='none', edgecolor='purple', label='WGMS')\n",
+    "cmap.visualize()\n",
+    "plt.legend();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Climate data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use HISTALP data which goes back further in time than CRU:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "nc = salem.GeoNetcdf(get_demo_file('HISTALP_oeztal.nc'))\n",
+    "t2 = np.mean(nc.get_vardata('temp'), axis=0)\n",
+    "cmap.set_data(t2, nc.grid)\n",
+    "cmap.set_cmap(plt.get_cmap('viridis'))\n",
+    "cmap.set_nlevels(256)\n",
+    "cmap.visualize()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "fig = plt.figure(figsize=(9, 3))\n",
+    "pd.Series(nc.get_vardata('temp')[:, 3, 3], index=nc.time).resample('A').plot()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setting up an OGGM run"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "OGGM parameters are gathered in a configuration file. The [default file](https://github.com/OGGM/oggm/blob/master/oggm/params.cfg) is shipped with the code. It is used to initialize the configuration module:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import oggm\n",
+    "import oggm.conf as cfg\n",
+    "from oggm import workflow\n",
+    "cfg.initialize()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, the default working directory is also located in the home directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "print(cfg.paths)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The necessary data files are missing. Let's set them so that the oggm modules now where to look for the data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "cfg.paths['srtm_file'] = get_demo_file('srtm_oeztal.tif')\n",
+    "cfg.paths['histalp_file'] = get_demo_file('HISTALP_oeztal.nc')\n",
+    "cfg.paths['wgms_rgi_links'] = get_demo_file('RGI_WGMS_oeztal.csv')\n",
+    "cfg.paths['glathida_rgi_links'] = get_demo_file('RGI_GLATHIDA_oeztal.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We keep the other parameters to their default values, for example the precipitation scaling factor:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "print(cfg.params['prcp_scaling_factor'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is for the glacier divides (currently only Hintereisferner):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "cfg.set_divides_db(get_demo_file('HEF_divided.shp'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Glacier working directories"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "An OGGM run is constituted of several successive tasks to be applied on each glacier. Because these tasks can be computationally and data demanding, they are split in smaller tasks, each of them storing their results in a glacier directory. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The very first task is always:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Read in the RGI file\n",
+    "import geopandas as gpd\n",
+    "rgi_file = get_demo_file('rgi_oeztal.shp')\n",
+    "rgidf = gpd.GeoDataFrame.from_file(rgi_file)\n",
+    "# Just in case you do the tutorial for a second time, we delete the content of the directory\n",
+    "cfg.reset_working_dir()\n",
+    "# Initialise directories\n",
+    "gdirs = oggm.workflow.init_glacier_regions(rgidf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that if I run the init_glacier_regions a second time, nothing special happens:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "gdirs = workflow.init_glacier_regions(rgidf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is because the directories have been define already. Now what is the variable `gdirs`? It is a list of 19 `oggm.conf.GlacierDir` objects. They are here to help us to handle data I/O:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "gdir = gdirs[13]\n",
+    "print(gdir.dir)\n",
+    "print(gdir.rgi_id)\n",
+    "print(gdir.rgi_date)\n",
+    "print(gdir.glacier_area)\n",
+    "print(gdir.grid.dx)\n",
+    "print(gdir.get_filepath('dem'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`dem.tif` is a local digital elevation model with a spatial resolution chosen as a function of the size of the model. These GlacierDir objects are going to be the input of almost every OGGM function. It is therefore very easy for the function to get the input it needs from the data stored in the directory. For example, to make a map:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from oggm import graphics\n",
+    "graphics.plot_googlemap(gdir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## OGGM tasks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The workflow of OGGM is oriented around the concept of \"tasks\". There are two different types:\n",
+    "\n",
+    "**Entity Task**:\n",
+    "  Standalone operations to be realized on one single glacier entity,\n",
+    "  independently from the others. The majority of OGGM\n",
+    "  tasks are entity tasks. They are parallelisable.\n",
+    "\n",
+    "**Global Task**:\n",
+    "  tasks which require to work on several glacier entities\n",
+    "  at the same time. Model parameter calibration or interpolation of degree day factors belong to\n",
+    "  this type of task. They are not parallelisable.\n",
+    "  \n",
+    "OGGM implements a very simple mechanism to run a specific task on a list of `GlacierDir` objects (here, the function `glacier_masks()` from the module `oggm.prepro.gis`):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "workflow.execute_task(oggm.prepro.gis.glacier_masks, gdirs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It is also possible to apply several tasks sequentially:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "list_talks = [\n",
+    "         oggm.prepro.centerlines.compute_centerlines,\n",
+    "         oggm.prepro.centerlines.compute_downstream_lines,\n",
+    "         oggm.prepro.geometry.catchment_area,\n",
+    "         oggm.prepro.geometry.initialize_flowlines,\n",
+    "         oggm.prepro.geometry.catchment_width_geom,\n",
+    "         oggm.prepro.geometry.catchment_width_correction\n",
+    "         ]\n",
+    "for task in list_talks:\n",
+    "    workflow.execute_task(task, gdirs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The function `execute_task` can run several tasks in parallel if specified in the configuration file. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We just computed the glacier flowlines and their width:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "graphics.plot_catchment_width(gdir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global tasks "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We will go into more detail about tasks and global tasks when I have more time to write a good documentation. For now, we will use the helper function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "workflow.climate_tasks(gdirs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "and:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "workflow.execute_task(oggm.prepro.inversion.prepare_for_inversion, gdirs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "fs, fd = oggm.prepro.inversion.optimize_inversion_params(gdirs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inversion"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We just optimised the parameters **fs** and **fd** required for the glacier thickness inversion. Now we can calculate their thickness:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "vol = []\n",
+    "area = []\n",
+    "for g in gdirs:\n",
+    "    v, a = oggm.prepro.inversion.inversion_parabolic_point_slope(g, fs=fs, fd=fd, write=True)\n",
+    "    vol.append(v)\n",
+    "    area.append(a)\n",
+    "df = pd.DataFrame()\n",
+    "df['area'] = np.array(area) * 1e-6\n",
+    "df['oggm_thick'] = np.array(vol) * 1e-9 / df['area'] * 1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "graphics.plot_inversion(gdir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Statistics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read in the reference data:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Ref data\n",
+    "gtd_df = pd.read_csv(cfg.paths['glathida_rgi_links']).sort_values(by=['RGI_ID'])\n",
+    "# Account for area differences between glathida and rgi\n",
+    "ref_area_km2 = gtd_df.RGI_AREA.values\n",
+    "ref_cs = gtd_df.VOLUME.values / (gtd_df.GTD_AREA.values**1.375)\n",
+    "df['ref_thick'] = ref_cs * ref_area_km2**1.375 / df['area'] * 1000"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add the volume obtained with volume area scaling for comparison: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "df['vas_thick'] = 0.034*(ref_area_km2**1.375) / df['area'] * 1000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))\n",
+    "ax1.scatter(df['ref_thick'], df['oggm_thick'], s=100)\n",
+    "ax1.set_title('OGGM RMSD: {:.2f}'.format(oggm.utils.rmsd(df['ref_thick'], df['oggm_thick'])))\n",
+    "ax1.set_xlabel('Ref thickness')\n",
+    "ax1.set_ylabel('OGGM thickness')\n",
+    "ax1.plot([0, 100], [0, 100], '.:k', zorder=0);\n",
+    "ax1.set_xlim([0, 100]), ax1.set_ylim([0, 100]);\n",
+    "ax2.scatter(df['ref_thick'], df['vas_thick'], s=100)\n",
+    "ax2.set_title('Volume-Area RMSD: {:.2f}'.format(oggm.utils.rmsd(df['ref_thick'], df['vas_thick'])))\n",
+    "ax2.set_xlabel('Ref thickness')\n",
+    "ax2.set_ylabel('VAS thickness')\n",
+    "ax2.plot([0, 100], [0, 100], '.:k', zorder=0);\n",
+    "ax2.set_xlim([0, 100]), ax2.set_ylim([0, 100]);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}