# **Module 1: Introduction to Spatial Data Analysis in Python**

In [None]:
import geopandas as gpd
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import rasterio
import numpy as np
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap
import matplotlib.colors as colors
from rasterio.crs import CRS
from rasterio.plot import plotting_extent
# Use seaborn's "whitegrid" theme for the matplotlib figures drawn below.
sns.set_style("whitegrid")

## **Exercises**

### Data
- `ea_geo.csv` - Malawi Living Standards Measurement Study – Integrated Household Survey (LSMS-IHS) point data are available from https://microdata.worldbank.org/index.php/catalog/3818  
- `mwi_lsms.shp` - Malawi subnational divisions can be downloaded from https://data.humdata.org/dataset/malawi-administrative-level-0-3-boundaries
- `MWI_msk_alt.vrt` - Malawi Digital Elevation Model (DEM) is derived from NASA's Shuttle Radar Topography Mission data product. 

For today's workshop, the data have been downloaded, cleaned, transformed, and saved to the directory `./data-module-1/`.

**Question 1. Read and display the head of the `ea_geo.csv` file stored under `./data-module-1/`.**

In [None]:
# Load the LSMS-IHS point table from the workshop data folder.
ea_geo_df = pd.read_csv("./data-module-1/ea_geo.csv")
# Preview the first five rows (the default for ``head``).
ea_geo_df.head(n=5)

**Question 2. Which columns store the geographic coordinates? Transform the `DataFrame` into a `GeoDataFrame` using these columns. Display the head of the `GeoDataFrame`.**

In [None]:
# Build point geometries from the coordinate columns: x is the longitude
# (``ea_lon_mod``) and y is the latitude (``ea_lat_mod``).
ea_points = gpd.points_from_xy(ea_geo_df["ea_lon_mod"], ea_geo_df["ea_lat_mod"])

# Attach the geometry and declare the CRS (EPSG:4326) in one step instead of
# mutating the frame afterwards with ``set_crs(..., inplace=True)``.
ea_geo_gdf = gpd.GeoDataFrame(ea_geo_df, geometry=ea_points, crs="epsg:4326")

# The raw coordinate columns are now redundant with ``geometry``; drop them by
# reassignment rather than in place.
ea_geo_gdf = ea_geo_gdf.drop(columns=["ea_lon_mod", "ea_lat_mod"])
ea_geo_gdf.head()

**Question 3. Create an interactive map to ensure that the data have been properly geocoded.**

In [None]:
# Render the points on an interactive map to eyeball that they were geocoded
# where expected.
ea_geo_gdf.explore()

**Question 4. Create a static map with 2 subplots, one for each of the columns `dist_road` and `dist_border`. Display the values on a continuous color scale.**

In [None]:
# One row, two panels: each panel colours the LSMS points by one column on a
# continuous scale, with its own colorbar (``legend=True``).
fig, axs = plt.subplots(1, 2, figsize=(10, 8))

# (column to map, colormap, panel title) for each subplot, left to right.
panels = [
    ("dist_road", "Spectral", "Malawi LSMS distance to road"),
    ("dist_border", "plasma", "Malawi LSMS distance to border"),
]
for ax, (column, cmap, title) in zip(axs, panels):
    ea_geo_gdf.plot(ax=ax, column=column, legend=True, cmap=cmap)
    ax.set_title(title, weight="bold")

# End the cell with ``plt.show()`` so the figure is rendered without echoing
# the ``Text`` object returned by the last ``set_title`` call.
plt.show()

**Question 5. Read the `mwi_lsms.shp` shapefile located in the `./data-module-1/` folder. Display the first 2 records of `GeoDataFrame`.**

In [None]:
# Read the Malawi subnational divisions shapefile into a GeoDataFrame.
mwi_lsms_gdf = gpd.read_file("./data-module-1/mwi_lsms.shp")
# Show only the first two records.
mwi_lsms_gdf.head(n=2)

**Question 6. Explore the characteristics of the `GeoDataFrame`.**  
- How many columns does it have?
- How many rows does it have?
- Calculate summary statistics of the numerical fields.

In [None]:
# ``shape`` is (rows, columns); unpack it once and report both counts.
n_rows, n_cols = mwi_lsms_gdf.shape
print("Number of columns:", n_cols)
print("Number of rows:", n_rows)

# Summary statistics for the numerical fields, shown as the cell output.
print("Summarize numerical fields")
mwi_lsms_gdf.describe()

**Question 7. Create an interactive map of the `GeoDataFrame`.**

In [None]:
# Render the polygons on an interactive map for visual inspection.
mwi_lsms_gdf.explore()

**Question 8. Create a static map with 2 subplots, one for each of the columns `croplnd` and `poverty`. Display the values on a continuous color scale.**

In [None]:
# One row, two panels: each panel shades the polygons by one column on a
# continuous scale, with its own colorbar (``legend=True``).
fig, axs = plt.subplots(1, 2, figsize=(10, 8))

# (column to map, colormap, panel title) for each subplot, left to right.
panels = [
    ("croplnd", "Spectral", "Malawi LSMS croplnd"),
    ("poverty", "plasma", "Malawi LSMS poverty"),
]
for ax, (column, cmap, title) in zip(axs, panels):
    mwi_lsms_gdf.plot(ax=ax, column=column, legend=True, cmap=cmap)
    ax.set_title(title, weight="bold")

# End the cell with ``plt.show()`` so the figure is rendered without echoing
# the ``Text`` object returned by the last ``set_title`` call.
plt.show()

**Question 9. Read the Malawi DEM raster file stored as `MWI_msk_alt.vrt`. Convert the data type to float and reset `NoData` values to `np.nan`.**

In [None]:
# Open the DEM. The dataset handle is kept open on purpose: later cells read
# its metadata (size, bounds, CRS, transform).
mwi_dem = rasterio.open("./data-module-1/MWI_msk_alt.vrt")

# Read band 1 and cast to float so the array can hold NaN.
mwi_dem_array = mwi_dem.read(1).astype(float)

# Replace every cell equal to the dataset's NoData value with NaN.
nodata_cells = mwi_dem_array == mwi_dem.nodata
mwi_dem_array[nodata_cells] = np.nan

**Question 10. Explore the characteristics of your raster.**
- How many rows and columns does it have?
- What is the spatial extent of the dataset?
- What is the coordinate reference system?
- Plot a histogram to display the distribution of values.

In [None]:
# Basic raster metadata, read from the open rasterio dataset.
print(f"Number of rows is equal to {mwi_dem.height}")
print(f"Number of columns is equal to {mwi_dem.width}")
print(f"Extent of the dataset: {mwi_dem.bounds}")
print(f"Coordinate Reference System: {mwi_dem.crs}")

# NoData cells in the array were replaced with NaN. Drop them explicitly so
# the histogram's bin range is computed from real values only and does not
# depend on how the installed matplotlib/numpy versions treat NaN.
# Boolean indexing already yields a 1-D array, so no ``flatten`` is needed.
valid_values = mwi_dem_array[~np.isnan(mwi_dem_array)]
plt.hist(valid_values, facecolor="grey", alpha=0.75)

# Render the figure without echoing the (counts, bins, patches) tuple.
plt.show()

**Question 11. Plot Malawi DEM array. Use `terrain` as a `cmap` option.**

In [None]:
# Draw the DEM array with the "terrain" colormap using the object-oriented
# matplotlib API (explicit figure and axes).
fig, ax = plt.subplots(figsize=(10, 8))
dem_image = ax.imshow(mwi_dem_array, cmap="terrain", interpolation="none")
ax.set_title("Malawi DEM, m", weight="bold")
# Colorbar tied to the DEM image.
fig.colorbar(dem_image, ax=ax)

**Question 12. Display multiple features on the same map:**
- add Malawi DEM (use the coordinate reference system of this dataset as the reference for the others), use `terrain` as the `cmap` option;
- add Malawi district boundaries (polygon), display only the edges with `grey` color;
- add Malawi LSMS points, display `dist_agmrkt` column, include the legend, and keep the default `cmap`.

In [None]:
fig, ax = plt.subplots(figsize=(10, 8))

# Base layer: the DEM, positioned in map coordinates via its affine transform
# so the vector layers line up with it.
plot_extent = plotting_extent(mwi_dem_array, mwi_dem.transform)
ax.imshow(mwi_dem_array, cmap="terrain", interpolation="none", extent=plot_extent)

# The DEM's CRS is the reference; reproject both vector layers to it.
dem_crs = mwi_dem.crs

# District polygons: outlines only, in grey.
districts = mwi_lsms_gdf.to_crs(dem_crs)
districts.plot(ax=ax, facecolor="none", edgecolor="grey")

# LSMS points coloured by ``dist_agmrkt`` with the default colormap and a legend.
lsms_points = ea_geo_gdf.to_crs(dem_crs)
lsms_points.plot(ax=ax, column="dist_agmrkt", legend=True)

ax.set_title("Malawi DEM and LSMS data (dist_agmrkt)", weight="bold")
plt.show()