## **A few useful features from the exercises**

#### Conflict mapping

In [None]:
import zipfile
import glob

# Unpack every .zip archive found in data/ into the data/raw folder

files = glob.glob("data/*.zip")

for archive_path in files:
    # Opening the archive as a context manager closes it again once extraction is done
    with zipfile.ZipFile(archive_path, mode='r') as archive:
        archive.extractall(path="data/raw")

In [None]:
# Subset rows by substring match using the .str.contains method.
# regex=False matches 'IPIS' as literal text rather than as a regular expression;
# na=False treats rows with a missing 'project' value as "no match", so the result
# is a clean boolean mask (a mask containing NaN cannot be used for indexing).
data_ipis = data_visits[data_visits['project'].str.contains('IPIS', regex=False, na=False)]

In [None]:
# Keep a single record per mining site (identified by 'pcode'): the one from its latest visit.
# Ordering by 'visit_date' first means the final entry of every 'pcode' group is the most recent one.
# NOTE: GroupBy.last() takes the last non-null value of each column by default, so a value
# that is missing in the latest visit is filled in from an earlier visit of the same site.
visits_by_date = data_ipis.sort_values('visit_date')
data_ipis_lastvisit = visits_by_date.groupby('pcode', as_index=False).last()
# The aggregation is noted to drop the GeoDataFrame type, so wrap the result in a GeoDataFrame again
data = geopandas.GeoDataFrame(data_ipis_lastvisit)
# ...and carry over the coordinate reference system of the source layer
data.crs = data_visits.crs

In [None]:
# Select the park by name, take its geometry column and squeeze the resulting GeoSeries
# down to the bare shapely geometry, which can then be used in further calculations (e.g. within).
# squeeze() only yields a single geometry when exactly one row matches the name.
is_kahuzi = protected_areas_utm['NAME_AP'] == "Kahuzi-Biega National park"
kahuzi = protected_areas_utm[is_kahuzi].geometry.squeeze()

In [None]:
# Subset a geodataframe with a geometric predicate evaluated on that same geodataframe:
# .within() gives one boolean per feature, which .loc then uses as a row filter
mines_kahuzi = data_utm.loc[data_utm.within(kahuzi)]

In [None]:
# Takes an individual geometry and a collection of geometries (e.g. a GeoDataFrame)
def closest_protected_area(mine, protected_areas, name_column='NAME_AP'):
    """Return the name of the feature in `protected_areas` closest to `mine`.

    Parameters
    ----------
    mine
        A single geometry to measure from.
    protected_areas
        Collection offering ``.distance(geometry)`` (one distance per feature)
        and label-based ``.loc`` access, such as a GeoDataFrame.
    name_column
        Column holding the value to return for the closest feature.
        Defaults to 'NAME_AP', the column used throughout these notes.

    Returns
    -------
    The `name_column` value of the feature with the smallest distance to
    `mine`. When several features share the minimum, the first one wins.
    """
    # Distance from the geometry to every feature in the collection
    distances = protected_areas.distance(mine)
    # Index label of the feature with the smallest distance
    nearest_idx = distances.idxmin()
    # Look up the requested attribute of that feature
    return protected_areas.loc[nearest_idx, name_column]

# Find the nearest protected area for every site: .apply on the geometry column calls
# closest_protected_area(site, protected_areas_utm) once per site, with the second
# positional argument handed over through `args`
result = data_utm.geometry.apply(closest_protected_area, args=(protected_areas_utm,))

In [None]:
# Spatial join of two geodataframes. Only the name and geometry of the protected areas are
# joined in, to keep the result uncluttered.
# predicate='within' matches a site to the protected area it lies within;
# how='inner' keeps only the sites that found such a match.
protected_names = protected_areas_utm[['NAME_AP', 'geometry']]
data_within_protected = geopandas.sjoin(left_df=data_utm, right_df=protected_names,
                                        how='inner', predicate='within')

In [None]:
# Group once by protected area name; the grouping is reused by the queries below
sites_by_area = data_within_protected.groupby("NAME_AP")

# Two ways of counting the rows per category:
# groupby(...).size() lists the categories in sorted order,
# value_counts() lists them from most to least frequent
sites_by_area.size()
data_within_protected["NAME_AP"].value_counts()

# Quick per-category query on a single column: sum of 'workers_numb' for each protected area
sites_by_area["workers_numb"].sum()

In [None]:
# Work on an independent copy holding just the name and geometry columns, so that
# later modifications leave the original protected_areas_utm data untouched
border_columns = ['NAME_AP', 'geometry']
protected_areas_border = protected_areas_utm[border_columns].copy()

In [None]:
# Extract the border zone around the features: buffer each one outwards by 10000
# (metres, assuming the UTM CRS the variable names suggest), then cut away the protected areas.
# The original features are merged into a single geometry (unary_union) first: difference()
# with one geometry subtracts it from every row, so each border also excludes the area of
# all other protected areas, whereas difference() with a GeoSeries would work row by row.
all_areas = protected_areas_utm.unary_union
buffered_areas = protected_areas_border.buffer(10000)
protected_areas_border['geometry'] = buffered_areas.difference(all_areas)

In [None]:
# Second spatial join: keep the data_utm features that lie within a polygon of the
# protected_areas_border layer (inner join, so features without a match are left out)
data_within_border = geopandas.sjoin(left_df=data_utm, right_df=protected_areas_border,
                                     how="inner", predicate="within")

In [None]:
# Constructing a geodataframe from a dataframe with coordinates
df = pd.DataFrame(
    {'City': ['Buenos Aires', 'Brasilia', 'Santiago', 'Bogota', 'Caracas'],
     'Country': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela'],
     'Latitude': [-34.58, -15.78, -33.45, 4.60, 10.48],
     'Longitude': [-58.66, -47.91, -70.66, -74.08, -66.86]})


# Build the point geometries from the coordinate columns in one vectorised call.
# Mind the order: x is the longitude, y is the latitude.
df['Coordinates'] = geopandas.points_from_xy(df.Longitude, df.Latitude)

# Create the geodataframe by specifying the coordinates column as the geometry.
# The CRS has to be defined explicitly: these are plain longitude/latitude degrees,
# taken here to be WGS84 (EPSG:4326).
gdf = geopandas.GeoDataFrame(df, geometry='Coordinates', crs="EPSG:4326")