# prepare the functions

In [1]:
import rasterio
import numpy as np
from affine import Affine
from pyproj import Proj, transform

import pandas as pd, json

def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """Convert a DataFrame of point records into a GeoJSON FeatureCollection.

    Args:
        df: DataFrame with one row per point.
        properties: Names of the columns copied into each feature's
            ``properties`` mapping.
        lat: Name of the column holding the latitude.
        lon: Name of the column holding the longitude.

    Returns:
        A dict shaped as a GeoJSON ``FeatureCollection`` containing one
        ``Point`` feature per row, with coordinates ordered ``[lon, lat]``.
        All values are builtin Python objects so the result can be passed
        straight to ``json.dumps``.
    """
    def _native(value):
        # NumPy scalars (e.g. int64) are rejected by json.dumps; .item()
        # returns the equivalent builtin Python value.
        return value.item() if isinstance(value, np.generic) else value

    features = []
    for _, row in df.iterrows():
        features.append({
            'type': 'Feature',
            'properties': {prop: _native(row[prop]) for prop in properties},
            'geometry': {
                'type': 'Point',
                # GeoJSON positions are ordered longitude first, then latitude
                'coordinates': [_native(row[lon]), _native(row[lat])],
            },
        })
    return {'type': 'FeatureCollection', 'features': features}


ModuleNotFoundError: No module named 'rasterio'

## Read a GeoTIFF file, sample points from it, and save the results to a GeoJSON file

In [None]:

#
def sampleTifSavePoints(fname, output_filename, samples_per_class=30, random_state=None):
    """Sample labelled pixels from a single-band raster and save them as GeoJSON points.

    The raster is read with rasterio, every pixel centre is converted to
    longitude/latitude (WGS84), pixels with a value below 0.1 are discarded,
    values above 90 are reduced by 90, and up to ``samples_per_class`` pixels
    are drawn at random for each remaining value. The sampled points are
    written to ``output_filename`` as a GeoJSON FeatureCollection whose
    features carry a ``Label`` property.

    Args:
        fname: Path of the raster file to read.
        output_filename: Path of the GeoJSON file to write.
        samples_per_class: Maximum number of points drawn per label; all
            points of a label are used when it has fewer.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the selection reproducible.

    Returns:
        0 if no point was sampled, 1 otherwise. The output file is written
        in both cases.

    Raises:
        ValueError: If the raster does not contain exactly one band.
    """
    # Read raster
    with rasterio.open(fname) as r:
        T0 = r.transform  # upper-left pixel corner affine transform
        p1 = Proj(r.crs)
        bands = r.read()  # pixel values, shape (bands, rows, cols)

    # Coordinates are computed per pixel, so exactly one value per pixel is needed.
    if bands.shape[0] != 1:
        raise ValueError(
            'expected a single-band raster, got %d bands: %s' % (bands.shape[0], fname)
        )
    labels = bands[0]

    # Column and row index of every pixel
    cols, rows = np.meshgrid(np.arange(labels.shape[1]), np.arange(labels.shape[0]))

    # Shift the transform by half a pixel so coordinates refer to pixel centres
    T1 = T0 * Affine.translation(0.5, 0.5)
    # Apply the affine transform to all pixels at once:
    #   x = a*col + b*row + c,  y = d*col + e*row + f
    eastings = T1.a * cols + T1.b * rows + T1.c
    northings = T1.d * cols + T1.e * rows + T1.f

    # Project all eastings/northings to longitudes/latitudes
    # NOTE(review): pyproj.transform is deprecated in pyproj 2.x in favour of
    # pyproj.Transformer; kept as-is because the installed version is unknown.
    p2 = Proj(proj='latlong', datum='WGS84')
    longs, lats = transform(p1, p2, eastings, northings)

    labels = labels.ravel()
    longs = np.asarray(longs).ravel()
    lats = np.asarray(lats).ravel()

    # Drop pixels whose value is below 0.1. A boolean mask is used instead of
    # np.delete(np.argwhere(...)): argwhere on a column vector yields (row, 0)
    # pairs and the zeros were treated as indices too, so the first pixel was
    # removed whenever any pixel was masked.
    keep = ~(labels < .1)
    labels = labels[keep]  # boolean indexing copies, the raster array is untouched
    longs = longs[keep]
    lats = lats[keep]

    # Values above 90 are shifted down by 90
    above = labels > 90
    labels[above] = labels[above] - 90

    # Convert the arrays into a DataFrame
    df = pd.DataFrame({'longitude': longs, 'latitude': lats, 'Label': labels})

    # Sample up to samples_per_class points for each label (all of them if
    # fewer). Sampling group by group avoids groupby.apply acting on the
    # grouping column, which recent pandas versions deprecate.
    sampled_groups = [
        group.sample(min(len(group), samples_per_class), random_state=random_state)
        for _, group in df.groupby('Label')
    ]
    df_sample = pd.concat(sampled_groups) if sampled_groups else df.iloc[0:0]

    # Write the sample as GeoJSON (which can be opened in QGIS for visualization)
    geojson_dict = df_to_geojson(df_sample, ['Label'])
    with open(output_filename, "w", encoding="utf8") as output_file:
        json.dump(geojson_dict, output_file, indent=2)

    return 0 if df_sample.shape[0] == 0 else 1

# Loop over the files in a folder

In [10]:
import os

# Folder scanned (recursively) for input rasters, and folder receiving the
# generated GeoJSON files.
top_folder = './files4shTest/tifUTM/'
out_folder = './files4shTest/geojson/'

for path, dirs, files in os.walk(top_folder):

    print(len(files))

    for d in files:
        stem, ext = os.path.splitext(d)
        # Skip anything that is not a TIFF instead of guessing a '.tif' name for it
        if ext.lower() not in ('.tif', '.tiff'):
            continue

        # Create the output folder only once there is something to write
        os.makedirs(out_folder, exist_ok=True)
        output_filename = os.path.join(out_folder, stem + '.geojson')
        # Join with the walked directory so files in sub-folders are found too
        fname = os.path.join(path, d)

        has_points = sampleTifSavePoints(fname, output_filename)

        # Report rasters that produced no sample points
        if has_points == 0:
            print(stem)

2
