In [None]:
# import default modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from builtins import range
import os, sys
from pathlib import Path
import json
import traceback

# import geographic modules
import rasterio
from rasterio.mask import mask
import rasterio
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.mask import mask
from affine import Affine
import mercantile
from rasterio.enums import Resampling
from rasterio.vrt import WarpedVRT
from shapely.geometry import box
import geopandas as gpd
from fiona.crs import from_epsg
import pycrs
from osgeo import ogr, gdal
import geojson
from geojson import Polygon, MultiPolygon
from matplotlib.pyplot import figure
import geopandas as gpd
import geoplot
import fiona
import pprint
import shapely
import pyproj
from PIL import Image
from skimage import data
from skimage.util.dtype import dtype_range
from skimage.util import img_as_ubyte
from skimage import exposure
from skimage.morphology import disk
from skimage.filters import rank

# import keras modules
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras import utils
from keras import callbacks
from keras.callbacks import EarlyStopping, ModelCheckpoint
# from keras.callbacks.callbacks import LearningRateSchedulerPerBatch
from keras import backend as K

line_filler = '-'*75

## Build path for relevant files from each country subfolder

In [None]:
# First level: every directory directly under 'stac'.
dirs = [entry.path for entry in os.scandir('stac') if entry.is_dir()]

# Second level: every directory directly under each first-level directory,
# flattened into a single list in scan order.
all_dirs = [
    child.path
    for parent_dir in dirs
    for child in os.scandir(parent_dir)
    if child.is_dir()
]

In [None]:
all_dirs

In [None]:
# Collect, for every batch folder, the training GeoJSON, the test GeoJSON and
# the EPSG:4326 GeoTIFF -- both as flat lists of full paths and as one record
# (dict) per folder holding the bare file names.
geojson_train_files = []
geojson_test_files = []
tif_image_files = []
list_of_tif_geojson_files = []

for dir_path in all_dirs:
    this_path = Path(dir_path)
    locale = this_path.parts[-1]
    print(f'Current path is {this_path}')
    print(f'Locale of the current path {locale}')

    # Record the folder once, before looking at its files, instead of
    # re-assigning it for every file (and never for an empty folder).
    this_dict = {'batch_path': this_path}

    for file in os.listdir(this_path):
        if file == 'train-' + locale + '.geojson':
            geojson_train_files.append(os.path.join(this_path, file))
            this_dict['train_file'] = file
        elif file == 'test-' + locale + '.geojson':
            geojson_test_files.append(os.path.join(this_path, file))
            this_dict['test_file'] = file
        # Match on the real extension: a plain substring test would also pick
        # up sidecar files such as '<name>.tif.aux.xml' and overwrite the
        # actual raster entry.
        elif Path(file).suffix.lower() in ('.tif', '.tiff') and '4326' in file:
            tif_image_files.append(os.path.join(this_path, file))
            this_dict['tif_file'] = file

    list_of_tif_geojson_files.append(this_dict)

In [None]:
list_of_tif_geojson_files

In [None]:
tif_geojson_df = pd.DataFrame(list_of_tif_geojson_files)

In [None]:
tif_geojson_df = tif_geojson_df.dropna()
tif_geojson_df

In [None]:
tif_geojson_df['batch_path'].iloc[4]

In [None]:
tif_image_files

In [None]:
tif_image_files[0]

In [None]:
geojson_train_files[0]

In [None]:
geojson_test_files[0]

## Import training geojson files

In [None]:
with open(geojson_train_files[0]) as data_file:    
    json_data = geojson.load(data_file)

In [None]:
test_geom = [json_data[0]['geometry']]

In [None]:
test_geom

In [None]:
# One flat record per GeoJSON feature: its geometry plus the id,
# roof_material and verified properties.
feature_list = [
    {
        'coordinates': feature['geometry'],
        'id': feature['properties']['id'],
        'material': feature['properties']['roof_material'],
        'verified': feature['properties']['verified'],
    }
    for feature in json_data['features']
]

In [None]:
feature_df = pd.DataFrame(feature_list)

In [None]:
feature_df.info()

In [None]:
feature_df['polygons'] = feature_df['coordinates'].apply(lambda x: len(x['coordinates'][0]))

In [None]:
feature_df.head()

In [None]:
ds = gdal.Open(tif_image_files[0])
gt = ds.GetGeoTransform()

In [None]:
gt

In [None]:
gt[0]

In [None]:
gt[1]

In [None]:
gt[3]

In [None]:
gt[5]

In [None]:
bands = ds.RasterCount

In [None]:
band = ds.GetRasterBand(1)

In [None]:
band

In [None]:
# data = band.ReadAsArray(0, 0, image_file.width, image_file.height)

In [None]:
# data

In [None]:
def crop_image_with_geoms(geom_array, image_file):
    """Crop a raster to the extent of the given geometries.

    Args:
        geom_array: Geometries handed to ``rasterio.mask.mask`` as its
            ``shapes`` argument (elsewhere in this notebook these are lists
            of GeoJSON-like geometry dicts).
        image_file: Raster handed to ``rasterio.mask.mask`` as the dataset to
            read from.

    Returns:
        The ``(out_img, out_transform)`` tuple produced by
        ``rasterio.mask.mask(..., crop=True)``.
    """
    # The dataset is passed positionally (as the other mask() calls in this
    # notebook do) and the geometries come from the argument rather than from
    # a global named `coords`.
    out_img, out_transform = mask(image_file, geom_array, crop=True)
    return out_img, out_transform

In [None]:
def get_features(gdf):
    """Return the geometry of the first feature of ``gdf``, wrapped in a list.

    ``gdf`` must expose ``to_json()`` returning a GeoJSON FeatureCollection
    string; the one-element list is the form rasterio expects for shapes.
    """
    feature_collection = json.loads(gdf.to_json())
    first_feature = feature_collection['features'][0]
    return [first_feature['geometry']]

In [None]:
image_file = rasterio.open(tif_image_files[0])

In [None]:
type(image_file)

In [None]:
# show(image_file)
# show((image_file, 1), cmap='terrain')

In [None]:
print(image_file.crs)
print(image_file.transform)
print(image_file.width)
print(image_file.height)
print(image_file.bounds)
print(image_file.meta)

In [None]:
# Build the path with os.path.join so it resolves on every platform; the
# previous backslash-joined literal only named a file on Windows.
reproj_image_file = rasterio.open(
    os.path.join('stac', 'colombia', 'borde_rural',
                 'borde_rural_ortho_cog-epsg4326_reproj.tif'))


In [None]:
type(reproj_image_file)

In [None]:
print(reproj_image_file.crs)
print(reproj_image_file.transform)
print(reproj_image_file.width)
print(reproj_image_file.height)
print(reproj_image_file.bounds)
print(reproj_image_file.meta)

In [None]:
reproj_image_file

In [None]:

# figure(num=None, figsize=(, 6), dpi=80, facecolor='w', edgecolor='k')
# Plot band 1 of the reprojected raster, decimated by a factor of 12 in each
# direction while reading.
fig = plt.figure(figsize=(26,42))
reproj_thumb = reproj_image_file.read(1,  out_shape=(1, int(reproj_image_file.height // 12), int(reproj_image_file.width // 12)))
plt.imshow(reproj_thumb)
plt.colorbar()
# Report the shape of the array that is actually plotted here; `thumb` is
# only created by a later cell and is undefined on a fresh run.
plt.title(f'Minimum resolution band {reproj_thumb.shape}\n')
plt.xlabel('Column #')
plt.ylabel('Row #')
plt.show()

In [None]:
overview = image_file.overviews(1)
least_v = overview[-2]
thumb = image_file.read(1, out_shape=(1, int(image_file.height // least_v), int(image_file.width // least_v)))
print(thumb)

In [None]:
fig = plt.figure(figsize=(25,45))
plt.imshow(thumb)
plt.colorbar()
plt.title(f'Minimum resolution band {thumb.shape}\n')
plt.xlabel('Column #')
plt.ylabel('Row #')
plt.show()

In [None]:

buildings = gpd.read_file(geojson_train_files[0])
print(buildings.head())

In [None]:
geojson_train_files[0]

In [None]:
geoplot.polyplot(buildings, figsize=(20,15))

In [None]:
def get_features(gdf_json, i=0):
    """Return one feature's geometry wrapped in a list, the form rasterio wants.

    This definition replaces the earlier one-argument ``get_features(gdf)``,
    so it accepts both calling styles used in the notebook.

    Args:
        gdf_json: Either a parsed GeoJSON FeatureCollection (a mapping with a
            ``'features'`` list), or an object exposing ``to_json()`` that
            returns such a collection as a JSON string.
        i: Index of the feature whose geometry is returned. Defaults to 0 so
            that ``get_features(gdf)`` keeps working.

    Returns:
        A one-element list holding the selected feature's ``'geometry'``.

    Raises:
        IndexError: If ``i`` is out of range for the feature list.
    """
    if hasattr(gdf_json, 'to_json'):
        gdf_json = json.loads(gdf_json.to_json())
    return [gdf_json['features'][i]['geometry']]

def bounding_box(points):
    """Compute the axis-aligned bounding box of a sequence of coordinates.

    Args:
        points: Sequence of coordinate tuples/lists whose first two items are
            x and y; any further items (e.g. a z value) are ignored.

    Returns:
        ``[(min_x, min_y), (max_x, max_y)]``, or ``None`` when the box cannot
        be computed (for example an empty sequence); in that case the
        traceback is printed instead of being raised.
    """
    try:
        # Read from the argument itself rather than from the module-level
        # `building_coords`, so the result always describes `points`.
        x_coords = [point[0] for point in points]
        y_coords = [point[1] for point in points]
        return [(min(x_coords), min(y_coords)), (max(x_coords), max(y_coords))]
    except Exception:
        print(traceback.format_exc())
        return None

In [None]:
buildings_json = json.loads(buildings.to_json())
building_coords = get_features(buildings_json, 0)
# type(buildings)

In [None]:
len(buildings_json['features'])

In [None]:
for i in range(len(tif_geojson_df)):
    print(tif_geojson_df.iloc[i])

## Extract building .tif files from highres tif using geojson coordinates

In [None]:
# For every batch folder: open its GeoTIFF, then clip one small GeoTIFF per
# building polygon found in the train and test GeoJSON files. Training clips
# go to train/<roof_material>/<id>.tif, test clips to test/<id>.tif; a clip
# that already exists on disk is left untouched.
loop_raster = None  # raster opened by the previous iteration of this loop
try:
    # Use the NaN-free view for both the length and the positional lookups so
    # a row with a missing file can never be selected by position.
    complete_batches = tif_geojson_df.dropna()
    for batch_idx in range(len(complete_batches)):
        batch_path = complete_batches['batch_path'].iloc[batch_idx]
        print(f'Processing file batch in path {batch_path}')

        tif_file = os.path.join(batch_path, complete_batches['tif_file'].iloc[batch_idx])
        print(f'Tiff filename: {tif_file}')
        opened_raster = rasterio.open(tif_file)
        # Close the raster of the previous batch so handles do not pile up.
        # The last one is deliberately left open: later cells keep reading
        # from `reproj_image_file`.
        if loop_raster is not None:
            loop_raster.close()
        reproj_image_file = loop_raster = opened_raster

        print(reproj_image_file.crs)
        print(reproj_image_file.transform)
        print(reproj_image_file.width)
        print(reproj_image_file.height)
        print(reproj_image_file.bounds)
        print(reproj_image_file.meta)

        train_json = os.path.join(batch_path, complete_batches['train_file'].iloc[batch_idx])
        print(f'Training GeoJson filename: {train_json}')

        test_json = os.path.join(batch_path, complete_batches['test_file'].iloc[batch_idx])
        print(f'Testing GeoJson filename: {test_json}\n')

        for json_file, model_path in [[train_json, 'train'], [test_json, 'test']]:
            print(f'Processing {model_path}ing files')
            buildings = gpd.read_file(json_file)
            buildings_json = json.loads(buildings.to_json())

            # A dedicated index name keeps the batch index above from being
            # reused for the features.
            for feature_idx, feature in enumerate(buildings_json['features']):
                building_id = feature['properties']['id']
                # Only the training features are read for a roof material.
                if model_path == 'train':
                    building_material = feature['properties']['roof_material']
                else:
                    building_material = ''
                print(f'Object id {building_id} has material type {building_material}')

                building_coords = get_features(buildings_json, feature_idx)
                print(f'Building origin coordinates:\n {building_coords}')
                box_coords = bounding_box(building_coords[0]['coordinates'][0])

                if box_coords:
                    # The box is only reported; the clip itself uses the
                    # building polygon.
                    bbox = box(box_coords[0][0], box_coords[0][1], box_coords[1][0], box_coords[1][1])
                    print(f'Object id {building_id} has boudning box:\n {bbox}')

                    out_arr, out_transform = mask(reproj_image_file, shapes=building_coords, crop=True)
                    print('These are the attributes of the masked image space:')
                    print(type(out_arr), out_arr.shape, out_arr.dtype, out_transform)

                    # Reuse the source metadata, adjusted to the clip's size
                    # and georeferencing.
                    out_meta = reproj_image_file.meta.copy()
                    out_meta.update({"driver": "GTiff",
                                     "height": out_arr.shape[1],
                                     "width": out_arr.shape[2],
                                     "transform": out_transform})

                    # Formatting (instead of `+`) also copes with ids that
                    # are not strings.
                    temp_file_name = f'{building_id}.tif'

                    if model_path == 'train':
                        folder_path = os.path.join(model_path, building_material)
                    else:
                        folder_path = model_path

                    os.makedirs(folder_path, exist_ok=True)

                    file_path = os.path.join(folder_path, temp_file_name)

                    if not os.path.exists(file_path):
                        print(f'Writing clipped image file to: {folder_path}')
                        with rasterio.open(file_path, 'w', **out_meta) as dest:
                            dest.write(out_arr)
                else:
                    print(f'No coordinate bounds found for building {building_id}')

                print(line_filler, '\n')
except Exception:
    # Best effort: report the failure and let the notebook continue.
    print(line_filler)
    print(traceback.format_exc())
    print(line_filler)

In [None]:
buildings_json['features'][2]

In [None]:
building_coords

In [None]:
coords_matrix = building_coords[0]['coordinates'][0]

In [None]:
row_x, row_y = zip(building_coords[0]['coordinates'][0][0])
print(row_x, row_y)   


In [None]:
x

In [None]:
material_type = buildings_json['features'][0]['properties']['roof_material']

In [None]:
material_type

In [None]:
objectid = buildings_json['features'][0]['properties']['id']

In [None]:
objectid

In [None]:
building_coords[0]['coordinates'][0]

In [None]:
box_coord = bounding_box(building_coords[0]['coordinates'][0])

In [None]:
# box_coord = bounding_box(building_coords[0]['coordinates'][0][0])

In [None]:
box_coord

In [None]:
bbox = box(box_coord[0][0], box_coord[0][1], box_coord[1][0], box_coord[1][1])

In [None]:
geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=reproj_image_file.crs.data)

In [None]:
coords = get_features(json.loads(geo.to_json()), 0)

In [None]:
coords

In [None]:


# Read every feature geometry of the first training file. The collection is
# bound to `geojson_src` rather than `geojson`, so the imported `geojson`
# module is not overwritten, and it is opened once inside `with`, so the
# handle is closed again (the extra bare fiona.open() call was never closed).
with fiona.open(geojson_train_files[0], "r") as geojson_src:
    features = [feature["geometry"] for feature in geojson_src]
    pprint.pprint(features[0:5])

In [None]:
# Split the first ring of the first geometry into separate lists holding the
# first and second item of every coordinate, echoing each coordinate on the way.
first_ring = features[0]['coordinates'][0]
x, y = [], []
for position in first_ring:
    print(position)
    x += [position[0]]
    y += [position[1]]
# for feature in features[0]['coordinates'][0]:
#     print(feature)

In [None]:
x, y

In [None]:
type(geojson)

In [None]:
# Derive the output file name from the bare name of the first GeoTIFF.
in_file = tif_image_files[0]
# os.path.basename strips the directory part using the separators of the
# running platform; splitting on '\\' only worked for Windows-style paths.
in_file = os.path.basename(in_file)
out_file = in_file.replace('cog', 'cog_wgs84_reproj')
print(in_file, out_file)

In [None]:
reproj_image_file.shape

In [None]:
box_coord

In [None]:
out_arr, out_transform = mask(reproj_image_file, shapes=coords, crop=True)

In [None]:
out_arr.shape, out_arr.dtype

In [None]:
out_transform

In [None]:
out_meta = reproj_image_file.meta.copy()
out_meta.update({"driver": "GTiff",
                "height": out_arr.shape[1],
                 "width": out_arr.shape[2],
                 "transform": out_transform})

In [None]:
out_file = f'{objectid}.tif'

with rasterio.open(f'{out_file}', 'w', **out_meta) as dest:
    dest.write(out_arr)

## Testing looking at a single file, and some filtering options

In [None]:
test_file = 'train//concrete_cement//7a1c6d7c_concrete_cement.tif'


In [None]:
img_arr = plt.imread(test_file)


In [None]:
# print(f'Shape of old array: {img_arr.shape}')

# image_width = img_arr.shape[0]
# image_length = img_arr.shape[1]
# image_layers = img_arr.shape[2]

# img_arr_median = np.zeros((image_width, image_length, image_layers))
# print(f'Shape of new array: {img_arr_median.shape}')

# img_arr_median

# for l in range(image_layers-1):
#     layer_median_pixel_val = np.median(img_arr[:,:,l])
#     print(f'Median pixel value on layer {l+1} is {layer_median_pixel_val}')
    
#     for i in range(image_width):
#         for j in range(image_length):
#             print(f'Before: {img_arr[i,j,l]}')
#             if img_arr[i,j,l] - layer_median_pixel_val < 0:
#                 img_arr_median[i,j,l] = 0
#             else:
#                 img_arr_median[i,j,l] = img_arr[i,j,l] - layer_median_pixel_val
#             print(f'After: {img_arr_median[i,j,l]}')

In [None]:
img_arr[:,:,0:3].shape

In [None]:
plt.imshow(img_arr[:,:,0:3])
plt.show()

In [None]:
type(img_arr), img_arr.shape, img_arr.dtype

In [None]:
p2, p98 = np.percentile(img_arr, (2,98))
img_arr_rescale_intensity = exposure.rescale_intensity(img_arr, in_range=(p2,p98))
plt.imshow(img_arr_rescale_intensity)
plt.show()

In [None]:
img_arr_rescale = exposure.equalize_hist(img_arr)
plt.imshow(img_arr_rescale)
plt.show()

In [None]:
selem = disk(30)
img_arr_adapt_equal = exposure.equalize_adapthist(img_arr, clip_limit = .03)
plt.imshow(img_arr_adapt_equal)
plt.show()

## Building an array with all the original images to split into train/test sets

### This next cell was strictly to rename the .tif files to remove material type, leaving just building id

In [None]:
# for path in train_paths:
#     for file in os.listdir(path):
#         print(file)
#         new_file_name = file.split('_')[0] + '.tif'
#         print(new_file_name)
        
#         old_path_file = os.path.join(path, file)
#         new_path_file = os.path.join(path, new_file_name)
        
#         os.rename(old_path_file, new_path_file)

In [None]:
# Importing the submodule binds `skimage` and guarantees that
# `skimage.transform` is actually loaded.
import skimage.transform

# Resize the sample tile selected above (`test_file`) to 150x150 and show it.
# The cell previously read `test_path`, a name that is never defined.
image = plt.imread(test_file)
size = 150, 150
image_resized = skimage.transform.resize(image, size)
plt.imshow(image_resized)
plt.show()

In [None]:
root_train_path = 'train/'
# One entry per class folder under 'train'. Plain files are skipped because
# every entry is later listed as a directory, and the names are sorted so the
# label column order does not depend on the file system's listing order.
material_folders = sorted(
    sub_path for sub_path in os.listdir('train')
    if os.path.isdir(os.path.join('train', sub_path))
)
material_folders

In [None]:
# Build one image record and one one-hot label record per clipped building
# found under train/<material>/.
x_train_dict_list = []
y_train_dict_list = []
size = 150, 150

for material_type in material_folders:
    sub_train_path = os.path.join(root_train_path, material_type)
    for img_file in os.listdir(sub_train_path):
        file_path = os.path.join(sub_train_path, img_file)
        img = plt.imread(file_path)
        # Resize the image that was just read. Resizing the leftover demo
        # variable `image` made every training sample the same picture.
        img_resize = skimage.transform.resize(img, size)
        # Keep the first three channels only.
        img_arr = img_resize[:,:,0:3]
        building_id = img_file.replace('.tif','')

        img_dict = {
            'building_id': building_id,
            'material_type': material_type,
            'image_data': img_arr
        }
        x_train_dict_list.append(img_dict)

        # Start from 0.0 for every class folder, then flag this sample's own
        # class. Deriving the keys from `material_folders` keeps every record
        # aligned with the columns the label array is built from.
        label_dict = {'building_id': building_id,
                      **dict.fromkeys(material_folders, 0.0)}
        label_dict[material_type] = 1.0
        y_train_dict_list.append(label_dict)

In [None]:
x_train_dict_list[0]['image_data'].shape

In [None]:
# Stack the 'image_data' entry of every record into one array, reporting each
# index as it is appended.
x_train_array = []

for idx, image_record in enumerate(x_train_dict_list):
    print(f'Appending index {idx}')
    x_train_array.append(image_record['image_data'])

x_train_array = np.array(x_train_array)

In [None]:
x_train_array.shape

In [None]:
# Turn every label record into a row of values ordered like
# `material_folders`, reporting each index as it is appended.
y_train_array = []

for idx, label_record in enumerate(y_train_dict_list):
    print(f'Appending index {idx}')
    y_train_array.append([label_record[key] for key in material_folders])

y_train_array = np.array(y_train_array)

In [None]:
y_train_array.shape

In [None]:
x_train_df = pd.DataFrame(x_train_dict_list)

In [None]:
y_train_df = pd.DataFrame(y_train_dict_list)

In [None]:
x_train_df.loc[0]

In [None]:
# X = x_train_df['image_data'].values
# y = y_train_df[material_folders].values
X = x_train_array
y = y_train_array

In [None]:
X.shape

In [None]:
y.shape

In [None]:
np.sum(y[:,0]), np.sum(y[:,1]), np.sum(y[:,2]), np.sum(y[:,3]), np.sum(y[:,4])

In [None]:
# from imblearn.over_sampling import SMOTE
# sm = SMOTE(random_state=42)
# X_res, y_res = sm.fit_resample(X, y)

# np.sum(y_res[:,0]), np.sum(y_res[:,1]), np.sum(y_res[:,2]), np.sum(y_res[:,3]), np.sum(y_res[:,4])

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
np.sum(y_train[:,0]), np.sum(y_train[:,1]), np.sum(y_train[:,2]), np.sum(y_train[:,3]), np.sum(y_train[:,4])

In [None]:
# num_classes =  5
# y_train = utils.to_categorical(y_train, num_classes)
# y_test = utils.to_categorical(y_test, num_classes)

In [None]:
y_test[0:5]

## Train and validate a simple convnet

In [None]:
K.clear_session()

In [None]:
# batch_size = 32
# num_classes = 10
# epochs = 100
# data_augmentation = True
# num_predictions = 20
# save_dir = os.path.join(os.getcwd(), 'saved_models')
# model_name = 'keras_cifar10_trained_model.h5'

# # The data, split between train and test sets:
# (x_train, y_train), (x_test, y_test) = cifar10.load_data()
# print('x_train shape:', x_train.shape)
# print(x_train.shape[0], 'train samples')
# print(x_test.shape[0], 'test samples')

# # Convert class vectors to binary class matrices.
# y_train = utils.to_categorical(y_train, num_classes)
# y_test = utils.to_categorical(y_test, num_classes)

In [None]:
y_test[0]

## Simple CNN

In [None]:
# `input_shape` was printed without ever being assigned; define it as the
# per-sample shape of the training array, which is what the models below use.
input_shape = X_train.shape[1:]
print(input_shape, X_train.shape[1:])

In [None]:
# model = Sequential()
# model.add(Conv2D(32, (3, 3), padding='same',
#                  input_shape=X_train.shape[1:]))
# model.add(Activation('relu'))
# model.add(Conv2D(32, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

# model.add(Conv2D(64, (3, 3), padding='same'))
# model.add(Activation('relu'))
# model.add(Conv2D(64, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

# model.add(Flatten())
# model.add(Dense(512))
# model.add(Activation('relu'))
# model.add(Dropout(0.5))
# model.add(Dense(num_classes))
# model.add(Activation('softmax'))

# model.summary()

# model.compile(loss='categorical_crossentropy',
#               optimizer='adam',
#               metrics=['accuracy'])

## VGG16 Model

In [None]:
# VGG16-style convolutional stack: five stages of 3x3 'same'-padded ReLU
# convolutions (2, 2, 3, 3 and 3 layers with 64, 128, 256, 512 and 512
# filters), each stage followed by 2x2 max-pooling with stride 2, then a
# small dense head ending in a 5-way softmax.
model = Sequential([
    # Stage 1 -- the input shape is the per-sample shape of X_train.
    Conv2D(64, (3, 3), input_shape=X_train.shape[1:], padding='same',
           activation='relu'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Stage 2
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    Conv2D(128, (3, 3), activation='relu', padding='same',),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Stage 3
    Conv2D(256, (3, 3), activation='relu', padding='same',),
    Conv2D(256, (3, 3), activation='relu', padding='same',),
    Conv2D(256, (3, 3), activation='relu', padding='same',),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Stage 4
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Stage 5
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    Conv2D(512, (3, 3), activation='relu', padding='same',),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Flatten(),
    # The three dense layers below are disabled in favour of the small
    # 64-unit head that follows them.
#     Dense(4096, activation='relu'),
#     Dense(4096, activation='relu'),
#     Dense(1000, activation='softmax'),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax')
])

model.summary()

# Categorical cross-entropy pairs with the softmax output; labels are
# presumably one-hot rows with five columns -- TODO confirm against y_train.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
batch_size = 32
num_classes = 5
epochs = 50
model_save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_trained_vvg16.h5'

# weight_file_name="weights-improvement-{epoch:02d}-{accuracy:.2f}.hdf5"
# weight_file_path = os.path.join(model_save_dir, weight_file_name)
# checkpoint = ModelCheckpoint(weight_file_path, monitor='accuracy', verbose=1, save_best_only=True, mode='max')
# callbacks_list = [checkpoint]

In [None]:
# Augmentation for the training images: random rotations, shears, zooms and
# flips, with each sample centred on its own mean.
#
# No `rescale=1./255` here: the arrays fed to these generators come out of
# skimage.transform.resize, which by default converts images to floats
# following the img_as_float conventions, so dividing by 255 again would
# scale the pixel values a second time.
# NOTE(review): confirm X is still built with resize's default preserve_range.
train_datagen = ImageDataGenerator(
        rotation_range=45,
        samplewise_center=True,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest')

# Validation images get no augmentation, but the same per-sample centring as
# the training images so both splits reach the network in the same form.
test_datagen = ImageDataGenerator(samplewise_center=True)

In [None]:
train_datagen.fit(X_train)

In [None]:
train_generator = train_datagen.flow(
        X_train,
        y_train,
        batch_size=batch_size)
#         class_mode='categorical')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow(
        X_test,
        y_test,
        batch_size=batch_size)
#         class_mode='categorical')

In [None]:
# Write a weights file named after the epoch number and validation loss.
ckpt_callback = ModelCheckpoint(filepath='weights.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss')
# lr_decay_callback = LearningRateSchedulerPerBatch(
#                     lambda step: ((learning_rate - min_learning_rate) * decay_rate ** step + min_learning_rate))
# early_stopping = EarlyStopping(monitor='val_loss', patience=2)
# Stop once val_loss has not improved for 3 epochs and restore the best weights.
earlystop = EarlyStopping(monitor = 'val_loss',
                          min_delta = 0,
                          patience = 3,
                          verbose = 1,
                          restore_best_weights = True)
# NOTE(review): this list is bound to `callbacks`, the same name the import
# cell gives to the `keras.callbacks` module, which is therefore no longer
# reachable under that name after this cell runs.
callbacks = [ckpt_callback, earlystop]

In [None]:
# Train the network on the augmented generator and store the final weights.
os.makedirs(model_save_dir, exist_ok=True)
model_path = os.path.join(model_save_dir, model_name)

history = model.fit_generator(
        train_generator,
        # One pass over each generator per epoch. len() of a Keras array
        # iterator is its number of batches, so the step counts follow the
        # actual data size instead of the fixed 2000 / 800 sample counts.
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=callbacks
        )
model.save_weights(model_path)

In [None]:
# Save model and weights
if not os.path.isdir(model_save_dir):
    os.makedirs(model_save_dir)
model_path = os.path.join(model_save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

In [None]:
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20

def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Relies on the module-level ``datagen``, ``batch_size`` and ``conv_base``;
    ``conv_base`` must already be defined when this function is called.

    Args:
        directory: Root folder handed to ``flow_from_directory``; images are
            expected in one sub-folder per class.
        sample_count: Maximum number of images to process.

    Returns:
        ``(features, labels)`` where ``features`` has shape
        ``(n, 4, 4, 512)`` and ``labels`` holds one-hot rows of shape
        ``(n, num_classes)``, with ``n = min(sample_count, images found)``.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=(150,150),
        batch_size=batch_size,
        class_mode='categorical')

    # Never ask for more samples than exist: the generator loops forever and
    # would otherwise hand out the same images again.
    sample_count = min(sample_count, generator.samples)
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    # Categorical labels are one-hot rows, so the array needs a class axis.
    labels = np.zeros(shape=(sample_count, generator.num_classes))

    filled = 0
    while filled < sample_count:
        inputs_batch, labels_batch = next(generator)
        # The last batch may be shorter than batch_size or overshoot
        # sample_count; copy only what still fits.
        take = min(len(inputs_batch), sample_count - filled)
        features_batch = conv_base.predict(inputs_batch)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
    return features, labels

# Backward-compatible alias for the original (misspelled) function name.
extractc_features = extract_features

In [None]:
# Extract convolutional features for the training and validation images.
# NOTE(review): 'train/concrete_cement' is a single class folder, while the
# helper passes `directory` to flow_from_directory -- presumably the root
# 'train' folder was intended; TODO confirm.
train_features, train_labels = extract_features('train/concrete_cement', 2000)
# NOTE(review): this call passes no arguments although the helper takes
# (directory, sample_count); it raises TypeError until real arguments are
# supplied.
validation_features, validation_labels = extract_features()

In [None]:
from keras.applications import VGG16
from keras import optimizers, models, layers

# Pre-trained VGG16 convolutional base without its dense top.
conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
# summary() prints by itself and returns None, so it is not wrapped in print().
conv_base.summary()

# The dense classifier below consumes flattened VGG16 feature maps, not raw
# images, so both splits are first run through the convolutional base and
# flattened to one row per sample.
train_conv_features = conv_base.predict(X_train).reshape(len(X_train), -1)
test_conv_features = conv_base.predict(X_test).reshape(len(X_test), -1)

# Bound to its own name so the network trained earlier (`model`) and its
# `history` stay available to the scoring and plotting cells that follow.
vgg_top_model = models.Sequential()
vgg_top_model.add(layers.Dense(256, activation='relu', input_dim=train_conv_features.shape[1]))
vgg_top_model.add(layers.Dropout(0.5))
vgg_top_model.add(layers.Dense(5, activation='softmax'))

vgg_top_model.compile(optimizer=optimizers.RMSprop(lr=2e-5), loss='categorical_crossentropy', metrics=['acc'])

vgg_top_history = vgg_top_model.fit(train_conv_features, y_train, epochs=30, batch_size=20,
                                    validation_data=(test_conv_features, y_test))


In [None]:
# Score trained model.
scores = model.evaluate(X_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

In [None]:
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


# I don't know what's going on below here :-)

In [None]:
# import os
# in_file = in_file.split('\\')[-1]
# out_file = in_file.replace('cog', 'cog_wgs84_reproj')
# os.sys(f'gdalwarp {in_file} {out_file} -t_srs "+proj=longlat +ellps=WGS84"')

In [None]:
# ds = gdal.Warp(out_file, in_file, dstSRS='EPSG:4326',
#                outputType=gdal.GDT_Int16, xRes=0.00892857142857143, yRes=0.00892857142857143)
# ds = None

In [None]:
# def project_wsg_shape_to_csr(shape, csr):
#     project = lambda x, y: pyproj.transform(
#         pyproj.Proj(init='epsg:32618'),
#         pyproj.Proj(init=csr),
#         x,
#         y
#       )
#     return shapely.ops.transform(project, shape)

# yard=project_wsg_shape_to_csr(features, 'epsg:32618')

In [None]:
# project = lambda x, y: pyproj.transform(
#     pyproj.Proj(init='epsg:3857'),
#     pyproj.Proj(init='epsg:4326'),
#     x,
#     y
# )
# yard = shapely.ops.transform(project, coords)

In [None]:
# project = lambda x, y: pyproj.transform(
#     pyproj.Proj(init='epsg:3857'),
#     pyproj.Proj(init='epsg:4326'),
#     x,
#     y
# )
# yard = shapely.ops.transform(project, geojson)

In [None]:
warped_vrt = WarpedVRT(image_file, crs='EPSG:3857', resampling=Resampling.bilinear)

In [None]:
dst_tile = mercantile.tile(*warped_vrt.lnglat(), 9)
left, bottom, right, top = mercantile.xy_bounds(*dst_tile)

In [None]:
print(left, bottom, right, top)

In [None]:
lng, lat = image_file.lnglat()

In [None]:
print(lng, lat)

In [None]:
tile = mercantile.tile(lng, lat, 11)
merc_bounds = mercantile.xy_bounds(tile)
print(merc_bounds)

In [None]:
i1 = gdal.Open('warp_test.tif')

In [None]:
# from affine import Affine
# import mercantile

# import rasterio
# from rasterio.enums import Resampling
# from rasterio.vrt import WarpedVRT

# with rasterio.open('tests/data/RGB.byte.tif') as src:
#     with WarpedVRT(src, crs='EPSG:3857',
#                    resampling=Resampling.bilinear) as vrt:

#         # Determine the destination tile and its mercator bounds using
#         # functions from the mercantile module.
#         dst_tile = mercantile.tile(*vrt.lnglat(), 9)
#         left, bottom, right, top = mercantile.xy_bounds(*dst_tile)

#         # Determine the window to use in reading from the dataset.
#         dst_window = vrt.window(left, bottom, right, top)

#         # Read into a 3 x 512 x 512 array. Our output tile will be
#         # 512 wide x 512 tall.
#         data = vrt.read(window=dst_window, out_shape=(3, 512, 512))

#         # Use the source's profile as a template for our output file.
#         profile = vrt.profile
#         profile['width'] = 512
#         profile['height'] = 512
#         profile['driver'] = 'GTiff'

#         # We need determine the appropriate affine transformation matrix
#         # for the dataset read window and then scale it by the dimensions
#         # of the output array.
#         dst_transform = vrt.window_transform(dst_window)
#         scaling = Affine.scale(dst_window.num_cols / 512,
#                                dst_window.num_rows / 512)
#         dst_transform *= scaling
#         profile['transform'] = dst_transform

#         # Write the image tile to disk.
#         with rasterio.open('/tmp/test-tile.tif', 'w', **profile) as dst:
#             dst.write(data)

In [None]:
band1 = image_file.read(1)

In [None]:
band1

In [None]:
from rasterio.plot import show_hist


show_hist(reproj_image_file, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype='stepfilled', title="Histogram")

In [None]:
coords = feature_df['coordinates'].iloc[0]

In [None]:
print(type(coords),type(image_file),type(test_geom))

In [None]:
all_bounds = [rasterio.features.bounds(shape) for shape in test_geom]

In [None]:
all_bounds[0]

In [None]:
minx, miny = all_bounds[0][0], all_bounds[0][1]
maxx, maxy = all_bounds[0][2], all_bounds[0][3]
# minx, miny = 24.60, 60.00
# maxx, maxy = 25.22, 60.35
bbox = box(minx, miny, maxx, maxy)

In [None]:
geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=from_epsg(32618))
geo = geo.to_crs(crs=image_file.crs.data)
coords = get_features(geo)

In [None]:
coords

In [None]:
test_geom

In [None]:
out_img, out_transform = mask(image_file, coords, crop=True)

In [None]:
# Make sure the submodules used below are loaded; `import rasterio` alone
# does not guarantee that `rasterio.features` and `rasterio.warp` exist.
import rasterio.features
import rasterio.warp

# Stored as `dataset_mask` so the `mask` function imported from rasterio.mask
# is not overwritten by an array.
dataset_mask = image_file.dataset_mask()

# Extract feature shapes and values from the array.
for geom, val in rasterio.features.shapes(
        dataset_mask, transform=image_file.transform):

    # Transform shapes from the dataset's own coordinate
    # reference system to CRS84 (EPSG:4326).
    geom = rasterio.warp.transform_geom(
        image_file.crs, 'EPSG:4326', geom, precision=6)

    # Print GeoJSON shapes to stdout.
    print(geom)

In [None]:
# In [17]: array = raster.read()

# # Calculate statistics for each band
# In [18]: stats = []

# In [19]: for band in array:
#    ....:     stats.append({
#    ....:         'min': band.min(),
#    ....:         'mean': band.mean(),
#    ....:         'median': np.median(band),
#    ....:         'max': band.max()})
#    ....: 

# In [20]: print(stats)