In [1]:
import os
import sys
import numpy as np
import json
from scipy import misc
from matplotlib import pyplot as plt
# Make the working directory importable. sys.path entries are used verbatim
# (no shell-style "~" expansion), so the home directory has to be expanded
# explicitly; the membership check keeps re-runs of this cell from stacking
# duplicate entries.
tan_working_dir = os.path.expanduser('~/Tan_working')
if tan_working_dir not in sys.path:
    sys.path.append(tan_working_dir)

In [None]:
from modis_utils.preprocessing.image_processing import mask_cloud_and_water
from modis_utils.misc import get_im, find_img_name, find_img_name_1
from modis_utils.misc import cache_data, restore_data

# Changeable parameters

In [15]:
# Product to process. The notebook also branches on the value 'ALL'
# (see the day_period selection below).
modis_product = 'MOD13Q1'

# Band to process; 'blue' was the alternative tried previously.
used_band = 'NDVI'

# Inclusive (first_year, last_year) span and number of reservoir folders.
year_range = (2002, 2018)
n_reservoirs = 40

# Input / output locations, all relative to the current working directory.
data_dir = 'raw_data/' + modis_product
mask_data_dir = 'mask_data/' + modis_product
preprocessed_dir = 'preprocessed_data/mask_cloud/'

In [3]:
# Step, in days, between two consecutive images: 8 for 'ALL', 16 otherwise.
day_period = 8 if modis_product == 'ALL' else 16

# Number of day-of-year slots per year (day 1, 1 + day_period, ...).
n_data_per_year = 365//day_period + 1

# Create water/land/cloud mask

In [5]:
# Build one mask per (reservoir, year, day-of-year) folder found under
# data_dir and cache it as 'masked.dat' under the mirrored folder in
# mask_data_dir. Only runs when the NDVI band is selected.
#
# (reservoir_index, year, day) triples that could not be processed end up in
# skipped_masks so they can be inspected after the run.
skipped_masks = []
if used_band == 'NDVI':
    for reservoir_index in range(n_reservoirs):
        for year in range(year_range[0], year_range[1] + 1):
            for d in range(n_data_per_year):
                day = d*day_period + 1
                # Folder layout: <reservoir>/<year>/<year><3-digit day-of-year>
                prefix = os.path.join(str(reservoir_index), str(year),
                                      str(year) + str(day).zfill(3))
                dir_prefix = os.path.join(data_dir, prefix)
                try:
                    water_cloud_mask = mask_cloud_and_water(dir_prefix, used_band)
                    cur_mask_data_dir = os.path.join(mask_data_dir, prefix)
                    os.makedirs(cur_mask_data_dir, exist_ok=True)
                    cache_data(water_cloud_mask,
                               os.path.join(cur_mask_data_dir, 'masked.dat'))
                except Exception:
                    # Best effort: a slot that cannot be processed is skipped.
                    # Catching Exception (not a bare except) lets
                    # KeyboardInterrupt stop this long-running cell.
                    skipped_masks.append((reservoir_index, year, day))

# Change fill value to -2001

In [5]:
# Output root for the images whose fill value has been rewritten.
dest_dir = ''.join([preprocessed_dir, 'change_fill_value/', modis_product])

In [6]:
# Load every raw image of the selected band, replace the value -3000 with
# -2001, and cache the result under dest_dir using the same
# <reservoir>/<year>/<year><day-of-year> layout.
#
# NOTE(review): this loop starts one year before year_range[0], unlike the
# other loops in this notebook — confirm that the extra year is intended.
#
# (reservoir_index, year, day) triples that could not be processed end up in
# skipped_fill so they can be inspected after the run.
skipped_fill = []
for reservoir_index in range(n_reservoirs):
    for year in range(year_range[0] - 1, year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d*day_period + 1
            prefix = os.path.join(str(reservoir_index), str(year),
                                  str(year) + str(day).zfill(3))
            dir_prefix = os.path.join(data_dir, prefix)
            try:
                # img_dir is used as a file name relative to the day folder.
                img_dir = find_img_name_1(data_dir=data_dir,
                                          reservoir_index=reservoir_index,
                                          band_find=used_band,
                                          year=year, day=day)
                img = get_im(os.path.join(dir_prefix, img_dir))

                img[img == -3000] = -2001
                cur_dest_dir = os.path.join(dest_dir, prefix)
                os.makedirs(cur_dest_dir, exist_ok=True)
                # Swap the source extension for '.dat' regardless of its length.
                cached_name = os.path.splitext(img_dir)[0] + '.dat'
                cache_data(img, os.path.join(cur_dest_dir, cached_name))
            except Exception:
                # Best effort: a slot that cannot be processed is skipped.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt stop this long-running cell.
                skipped_fill.append((reservoir_index, year, day))

# Normalize data: (img - mean)/std

In [16]:
# Normalization reads the fill-value-corrected images and writes next to them.
# NOTE: data_dir is rebound here; re-running an earlier cell after this one
# would make it read from this location instead of the raw data.
dest_dir = ''.join([preprocessed_dir, 'normalized/', modis_product])
data_dir = ''.join([preprocessed_dir, 'change_fill_value/', modis_product])

## Calculate mean and std

In [17]:
# Per-reservoir mean and standard deviation, keyed by reservoir index.
# NOTE(review): both inner ranges yield a single value (first year, day 1),
# so each statistic is computed from one image per reservoir — confirm that
# this sampling is intended.
mean_std_dict = {}
for reservoir_index in range(n_reservoirs):
    images = []
    for year in range(year_range[0], year_range[0] + 1):
        for d in range(1):
            day = d*day_period + 1
            # Here the lookup result is passed to restore_data as-is.
            img_dir = find_img_name(data_dir=data_dir,
                                    reservoir_index=reservoir_index,
                                    band_find=used_band,
                                    year=year, day=day)
            img = restore_data(os.path.join(img_dir))
            images.append(img)
    # Stack the collected images row-wise and reduce over all pixels.
    total_img = np.vstack(images)
    mean_std_dict[reservoir_index] = {'mean': total_img.mean(),
                                      'std': total_img.std()}

In [18]:
# Persist the statistics under mean_std/<data_dir>/mean_std.dat.
mean_std_dir = os.path.join('mean_std', data_dir)
# exist_ok=True tolerates an already-existing folder, while any other OS error
# (e.g. missing permissions) is raised here instead of being silently ignored.
os.makedirs(mean_std_dir, exist_ok=True)

cache_data(mean_std_dict, os.path.join(mean_std_dir, 'mean_std.dat'))

In [19]:
# Display the per-reservoir {'mean', 'std'} entries computed above.
mean_std_dict

{0: {'mean': 4451.6206, 'std': 2728.1873},
 1: {'mean': 4861.3423, 'std': 2400.249},
 2: {'mean': 6645.236, 'std': 2422.827},
 3: {'mean': 3869.2444, 'std': 1798.6421},
 4: {'mean': 4162.976, 'std': 1522.1868},
 5: {'mean': 4263.289, 'std': 1863.2147},
 6: {'mean': 6382.211, 'std': 1288.4972},
 7: {'mean': 3887.5776, 'std': 2072.7852},
 8: {'mean': 4164.7646, 'std': 1830.4982},
 9: {'mean': 5707.8745, 'std': 1810.8042},
 10: {'mean': 4088.6785, 'std': 1249.143},
 11: {'mean': 3927.2249, 'std': 1705.9479},
 12: {'mean': 3306.905, 'std': 1941.948},
 13: {'mean': 3971.859, 'std': 2198.2883},
 14: {'mean': 3486.9578, 'std': 1823.9814},
 15: {'mean': 5199.7905, 'std': 1689.0787},
 16: {'mean': 3832.5652, 'std': 1456.273},
 17: {'mean': 4541.2236, 'std': 1382.1403},
 18: {'mean': 6119.65, 'std': 2288.0142},
 19: {'mean': 2909.4187, 'std': 1848.3088},
 20: {'mean': 4181.6343, 'std': 1707.2988},
 21: {'mean': 4927.366, 'std': 2069.0547},
 22: {'mean': 4354.4624, 'std': 2515.2427},
 23: {'mean'

## Create normalized images

In [None]:
# Standardize every image as (img - mean)/std using the statistics stored for
# its reservoir in mean_std_dict, and cache the result under dest_dir with
# the same <reservoir>/<year>/<year><day-of-year> layout and file name.
#
# (reservoir_index, year, day) triples that could not be processed end up in
# skipped_norm so they can be inspected after the run.
skipped_norm = []
for reservoir_index in range(n_reservoirs):
    mean = mean_std_dict[reservoir_index]['mean']
    std = mean_std_dict[reservoir_index]['std']
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d*day_period + 1
            prefix = os.path.join(str(reservoir_index), str(year),
                                  str(year) + str(day).zfill(3))
            dir_prefix = os.path.join(data_dir, prefix)
            try:
                # img_dir is used as a file name relative to the day folder.
                img_dir = find_img_name_1(data_dir=data_dir,
                                          reservoir_index=reservoir_index,
                                          band_find=used_band,
                                          year=year, day=day)
                img = restore_data(os.path.join(dir_prefix, img_dir))

                normalized_img = (img - mean)/std
                cur_dest_dir = os.path.join(dest_dir, prefix)
                os.makedirs(cur_dest_dir, exist_ok=True)
                cache_data(normalized_img, os.path.join(cur_dest_dir, img_dir))
            except Exception:
                # Best effort: a slot that cannot be processed is skipped.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt stop this long-running cell.
                skipped_norm.append((reservoir_index, year, day))

## Create normalized images by dividing by 10000

In [None]:
# The divide-by-10000 variant reads the fill-value-corrected images and writes
# to its own 'normalized_div/' folder.
# NOTE: data_dir and dest_dir are rebound here; cells above that use them would
# see these values if re-run afterwards.
dest_dir = ''.join([preprocessed_dir, 'normalized_div/', modis_product])
data_dir = ''.join([preprocessed_dir, 'change_fill_value/', modis_product])

In [None]:
# Scale every image by 1/10000 and cache the result under dest_dir with the
# same <reservoir>/<year>/<year><day-of-year> layout and file name.
# (10000 is presumably the integer scale factor of the stored band values —
# TODO confirm against the product documentation.)
#
# The loop covers every reservoir. It previously started at index 4, which
# left reservoirs 0-3 without output on a fresh run; re-processing them simply
# rewrites the same files.
#
# (reservoir_index, year, day) triples that could not be processed end up in
# skipped_div so they can be inspected after the run.
skipped_div = []
for reservoir_index in range(n_reservoirs):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d*day_period + 1
            prefix = os.path.join(str(reservoir_index), str(year),
                                  str(year) + str(day).zfill(3))
            dir_prefix = os.path.join(data_dir, prefix)
            try:
                # img_dir is used as a file name relative to the day folder.
                img_dir = find_img_name_1(data_dir=data_dir,
                                          reservoir_index=reservoir_index,
                                          band_find=used_band,
                                          year=year, day=day)
                img = restore_data(os.path.join(dir_prefix, img_dir))

                normalized_img = img/10000
                cur_dest_dir = os.path.join(dest_dir, prefix)
                os.makedirs(cur_dest_dir, exist_ok=True)
                cache_data(normalized_img, os.path.join(cur_dest_dir, img_dir))
            except Exception:
                # Best effort: a slot that cannot be processed is skipped.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt stop this long-running cell.
                skipped_div.append((reservoir_index, year, day))

In [39]:
# Point data_dir at the normalized images of the 'non_mask_cloud' tree.
# NOTE: this path does not go through preprocessed_dir, and it rebinds
# data_dir for every cell executed afterwards.
data_dir = ''.join(['preprocessed_data/non_mask_cloud/normalized/', modis_product])

In [40]:
# Per-reservoir minimum and maximum pixel value, keyed by reservoir index.
# NOTE(review): both inner ranges yield a single value (first year, day 1),
# so each extreme is taken from one image per reservoir — confirm that this
# sampling is intended.
min_max_dict = {}
for reservoir_index in range(n_reservoirs):
    total_img = []
    for year in range(year_range[0], year_range[0] + 1):
        for d in range(1):
            day = d*day_period + 1
            # Here the lookup result is passed to restore_data as-is.
            img_dir = find_img_name(data_dir=data_dir,
                                    reservoir_index=reservoir_index,
                                    band_find=used_band,
                                    year=year, day=day)
            img = restore_data(os.path.join(img_dir))
            total_img.append(img)
    total_img = np.vstack(total_img)
    min_max_dict[reservoir_index] = {'min': np.min(total_img),
                                     'max': np.max(total_img)}

min_max_dir = os.path.join('min_max', data_dir)
# exist_ok=True tolerates an already-existing folder, while any other OS error
# (e.g. missing permissions) is raised here instead of being silently ignored.
os.makedirs(min_max_dir, exist_ok=True)

# Saving is currently disabled; uncomment to persist the dictionary.
#cache_data(min_max_dict, os.path.join(min_max_dir, 'min_max.dat'))

In [41]:
# Display the per-reservoir {'min', 'max'} entries computed above.
min_max_dict

{0: {'min': -1.8399645, 'max': 1.6867199},
 1: {'min': -2.4132743, 'max': 1.6878362},
 2: {'min': -3.249838, 'max': 1.2729893},
 3: {'min': -3.040554, 'max': 2.4678233},
 4: {'min': -3.9231975, 'max': 2.9116604},
 5: {'min': -3.0478222, 'max': 2.3930225},
 6: {'min': -6.7657175, 'max': 2.7251136},
 7: {'min': -2.6247876, 'max': 1.9839951},
 8: {'min': -3.3144531, 'max': 2.1920333},
 9: {'min': -3.527634, 'max': 1.4317882},
 10: {'min': -4.8750854, 'max': 3.3305411},
 11: {'min': -3.4629521, 'max': 1.9214604},
 12: {'min': -2.4016418, 'max': 1.7755479},
 13: {'min': -2.674013, 'max': 2.5543497},
 14: {'min': -2.8298082, 'max': 2.8262644},
 15: {'min': -3.9777837, 'max': 1.8982002},
 16: {'min': -4.0058184, 'max': 2.205929},
 17: {'min': -4.4645166, 'max': 3.1125612},
 18: {'min': -3.5196378, 'max': 1.5141374},
 19: {'min': -2.422828, 'max': 1.7056148},
 20: {'min': -3.474518, 'max': 2.098957},
 21: {'min': -3.3399713, 'max': 1.3884627},
 22: {'min': -2.526779, 'max': 1.4899309},
 23: {'