In [40]:
#Import packages
import os
import re  # regular expressions for getting lat lon grid
import pathlib
import warnings
import gdal
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import rasterio as rio # for extracting subsets
from rasterio.plot import plotting_extent #for plotting
import earthpy as et
import earthpy.plot as ep
import earthpy.spatial as es
import earthpy.mask as em
import pandas as pd
import pickle
import matplotlib.patches as mpatches

#for the reprojecting
import pyproj
from pyproj import CRS
from pyproj import Transformer
import cartopy
import cartopy.crs as ccrs
from shapely.geometry import Point, LineString, Polygon
from pyhdf.SD import SD, SDC
import datetime as dt
import scipy
from scipy import interpolate
import geopandas as gpd

#for finding the mode
from collections import Counter

# Move into the project folder and load the per-city table from the Excel
# workbook stored there. Later cells read the CITY_COUNTRY, min_lat, max_lat,
# min_lon and max_lon columns of this table.
project_dir = os.path.join(et.io.HOME, 'Documents', 'Python_Scripts', 'PROJECT')
os.chdir(project_dir)

CITY_COUNTRY_lat_lon = pd.read_excel('CITY_COUNTRY_lat_lon.xlsx')

In [41]:
#function to extract the subdatasets of interest and return a dataframe 
#NDVI and EVI are vegetation indices
#pixel reliability and VI quality are the quality checks

def _parse_corner_metres(gridmeta, key):
    """Return the ``(x, y)`` floats stored as ``<key>=(x,y)`` in the grid metadata text.

    Raises:
        ValueError: if ``key`` is not present in ``gridmeta`` in that form.
    """
    corner_regex = re.compile(
        re.escape(key) + r'=\(([+-]?\d+\.\d+),([+-]?\d+\.\d+)\)')
    match = corner_regex.search(gridmeta)
    if match is None:
        raise ValueError("'{}' not found in StructMetadata.0".format(key))
    # Built-in float: the np.float alias no longer exists in current NumPy.
    return float(match.group(1)), float(match.group(2))


def vi_dataframe_create(SATELLITE_NDVI, vi_file_name, city_top,
                        city_country=None, bounds=None):
    """Read one monthly 1 km VI HDF file and return its pixels inside a lat/lon box.

    The NDVI, EVI, pixel reliability and VI Quality subdatasets are read with
    rasterio, a coordinate grid is built from the corner points stored in the
    file's ``StructMetadata.0`` attribute and converted to latitude/longitude,
    and everything is flattened into one row per pixel.

    Parameters:
        SATELLITE_NDVI: product folder name (the caller passes 'MOD13A3' or
            'MYD13A3'); used as a sub-folder of the city directory.
        vi_file_name: name of the .hdf file inside that folder.
        city_top: if truthy, read from the city's 'CITY_TOP' sub-folder.
        city_country: city folder name. Defaults to the notebook-level
            ``CITY_COUNTRY`` variable, which is what the original code read.
        bounds: ``(min_lat, max_lat, min_lon, max_lon)``. Defaults to the
            notebook-level variables of those names.

    Returns:
        pandas.DataFrame with columns NDVI, EVI (both multiplied by 0.0001),
        pixel_reliability, VI_quality, Longitude and Latitude, restricted to
        the bounding box (inclusive) and sorted by Latitude then Longitude.
        The index holds each pixel's position in the flattened image.

    Raises:
        ValueError: if one of the four subdatasets, or a corner point in the
            grid metadata, cannot be found.

    Side effects:
        Changes the working directory to the data folder, as the original
        implementation did.
    """
    # Fall back to the notebook globals so existing three-argument calls keep working.
    if city_country is None:
        city_country = CITY_COUNTRY
    if bounds is None:
        bounds = (min_lat, max_lat, min_lon, max_lon)
    lat_lo, lat_hi, lon_lo, lon_hi = bounds

    if city_top:
        data_dir = os.path.join('D:\\', 'MODIS_NDVI', city_country, 'CITY_TOP', SATELLITE_NDVI)
    else:
        data_dir = os.path.join('D:\\', 'MODIS_NDVI', city_country, SATELLITE_NDVI)
    os.chdir(data_dir)
    data_path = os.path.join(data_dir, vi_file_name)

    # Output column -> text that identifies the matching subdataset name.
    wanted = {
        'NDVI': "1 km monthly NDVI",
        'EVI': "1 km monthly EVI",
        'pixel_reliability': "1 km monthly pixel reliability",
        'VI_quality': "1 km monthly VI Quality",
    }
    layers = {}
    with rio.open(data_path) as dataset:
        for name in dataset.subdatasets:
            for label, pattern in wanted.items():
                if re.search(pattern, name):
                    with rio.open(name) as subdataset:
                        # Band 1 as a 2-D array.
                        layers[label] = subdataset.read(1)

    missing = [label for label in wanted if label not in layers]
    if missing:
        raise ValueError(
            "{} has no subdataset for: {}".format(data_path, ', '.join(missing)))

    # The grid corner points live in the 'StructMetadata.0' global attribute.
    hdf = SD(data_path, SDC.READ)
    try:
        gridmeta = hdf.attributes(full=1)["StructMetadata.0"][0]
    finally:
        hdf.end()  # always release the HDF4 file handle

    x0, y0 = _parse_corner_metres(gridmeta, 'UpperLeftPointMtrs')
    x1, y1 = _parse_corner_metres(gridmeta, 'LowerRightMtrs')

    # All four layers share one grid; take its size from the EVI layer.
    ny, nx = layers['EVI'].shape
    xinc = (x1 - x0) / nx
    yinc = (y1 - y0) / ny

    # NOTE(review): nx points spread from the upper-left to the lower-right
    # corner, so they are not exact pixel centres. Left unchanged because the
    # caller aligns the resulting rows positionally with another table.
    x = np.linspace(x0, x0 + xinc*nx, nx)
    y = np.linspace(y0, y0 + yinc*ny, ny)
    xv, yv = np.meshgrid(x, y)

    # Convert the sinusoidal grid to lat/lon (EPSG:4326 returns latitude first).
    transformer = Transformer.from_crs("+proj=sinu +R=6371007.181 +nadgrids=@null +wktext", "EPSG:4326")
    lat, lon = transformer.transform(xv, yv)

    # Apply scale factors
    scale_factor_NDVI = 0.0001
    scale_factor_EVI = 0.0001

    # One row per pixel, built straight from the arrays. The integer layers
    # are widened to int64, the dtype the previous tuple-based construction
    # produced.
    df = pd.DataFrame({
        'NDVI': (layers['NDVI'] * scale_factor_NDVI).ravel(),
        'EVI': (layers['EVI'] * scale_factor_EVI).ravel(),
        'pixel_reliability': layers['pixel_reliability'].ravel().astype(np.int64),
        'VI_quality': layers['VI_quality'].ravel().astype(np.int64),
        'Longitude': lon.ravel(),
        'Latitude': lat.ravel(),
    })

    # Keep only the requested area, sorted by lat then lon.
    inside = ((df.Latitude >= lat_lo) & (df.Latitude <= lat_hi)
              & (df.Longitude >= lon_lo) & (df.Longitude <= lon_hi))
    df_subset = df[inside].sort_values(by=['Latitude', 'Longitude'])

    return df_subset

In [44]:
# Optionally drop cities whose area spans two MODIS images (e.g. Bulawayo), or cities that have already been processed, by uncommenting one of the lines below.
#CITY_COUNTRY_lat_lon = CITY_COUNTRY_lat_lon[18:].reset_index(drop = 'True')

#CITY_COUNTRY_lat_lon = CITY_COUNTRY_lat_lon.iloc[[25,36,37]].reset_index(drop = 'True')


In [46]:
# For every city: pair each Aqua monthly VI file with the Terra file of the
# same month, combine their EVI using the pixel reliability layers, and store
# the mean urban / non-urban EVI per month in a pickle in the city's folder.

MODIS_ROOT = os.path.join('D:\\', 'MODIS_NDVI')
PROJECT_DIR = os.path.join(et.io.HOME, 'Documents', 'Python_Scripts', 'PROJECT')
# Built with os.path.join instead of a literal containing backslashes, which
# held invalid escape sequences such as '\L' and '\C'.
LULC_DIR = os.path.join(PROJECT_DIR, 'UHI_Project_Pickle_Files', 'LULC_Pickles', 'Crop_wbuffer_LULC')
# Land-cover class compared against 'lccs_class_overall_2015' to split urban
# pixels from the rest.
URBAN_LCCS_CLASS = 190
# Pixel reliability rank 0 is "good data" in the MOD13/MYD13 products; every
# other rank (fill, marginal, snow/ice, cloudy) is treated as unreliable.
GOOD_PIXEL = 0


def _list_vi_files(folder):
    """Return a (Filename, Month, Year) table of the .hdf files in ``folder``.

    Month ('%m') and Year ('%Y') are strings parsed from the second
    dot-separated field of the file name after dropping its first character,
    read as year + day-of-year. Files from 2021 are excluded, and the index is
    reset so rows are numbered 0..n-1.
    """
    records = []
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".hdf"):
            continue
        acquired = dt.datetime.strptime(filename.split('.')[1][1:], '%Y%j')
        records.append({'Filename': filename,
                        'Month': acquired.strftime('%m'),
                        'Year': acquired.strftime('%Y')})
    look_up = pd.DataFrame(records, columns=['Filename', 'Month', 'Year'])
    # remove 2021 from the dataframe (not using)
    return look_up[look_up['Year'] != '2021'].reset_index(drop=True)


for _, city_row in CITY_COUNTRY_lat_lon.iterrows():

    # vi_dataframe_create reads these module-level names, so they must be set
    # before it is called.
    CITY_COUNTRY = city_row['CITY_COUNTRY']
    min_lat = city_row['min_lat']
    max_lat = city_row['max_lat']
    min_lon = city_row['min_lon']
    max_lon = city_row['max_lon']

    city_dir = os.path.join(MODIS_ROOT, CITY_COUNTRY)

    # Look-up tables of the available VI files and their months/years.
    vi_look_up_terra = _list_vi_files(os.path.join(city_dir, 'MOD13A3'))
    vi_look_up_aqua = _list_vi_files(os.path.join(city_dir, 'MYD13A3'))

    # Load in LULC data, these bases were created in JASMIN.
    # NOTE: pickle.load executes code embedded in the file; only load pickles
    # you produced yourself.
    with open(os.path.join(LULC_DIR, 'LULC_{}.pkl'.format(CITY_COUNTRY)), 'rb') as f:
        LULC_df = pickle.load(f)
    # Same ordering as the frames returned by vi_dataframe_create, because the
    # VI values are attached to the LULC rows by position below.
    LULC_df = LULC_df.sort_values(by=['Latitude', 'Longitude'])

    monthly_records = []
    for aqua_row in vi_look_up_aqua.itertuples(index=False):
        terra_match = vi_look_up_terra[(vi_look_up_terra.Month == aqua_row.Month)
                                       & (vi_look_up_terra.Year == aqua_row.Year)]
        if terra_match.empty:
            raise FileNotFoundError(
                "No MOD13A3 file for {}-{} in {} to pair with {}".format(
                    aqua_row.Year, aqua_row.Month, CITY_COUNTRY, aqua_row.Filename))
        terra_vi_file_name = terra_match.Filename.values[0]

        aqua_vi_df = vi_dataframe_create('MYD13A3', aqua_row.Filename, city_top = False)
        terra_vi_df = vi_dataframe_create('MOD13A3', terra_vi_file_name, city_top = False)

        if not (len(aqua_vi_df) == len(terra_vi_df) == len(LULC_df)):
            raise ValueError(
                "Pixel count mismatch for {}: LULC={}, Aqua={}, Terra={}".format(
                    CITY_COUNTRY, len(LULC_df), len(aqua_vi_df), len(terra_vi_df)))

        # Attach both sensors' EVI and reliability to the land-cover pixels.
        lulc_vi_df = LULC_df.copy()
        lulc_vi_df['aqua_evi'] = aqua_vi_df.EVI.values
        lulc_vi_df['aqua_pixel_reliability'] = aqua_vi_df.pixel_reliability.values
        lulc_vi_df['terra_evi'] = terra_vi_df.EVI.values
        lulc_vi_df['terra_pixel_reliability'] = terra_vi_df.pixel_reliability.values

        aqua_good = lulc_vi_df['aqua_pixel_reliability'] == GOOD_PIXEL
        terra_good = lulc_vi_df['terra_pixel_reliability'] == GOOD_PIXEL

        # Final EVI: Aqua by default; Terra where Aqua is not good but Terra is.
        lulc_vi_df['evi_final'] = lulc_vi_df['aqua_evi']
        lulc_vi_df.loc[~aqua_good & terra_good, 'evi_final'] = lulc_vi_df['terra_evi']

        # Final flag: 0 = at least one sensor has good data, 1 = neither does.
        # Previously only the rank-1/rank-1 combination was flagged, so fill,
        # snow/ice and cloudy pixels were averaged as if they were good.
        lulc_vi_df['pixel_reliablity_final'] = 1
        lulc_vi_df.loc[aqua_good | terra_good, 'pixel_reliablity_final'] = 0

        # Average rural / urban EVI over usable pixels, plus the usable share.
        usable = lulc_vi_df['pixel_reliablity_final'] == 0
        is_urban = lulc_vi_df['lccs_class_overall_2015'] == URBAN_LCCS_CLASS
        rur_mean_evi = lulc_vi_df.loc[usable & ~is_urban, 'evi_final'].mean()
        urb_mean_evi = lulc_vi_df.loc[usable & is_urban, 'evi_final'].mean()
        if len(lulc_vi_df):
            pixel_reliability_percent = 100 * usable.sum() / len(lulc_vi_df)
        else:
            pixel_reliability_percent = np.nan

        monthly_records.append({
            'Aqua_Filename': aqua_row.Filename,
            'Month': aqua_row.Month,
            'Year': aqua_row.Year,
            'Terra_Filename': terra_vi_file_name,
            'rur_mean_evi': rur_mean_evi,
            'urb_mean_evi': urb_mean_evi,
            'pixel_reliability_percent': pixel_reliability_percent,
        })

    # Build the result once, instead of writing into a NaN-filled frame row by row.
    vi_means_df = pd.DataFrame(
        monthly_records,
        columns=['Aqua_Filename', 'Month', 'Year', 'Terra_Filename',
                 'rur_mean_evi', 'urb_mean_evi', 'pixel_reliability_percent'])

    # Save the df in the city's folder. The chdir is kept so the working
    # directory ends up where the original cell left it.
    os.chdir(city_dir)
    pickle_name = 'vi_means_df_{}.pkl'.format(CITY_COUNTRY)
    with open(os.path.join(city_dir, pickle_name), 'wb') as f:
        pickle.dump(vi_means_df, f)

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
