In [None]:
import pygeos
import pandas as pd
import pyproj
import geopandas as gpd
gpd.options.use_pygeos=True

In [None]:
import numpy as np

In [None]:
from tqdm import tqdm
tqdm.pandas()

In [None]:
from solarpv.utils import get_utm_zone

In [None]:
from shapely import geometry, ops, wkt
from functools import partial

### Approach:
- get geometries for the lat/lon boxes and buffer them by 10 km
- get geometries for the unary union of the 10 km buffers
- map the lat/lon boxes to the unary union
- map the unary union to individual arrays

In [None]:
gdf = gpd.read_file('./data/SPV_newmw.gpkg')

In [None]:
gdf['representative_point'] = gdf['geometry'].representative_point()

In [None]:
gdf = pd.DataFrame(gdf)

In [None]:
gdf['geometry'] = gdf['geometry'].apply(lambda el: el.wkt)

In [None]:
PROJ_WGS = pyproj.Proj("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

In [None]:
def buffer_geom(geom, dist):
    """Buffer a lon/lat geometry by ``dist`` in its local UTM projection.

    The geometry is projected from ``PROJ_WGS`` (lon/lat) into the UTM zone that
    ``get_utm_zone`` returns for its representative point, buffered there, and
    projected back to lon/lat.

    Parameters
    ----------
    geom : shapely geometry or str
        Geometry in lon/lat coordinates. A WKT string is accepted as well and is
        parsed first: an earlier cell replaces ``gdf['geometry']`` with ``.wkt``
        text, and several callers pass those values straight in.
    dist : float
        Buffer distance in the units of the UTM projection
        (presumably metres, the PROJ default for ``proj='utm'``).

    Returns
    -------
    shapely geometry
        The buffered geometry, in lon/lat coordinates.
    """
    if isinstance(geom, str):
        geom = wkt.loads(geom)

    # The representative point picks the UTM zone; compute it once.
    rep_pt = geom.representative_point()
    utm_zone = get_utm_zone(rep_pt.y, rep_pt.x)

    PROJ_UTM = pyproj.Proj(proj='utm', zone=utm_zone, ellps='WGS84')

    wgs2utm = partial(pyproj.transform, PROJ_WGS, PROJ_UTM)
    utm2wgs = partial(pyproj.transform, PROJ_UTM, PROJ_WGS)

    # Buffer in projected space so that ``dist`` is a linear distance, not degrees.
    shp_utm_buffer = ops.transform(wgs2utm, geom).buffer(dist)

    return ops.transform(utm2wgs, shp_utm_buffer)

In [None]:
# Snap every representative point down to the lower-left corner of its
# 0.5-degree lat/lon box (floor to the nearest half degree).
rep_pts = gdf['representative_point']
gdf['nearest_x.5'] = rep_pts.apply(lambda pt: np.floor(pt.x * 2) / 2)
gdf['nearest_y.5'] = rep_pts.apply(lambda pt: np.floor(pt.y * 2) / 2)

In [None]:
coord_pairs = list(set([tuple(cc) for cc in gdf[['nearest_x.5','nearest_y.5']].values]))

In [None]:
coord_geoms = [geometry.box(cc[0],cc[1],cc[0]+0.5, cc[1]+0.5) for cc in coord_pairs]

In [None]:
latlon_boxes = gpd.GeoDataFrame(coord_geoms).rename(columns={0:'geometry'}).set_geometry('geometry')

In [None]:
latlon_boxes['buffer_10km'] = latlon_boxes['geometry'].progress_apply(lambda el: buffer_geom(el, 10000))

In [None]:
gdf['buffer_10km'] = gdf['geometry'].progress_apply(lambda el: buffer_geom(el,10000))

In [None]:
# An earlier cell rebinds gdf to a plain pandas DataFrame, whose columns are
# pandas Series without `.unary_union`; wrap the buffers in a GeoSeries first.
mp = gpd.GeoSeries(gdf['buffer_10km']).unary_union

In [None]:
# Count the parts via `.geoms` (iterating a multi-part geometry directly is
# deprecated in Shapely 1.8 and removed in 2.0); a single Polygon counts as one part.
len(getattr(mp, 'geoms', [mp]))

In [None]:
# One row per part of the dissolved buffer. `.geoms` replaces direct iteration of
# the multi-part geometry (removed in Shapely 2.0); if the union collapsed to a
# single Polygon it becomes a one-row frame.
gdf_mp = gpd.GeoDataFrame(list(getattr(mp, 'geoms', [mp])))

In [None]:
gdf_mp = gdf_mp.rename(columns={0:'geometry'}).set_geometry('geometry')

In [None]:
import matplotlib.pyplot as plt

In [None]:
fig,ax = plt.subplots(1,1,figsize=(24,16))
gdf.plot(ax=ax)
plt.show()

In [None]:
gdf_mp.to_file('./data/SPV_10km_buffer.gpkg',driver='GPKG')

In [None]:
tree = pygeos.STRtree([pygeos.io.from_shapely(geom) for geom in gdf_mp['geometry'].values])

In [None]:
# gdf['geometry'] may hold WKT text (an earlier cell replaces the geometries with
# their `.wkt` strings), which pygeos.io.from_shapely cannot convert; parse those first.
spv_geoms = [wkt.loads(geom) if isinstance(geom, str) else geom for geom in gdf['geometry'].values]

# Q holds index pairs; it is transposed below into (SPV_idx, mp_idx) columns.
Q = tree.query_bulk([pygeos.io.from_shapely(geom) for geom in spv_geoms], predicate='intersects')

In [None]:
Q.T

In [None]:
gdf_mp['intersects_unique_ids'] = ''

In [None]:
Q_df = pd.DataFrame(Q.T, columns = ['SPV_idx','mp_idx'])

In [None]:
gdf

In [None]:
gdf.loc[Q_df.iloc[(Q_df['mp_idx']==2207).values, Q_df.columns.get_loc('SPV_idx')].values,'unique_id'].values.tolist()

In [None]:
# Collect, for every union polygon, the unique_ids of the arrays that intersect it.
# A single pass over the (SPV_idx, mp_idx) pairs replaces building a full boolean
# mask per polygon. The indices from the tree query are positional, so the ids are
# looked up by position rather than by index label.
spv_unique_ids = gdf['unique_id'].values[Q_df['SPV_idx'].values].tolist()

ids_by_mp = {}
for mp_idx, unique_id in zip(Q_df['mp_idx'].values.tolist(), spv_unique_ids):
    ids_by_mp.setdefault(mp_idx, []).append(unique_id)

# dtype=object keeps each cell a Python list; polygons with no match get [].
gdf_mp['intersects_unique_ids'] = pd.Series(
    [ids_by_mp.get(ii, []) for ii in range(len(gdf_mp))],
    index=gdf_mp.index,
    dtype=object,
)

In [None]:
gdf_mp

In [None]:
import json

In [None]:
gdf_mp['intersects_unique_ids'] = gdf_mp['intersects_unique_ids'].apply(json.dumps)

In [None]:
gdf_mp.to_file('./data/SPV_10km_buffer.gpkg',driver='GPKG')

In [None]:
import geojson

In [None]:
gdf_mp

In [None]:
import os

In [None]:
def to_geojson(row):
    """Write one row as a single-feature GeoJSON FeatureCollection.

    The file is named ``<row.name>_10k.geojson`` and written to
    ``<cwd>/data/landmark_mp``. The feature carries the row's ``geometry`` and
    its ``intersects_unique_ids`` value as the only property.

    Parameters
    ----------
    row : pandas.Series
        A row providing ``geometry`` and ``intersects_unique_ids``; ``row.name``
        (the index label) becomes the file stem.
    """
    out_dir = os.path.join(os.getcwd(), 'data', 'landmark_mp')
    # Create the target directory on first use instead of failing in open().
    os.makedirs(out_dir, exist_ok=True)

    fname = f'{row.name}_10k.geojson'
    gj = geojson.FeatureCollection([
        geojson.Feature(
            geometry=row['geometry'],
            properties={'intersects_unique_ids': row['intersects_unique_ids']},
        )
    ])
    # Context manager guarantees the handle is flushed and closed for every row.
    with open(os.path.join(out_dir, fname), 'w') as fp:
        json.dump(gj, fp)

In [None]:
gdf_mp.apply(lambda row: to_geojson(row), axis=1)

### Get the labels

In [None]:
gdf_mp = gpd.read_file('./data/SPV_10km_buffer.gpkg')

In [None]:
import glob, os

**Approach:**
- get pos/neg (P/N) labels on gdf_mp
- where there is a single id, or the result is NEG, use the results from the search and populate them to gdf
- where there are multiple ids and the result is POS, the search needs to be refined -> as a start, see how many units this affects

In [None]:
csv_results = glob.glob('./data/landmark_results_mp/*.csv')
rerun_results = glob.glob('./data/landmark_results_mp/rerun/*.csv')
T_png = glob.glob('./data/landmark_results_mp/imgs/pos/*.png')
F_png = glob.glob('./data/landmark_results_mp/imgs/neg/*.png')

In [None]:
len(csv_results)+len(rerun_results)+len(T_png)+len(F_png)

In [None]:
gdf_mp['P/N'] = ''

In [None]:
def _mp_index_from_path(path):
    """Return the integer file stem of ``path``, or None if the stem is not a plain integer."""
    stem = os.path.split(path)[-1].split('.')[0]
    return int(stem) if stem.isdigit() else None


# CSV results: a non-empty result table marks the polygon positive ('T'),
# an empty one negative ('F').
for f in csv_results+rerun_results:
    idx = _mp_index_from_path(f)
    if idx is None:
        # Skip files not named after a polygon index (a later cell reads
        # '26-special.csv' from this directory); int() would raise on them.
        continue
    result = pd.read_csv(f)
    gdf_mp.loc[idx,'P/N'] = 'T' if len(result)>0 else 'F'

# PNG markers: the folder (pos/neg) the image was globbed from is the label.
for label, paths in (('T', T_png), ('F', F_png)):
    for f in paths:
        idx = _mp_index_from_path(f)
        if idx is None:
            continue
        gdf_mp.loc[idx, 'P/N'] = label

In [None]:
rerun = gdf_mp[gdf_mp['P/N']==''].index.values

In [None]:
rerun

In [None]:
import shutil, json

In [None]:
gdf_mp['intersects_unique_ids']= gdf_mp['intersects_unique_ids'].apply(lambda el: json.loads(el))

In [None]:
gdf_mp[(gdf_mp['P/N']=='T') & (gdf_mp['intersects_unique_ids'].str.len()>1)]

In [None]:
ambig = gdf_mp.loc[(gdf_mp['P/N']=='T') & (gdf_mp['intersects_unique_ids'].str.len()>1),:]

In [None]:
import matplotlib.pyplot as plt

In [None]:
recs = []

In [None]:
# Interactive hand-labelling: for every ambiguous polygon, plot the 10 km buffer
# outline of each intersecting array, label it with its unique_id, and record the
# note typed at the prompt. The notes are checkpointed to CSV after every answer.
for idx, row in ambig.iterrows():
    print('IDX',idx)
    fig, ax = plt.subplots(1,1,figsize=(16,16))

    # .copy() so the new column is written to an independent frame, not a view of gdf.
    gdf_slice = gdf.loc[gdf['unique_id'].isin(row['intersects_unique_ids']),:].copy()

    # gdf['geometry'] may hold WKT text (an earlier cell stores `.wkt` strings);
    # parse those so buffering and representative_point() work either way.
    slice_geoms = gdf_slice['geometry'].apply(lambda el: wkt.loads(el) if isinstance(el, str) else el)
    gdf_slice['buffer_geom'] = slice_geoms.apply(lambda el: buffer_geom(el, 10000))

    # Build a GeoDataFrame explicitly: gdf may be a plain DataFrame without set_geometry().
    gpd.GeoDataFrame(gdf_slice, geometry='buffer_geom').boundary.plot(ax=ax)
    for geom, unique_id in zip(slice_geoms, gdf_slice['unique_id']):
        pt = geom.representative_point()
        ax.text(pt.x,pt.y,unique_id)

    plt.show()
    # Release the figure; this loop otherwise accumulates one large figure per polygon.
    plt.close(fig)

    rec = input('meow')
    recs.append({idx:rec})
    pd.DataFrame.from_records(recs).to_csv('./landmark_handlabel.csv')

In [None]:
recs

In [None]:
df_hand = pd.Series(recs)

In [None]:
df_hand.apply(lambda el: el.items())

In [None]:
df_hand = pd.DataFrame([r.items() for r in recs])

In [None]:
df_hand['index'] = df_hand[0].str[0]

In [None]:
df_hand['note'] = df_hand[0].str[1]

In [None]:
df_hand.drop(columns=[0]).to_csv('./landmark_handlabel.csv')

In [None]:
df_hand = df_hand.rename(columns={'index':'mp_index'})

In [None]:
# Attach the hand-label note to every union polygon (left join on the polygon
# index), then index by mp_index and drop the geometry column.
hand_notes = df_hand[['mp_index','note']]
df_merged = pd.merge(gdf_mp, hand_notes, how='left', left_index=True, right_on='mp_index')
df_merged = df_merged.set_index('mp_index').drop(columns=['geometry'])

In [None]:
df_ic = pd.DataFrame(df_merged).explode('intersects_unique_ids')

In [None]:
df_ic.reset_index().set_index('intersects_unique_ids').to_csv('./data/landmark/consolidation.csv')

In [None]:
df_ic['note'].unique()

In [None]:
df_ic['note'] = df_ic['note'].astype(str)

In [None]:
# unique_ids singled out by hand-label notes containing "only" (compared as strings
# by big_mapper). '26231' and '26251' are separate entries: the original fused
# string '26231, 26251' could never equal a single id.
only_idx = [
    '42482', '62324', '34025', '22303', '15993', '42637', '42665', '42045',
    '14788', '34473', '34015', '33551', '33606', '26072', '32314', '33631',
    '33678', '33850', '34611', '33957', '34060', '34054', '34168', '33585',
    '32686', '26231', '26251', '29078', '33822', '33919',
    '1107', '59347', '59341', '59392', '59306', '59366',
    '24268', '24428', '32562', '28544',
]

In [None]:
# geom_lookup  # stale scratch reference: no visible cell defines this name, so
#              # evaluating it raises NameError on a fresh kernel (Restart & Run All).

In [None]:
# Notes that map to one fixed outcome regardless of the array id.
# These are tested BEFORE the generic "'only' in note" rule; otherwise the
# 'only long island ...' and 'only boston ...' notes can never be reached.
_NOTE_TO_STEP = {
    'all': 'from csv',
    'breakup': 'do individual',
    '337 - only millewa-mallee; else all.': 'from csv - custom',
    'Walpole Island Indian Reserve to 63387, all others to all': 'from csv - custom',
    'S of 23, E, intersecting, 120.5': 'geomlookup_SE231205',
    'intersects socal_intersections.geojson': 'geomlookup_SOCAL',
    'intersects mexico': 'geomlookup_MX',
    'breakup south of 33.2': 'geomlookup_S33.2',
    'norcal intersections.geojson': 'geomlookup_NORCAL',
    'only long island intersections': 'geomlookup_LONGISLAND',
    'only boston_intersection.geojson': 'geomlookup_BOSTON',
    'intersects Point(-75.7, 43.1)': 'geomlookup_PT75',
    # NOTE(review): no closing parenthesis; kept as-is because it must equal the
    # hand-typed note text. Confirm against the recorded notes.
    'intersects Point(-79.3, 44.3': 'geomlookup_PT79',
    'intersects minneapolis_intersections.geojson': 'geomlookup_MINN',
}

# Notes meaning "use the csv for every id except these".
_NOTE_TO_EXCLUDED_IDS = {
    'all except 22275, 22288': ('22275', '22288'),
    'not 22297, 62896': ('22297', '62896'),
    'not 14738': ('14738',),
}


def big_mapper(row):
    """Translate a row's hand-label note and P/N flag into a processing step.

    Parameters
    ----------
    row : mapping (e.g. a pandas row)
        Must provide ``note`` (a string; ``'nan'`` when no note was recorded),
        ``P/N`` (``'T'``/``'F'``) and ``intersects_unique_ids`` (a single id).

    Returns
    -------
    str or None
        One of ``'no results'``, ``'from csv'``, ``'from csv - custom'``,
        ``'do individual'`` or a ``'geomlookup_*'`` key; ``None`` when no rule
        matches the note.

    Notes
    -----
    The id is compared as a string: the id lists here and ``only_idx`` hold
    strings, while the exploded id column can hold integers, in which case the
    membership tests would never match.
    NOTE(review): ``geomlookup_LONGISLAND`` / ``geomlookup_BOSTON`` are now
    reachable; confirm the later lookup step has geometries for them.
    """
    note = row['note']
    unique_id = str(row['intersects_unique_ids'])

    # No hand label: the automatic P/N result decides.
    if note == 'nan':
        if row['P/N'] == 'F':
            return 'no results'
        if row['P/N'] == 'T':
            return 'from csv'
        return None

    if note in _NOTE_TO_STEP:
        return _NOTE_TO_STEP[note]

    if note in _NOTE_TO_EXCLUDED_IDS:
        return 'no results' if unique_id in _NOTE_TO_EXCLUDED_IDS[note] else 'from csv'

    # Free-text "only ..." notes: the listed ids are processed individually.
    if 'only' in note:
        if unique_id in only_idx:
            return 'do individual'
        if 'LV intersections' in note:
            return 'geomlookup_LV'
        return 'no results'

    if 'uk comm' in note.lower():
        return 'geomlookup_UK'

    return None

In [None]:
df_ic['map_col'] = df_ic.apply(lambda row: big_mapper(row), axis=1)

In [None]:
df_ic.groupby('map_col').size()

In [None]:
df_ic = pd.read_csv('./data/landmark/consolidation.csv')

In [None]:
import os

In [None]:
# Lookup geometries keyed by the 'geomlookup_*' labels: file-backed entries are
# dissolved with unary_union, the rest are literal points / boxes.
_proximity_dir = os.path.join(os.getcwd(), 'solarpv', 'analysis', 'proximity')


def _read_layer(*path_parts):
    """Read the vector file at os.path.join(*path_parts)."""
    return gpd.read_file(os.path.join(*path_parts))


geom_table = {}
geom_table['geomlookup_LV'] = _read_layer(_proximity_dir, 'LV_intersections.geojson').unary_union
geom_table['geomlookup_MINN'] = _read_layer(_proximity_dir, 'minneapolis_intersections.geojson').unary_union
# NOTE(review): this unions every feature in the countries file, not only Mexico —
# confirm whether a filter on the country attribute is missing.
geom_table['geomlookup_MX'] = _read_layer(os.getcwd(), 'data', 'ne_10m_countries.gpkg').unary_union
geom_table['geomlookup_NORCAL'] = _read_layer(_proximity_dir, 'norcal_intersections.geojson').unary_union
geom_table['geomlookup_PT75'] = geometry.Point(-75.7, 43.1)
geom_table['geomlookup_PT79'] = geometry.Point(-79.3, 44.3)
geom_table['geomlookup_S33.2'] = geometry.box(-179, 0, 179, 33.2)
geom_table['geomlookup_SE231205'] = geometry.box(120.5, 0, 179, 23)
geom_table['geomlookup_SOCAL'] = _read_layer(_proximity_dir, 'socal_intersections.geojson').unary_union
# Kept as a full GeoDataFrame (not dissolved).
geom_table['geomlookup_UK'] = _read_layer(_proximity_dir, 'UK_communities.gdb')

In [None]:
UK_communities = gpd.read_file('./solarpv/analysis/proximity/UK_communities.gdb')

In [None]:
gdf.loc[gdf['unique_id']==68657, 'geometry'].values[0]

In [None]:
df_ic

In [None]:
def get_next_step(row):
    """Resolve a row's ``map_col`` label into its next processing step.

    Labels that are keys of ``geom_table`` (other than ``'geomlookup_UK'``) are
    resolved spatially: the array geometry is buffered by 10000 and tested against
    the lookup geometry, giving ``'do individual'`` on intersection and
    ``'no results'`` otherwise (``'missing geom'`` when the id is not in ``gdf``).
    ``'geomlookup_UK'`` becomes ``'do UK'``; any other label is returned unchanged.
    """
    step = row['map_col']
    geometry_steps = [key for key in geom_table.keys() if key != 'geomlookup_UK']

    # Anything that is not a geometry lookup is passed through (UK gets its own step).
    if step not in geometry_steps:
        return 'do UK' if step == 'geomlookup_UK' else step

    matches = gdf.loc[gdf['unique_id']==int(row['intersects_unique_ids']),'geometry']
    if len(matches)==0:
        print ('missing geom bork')
        return 'missing geom'

    buffered_geom = buffer_geom(matches.values[0],10000)
    return 'do individual' if buffered_geom.intersects(geom_table[step]) else 'no results'
        

In [None]:
df_ic['next_step'] = df_ic.progress_apply(lambda row: get_next_step(row), axis=1)

In [None]:
df_ic.groupby('next_step').size()

In [None]:
import json

In [None]:
df_ic

In [None]:
df_ic.loc[df_ic['mp_index'].isin([1755, 1788, 1803, 1815, 1822, 1866, 3652, 1935,1938, 1977, 1981]),'next_step'] = 'do individual'

In [None]:
# Save the ids that need an individual search. The context manager makes sure the
# file is flushed and closed (the bare open() left the handle dangling).
do_individual_ids = df_ic.loc[df_ic['next_step']=='do individual','intersects_unique_ids'].values.tolist()
with open(os.path.join(os.getcwd(),'solarpv','analysis','proximity','do_individual.json'),'w') as fp:
    json.dump(do_individual_ids, fp)

In [None]:
gdf_mp

In [None]:
import geojson

In [None]:
# Write one GeoJSON file per 'do individual' id, holding that array's 10 km buffer.
do_individual_dir = os.path.join(os.getcwd(),'data','landmark','do_individual')
# Create the output directory up front instead of failing on the first open().
os.makedirs(do_individual_dir, exist_ok=True)

for idx, row in df_ic.loc[df_ic['next_step']=='do individual',:].iterrows():
    orig_geom = gdf.loc[gdf['unique_id']==int(row['intersects_unique_ids']),'geometry'].values[0]
    buffered_geom = buffer_geom(orig_geom,10000)
    gj = geojson.FeatureCollection([geojson.Feature(geometry=buffered_geom,properties={})])
    # Close each file as soon as it is written; the loop may create many of them.
    with open(os.path.join(do_individual_dir,f'{row["intersects_unique_ids"]}.geojson'),'w') as fp:
        json.dump(gj, fp)

### after that's done...

In [None]:
df_ic.loc[df_ic['note']=='Walpole Island Indian Reserve to 63387, all others to all','next_step'] = 'from csv'

In [None]:
df_ic['intersects_unique_ids'].dtype

In [None]:
df_ic.groupby('next_step').size()

In [None]:
import glob

In [None]:
## csv_dfs
# Load every result table into one dict: multipolygon results are keyed by file
# stem, individual results by 'individual-' + file stem.
def _csv_key(path):
    """File name of ``path`` up to its first dot."""
    return os.path.split(path)[-1].split('.')[0]


mp_csvs = glob.glob(os.path.join(os.getcwd(),'data','landmark_results_mp','*.csv'))
individual_csvs = glob.glob(os.path.join(os.getcwd(),'data','landmark_results','do_individual','*.csv'))

csvs = {}
for path in mp_csvs:
    csvs[_csv_key(path)] = pd.read_csv(path)
for path in individual_csvs:
    csvs['individual-'+_csv_key(path)] = pd.read_csv(path)

In [None]:
def _unique_names(csv_key):
    """Unique ``name`` values of ``csvs[csv_key]``; warns and returns [''] if the key is missing."""
    if csv_key not in csvs.keys():
        print('bork key!', csv_key)
        return ['']
    return csvs[csv_key].loc[:,'name'].unique().tolist()


def do_names_finally(row):
    """Return the list of names for one array, according to its ``next_step``.

    Parameters
    ----------
    row : mapping (e.g. a pandas row)
        Provides ``next_step`` and, depending on the step, ``mp_index`` and
        ``intersects_unique_ids``.

    Returns
    -------
    list or None
        * ``'no results'``: ``['']``
        * ``'from csv'``: unique names from the polygon's csv (``csvs[str(mp_index)]``)
        * ``'from csv - custom'``: id 337 reads ``csvs['26-special']``, every other
          id reads the polygon's csv
        * ``'do UK'``: ``NAME`` of every ``UK_communities`` feature intersecting
          the array's 10 km buffer
        * ``'do individual'``: unique names from ``csvs['individual-<id>']``
        * any other step: ``None``

        A missing csv key prints a warning and yields ``['']``.
    """
    step = row['next_step']

    if step == 'no results':
        return ['']

    if step == 'from csv':
        return _unique_names(str(row['mp_index']))

    if step == 'from csv - custom':
        if row['intersects_unique_ids']==337:
            return _unique_names('26-special')
        return _unique_names(str(row['mp_index']))

    if step == 'do UK':
        orig_geom = gdf.loc[gdf['unique_id']==int(row['intersects_unique_ids']),'geometry'].values[0]
        buffered_geom = buffer_geom(orig_geom,10000)
        return UK_communities.loc[UK_communities.geometry.intersects(buffered_geom),'NAME'].unique().tolist()

    if step == 'do individual':
        # Check the key that is actually read: the original tested the mp_index key
        # and then indexed 'individual-<id>', raising KeyError or warning falsely.
        return _unique_names(f'individual-{row["intersects_unique_ids"]}')

    # Unhandled steps (e.g. 'missing geom') yield no value.
    return None

In [None]:
df_ic['names'] = df_ic.progress_apply(lambda row: do_names_finally(row), axis=1)

In [None]:
df_ic[['intersects_unique_ids','names']]

In [None]:
gdf_wdpa = gpd.read_file('./data/SPV_wdpa.gpkg')

In [None]:
### merge to gdf

In [None]:
gdf['WDPA_proximity']

In [None]:
df_ic.to_csv('./data/landmark/consolidation.csv')

In [None]:
gdf = pd.merge(gdf,df_ic[['intersects_unique_ids','names']], how='left',left_on='unique_id',right_on='intersects_unique_ids')

In [None]:
gdf.columns

In [None]:
gdf['names'] = gdf['names'].apply(json.dumps)

In [None]:
# Final export: drop the working columns, rename the two proximity columns
# (WDPA_proximity -> wdpa_10km, names -> ind_comm_10km) and write the result to
# ./data/SPV_v3.geojson with the GeoJSON driver.
# NOTE(review): `.to_file` is a GeoDataFrame method, but an earlier cell rebinds gdf
# to pd.DataFrame(gdf) and replaces 'geometry' with WKT text — confirm gdf is a
# GeoDataFrame with a real geometry column when this cell runs.
gdf.drop(columns=['cap_test','intersects_unique_ids','GCR','eff','ILR','lc_mode','lc_arid','lc_vis','install_date_ints']).rename(columns={'WDPA_proximity':'wdpa_10km','names':'ind_comm_10km'}).to_file('./data/SPV_v3.geojson',driver='GeoJSON')

### Do I&C status

In [None]:
ambig

**Rerun the missing ones**

In [None]:
len(rerun)

In [None]:
# Stage the geojson of every polygon that still has no P/N label for a rerun.
rerun_dir = './data/landmark_mp/rerun'
# shutil.copy does not create missing directories; make sure the target exists.
os.makedirs(rerun_dir, exist_ok=True)
for idx in rerun:
    shutil.copy(f'./data/landmark_mp/{idx}_10k.geojson',f'{rerun_dir}/{idx}_10k.geojson')

In [None]:
gdf