## הגדרות ומקדים

In [157]:


import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt
from matplotlib import pyplot as plt 
import folium
import fiona
from shapely.geometry import Point
import googlemaps as gmaps

In [105]:
# Notebook-wide pandas display settings: floats are rendered with three
# decimals, and neither columns nor rows are truncated when a frame is shown.
pd.set_option('display.float_format', '{:.3f}'.format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


## פונקציות

In [106]:
def double_taz_num(df):
    """Return the rows of ``df`` whose ``Taz_num`` value occurs more than once.

    Parameters
    ----------
    df : table with a ``Taz_num`` column.

    Returns
    -------
    The matching rows of ``df``, with their original index, order and columns.
    """
    occurrences = df['Taz_num'].value_counts()
    repeated = occurrences.index[occurrences > 1].tolist()
    return df[df['Taz_num'].isin(repeated)]

In [107]:
def make_point(df):
    """Return a copy of ``df`` whose geometry is one representative point per row.

    The points come from ``representative_point()`` and are stored in a column
    named ``centroid`` (the name is historical; no centroid call is made).
    That column becomes the active geometry and the original ``geometry``
    column is removed. ``df`` itself is not modified.
    """
    points = df.copy()
    points['centroid'] = points.representative_point()
    return points.set_geometry('centroid').drop(columns=['geometry'])

In [108]:
def up_load_gdb(path,layer_name):
    """Read one named layer from the data source at ``path``.

    The layer is located by its position in ``fiona.listlayers(path)`` and
    that position is what is handed to ``gpd.read_file``.

    Parameters
    ----------
    path : location of the data source; it is formatted to text before use.
    layer_name : name of the layer to read; must be one of the listed layers.

    Returns
    -------
    Whatever ``gpd.read_file`` returns for that layer.
    """
    source = f'{path}'
    layer_position = fiona.listlayers(source).index(layer_name)
    return gpd.read_file(source, layer=layer_position)

In [109]:
def up_load_shp(path):
    """Read the file at ``path`` with ``gpd.read_file`` and return the result.

    ``path`` is formatted to text before being passed on.
    """
    return gpd.read_file(f'{path}')

In [110]:
def unique_id_in_one_taz(df_to_geoode,unique_field,gpd_for_geocode,taz):
    """List the ``unique_field`` values whose features all fall in a single TAZ.

    Parameters
    ----------
    df_to_geoode : table holding the ``unique_field`` values to look for.
    unique_field : name of the identifier column present in both
        ``df_to_geoode`` and ``gpd_for_geocode``.
    gpd_for_geocode : layer with ``unique_field`` and ``geometry`` columns.
    taz : layer with ``Taz_num`` and ``geometry`` columns.

    Returns
    -------
    list of identifiers that were spatially joined to exactly one distinct
    ``Taz_num``.
    """
    wanted_ids = list(df_to_geoode[unique_field].unique())

    # Only the features that carry one of the requested identifiers.
    candidates = gpd_for_geocode[[unique_field,'geometry']]
    candidates = candidates.loc[gpd_for_geocode[unique_field].isin(wanted_ids)]

    joined = gpd.sjoin(taz[['Taz_num','geometry']],candidates)

    # Number of distinct zones each identifier was joined to.
    taz_per_id = joined.groupby(unique_field)[['Taz_num']].nunique()
    single_taz = taz_per_id.query('Taz_num==1').reset_index()

    return list(single_taz[unique_field])

In [111]:
def many_towns_in_one_taz(df_to_geoode,unique_field,gpd_for_geocode,taz):
    """List the ``unique_field`` values found in a TAZ that holds more than one of them.

    Parameters
    ----------
    df_to_geoode : table holding the ``unique_field`` values to look for.
    unique_field : name of the identifier column present in both
        ``df_to_geoode`` and ``gpd_for_geocode``.
    gpd_for_geocode : layer with ``unique_field`` and ``geometry`` columns.
    taz : layer with ``Taz_num`` and ``geometry`` columns.

    Returns
    -------
    list with one entry per joined feature lying in such a zone, so the same
    identifier can appear more than once.
    """
    wanted_ids = list(df_to_geoode[unique_field].unique())

    # Only the features that carry one of the requested identifiers.
    candidates = gpd_for_geocode[[unique_field,'geometry']]
    candidates = candidates.loc[gpd_for_geocode[unique_field].isin(wanted_ids)]

    joined = gpd.sjoin(taz[['Taz_num','geometry']],candidates)

    # Zones that were joined to more than one distinct identifier.
    ids_per_taz = joined.groupby('Taz_num')[[unique_field]].nunique()
    shared_taz = list(ids_per_taz.query(f'{unique_field} > 1').reset_index()['Taz_num'])

    in_shared_taz = joined['Taz_num'].isin(shared_taz)
    return list(joined.loc[in_shared_taz][unique_field])

In [112]:
def up_load_df(folder_path,file_name):
    """Read an Excel workbook and drop the rows that are entirely empty.

    Parameters
    ----------
    folder_path : folder that contains the workbook.
    file_name : workbook name without the ``.xlsx`` extension.

    The two parts are joined with a backslash, so the resulting path is
    Windows-style.
    """
    sep = '\\'
    workbook = pd.read_excel(f'{folder_path}{sep}{file_name}.xlsx')
    return workbook.dropna(how='all')


## העלאת טבלת מוסדות

In [113]:
# Folder and workbook name (without the .xlsx extension) of the institutions table.
folder_path=r'\\FILE-SRV\Jtmt\projections_team\GIS_backround\INFO\משרד החינוך\תלמידי ישביה וסמינירים'
file_name='כתובות של מוסדות של ירושלים ויוש_עיבוד_ראשוני'

# Load through the up_load_df helper defined earlier in this notebook instead
# of repeating its body here (build the path, read the workbook, drop rows
# that are entirely empty).
df=up_load_df(folder_path,file_name)

# Both names refer to the same frame; no copy is made.
gov_edu_yeshvia=df

## מס עמותה סניף שחוזר על עצמו 

In [114]:
# Display every row whose 'מס_עמותה_סניף' value occurs more than once
# (keep=False marks all occurrences, not only the repeats).
gov_edu_yeshvia.loc[gov_edu_yeshvia.duplicated(subset=['מס_עמותה_סניף'],keep=False)]

Unnamed: 0,שם_העמותה,מס_עמותה_סניף,SETL_CODE,עיר,STR_CODE,רחוב,HOUSE_NUM,num_yeshiva_students
301,תלמוד שמביא לידי מעשה,580253151-5,3000,ירושלים,350,שטראוס,16,56
302,תלמוד שמביא לידי מעשה,580253151-5,3000,ירושלים,4361,בתי מחסה,6,56
324,ביאליסטוק,580101202-0,3000,ירושלים,6624,גבעת שאול,5,53
325,ביאליסטוק,580101202-0,3000,ירושלים,996,בעל השאילתות,23,53
335,קרלין סטולין ביתר,580184612-0,3780,ביתר עילית,104,"חת""ם סופר",8,51
336,קרלין סטולין ביתר,580184612-0,3780,ביתר עילית,203,הרב ברים,12,51
472,אור ההלכה,580098663-0,3000,ירושלים,4962,הרב לופיאן,17,31
473,אור ההלכה,580098663-0,3000,ירושלים,4962,הרב לופיאן,1,31


In [115]:
# Row count before duplicate ids are dropped.
len(gov_edu_yeshvia)

692

In [116]:
# Keep only the first row for each 'מס_עמותה_סניף' value. The frame is modified
# in place, so any other name bound to the same object sees the change too.
# NOTE(review): the duplicated rows displayed earlier carry different
# addresses; keeping the first one discards the other address — confirm intent.
gov_edu_yeshvia.drop_duplicates(subset=['מס_עמותה_סניף'],keep='first',inplace=True)

In [117]:
# Row count after duplicate ids were dropped.
len(gov_edu_yeshvia)

688

## עיגון על ידי רחוב ומספר בית 

In [118]:
# Path of the file geodatabase that the address-point layer is read from.
bld_path=r'\\svm-nas\GIS\Yali\Main_DataBase_By_Source\MPI\BNTL\BNTL 2022\jm_ApQZDNdNOE6B5XU_selections_122\Jerusalem_Transportation_122.gdb'

In [119]:
# Load the 'ADDRESS_POINT_Clip' layer from that geodatabase.
addr_point=up_load_gdb(bld_path,'ADDRESS_POINT_Clip')

In [120]:
# Attach an address-point geometry to each institution by matching settlement
# code, street code and house number. No `how` is given, so pd.merge performs
# an inner join: institutions without an exact address match are absent here.
gov_edu_yeshvia_xy=pd.merge(addr_point[['SETL_CODE','STR_CODE','HOUSE_NUM','geometry']],gov_edu_yeshvia,on=['SETL_CODE','STR_CODE','HOUSE_NUM'])

## עיגון על ידי רחוב שמרוכז באזור תנועה  

In [121]:
# Same join as a right join, so every institution row is kept; then keep only
# the rows whose geometry is missing, i.e. no address point matched.
# NOTE(review): the saved output of this cell shows a pandas warning raised
# inside the merge — possibly mixed/mismatched key dtypes; confirm.
gov_edu_yeshvia_without_xy=pd.merge(addr_point[['SETL_CODE','STR_CODE','HOUSE_NUM','geometry']],gov_edu_yeshvia,on=['SETL_CODE','STR_CODE','HOUSE_NUM'],how='right').query('geometry.isna()')

  key_col = Index(lvals).where(~mask_left, rvals)


In [122]:
# Load the traffic-zone (TAZ) layer; later cells use its 'Taz_num' and 'geometry' columns.
taz=up_load_shp(r'Y:\New Netwroks\4.0\TAZ\TAZ_V4_221113_Published.shp')

In [123]:
# Distinct (settlement code, street code) pairs among the institutions that
# still have no geometry.
town_street_code_to_find=gov_edu_yeshvia_without_xy.drop_duplicates(subset=['SETL_CODE','STR_CODE'],keep='first')[['SETL_CODE','STR_CODE']]

In [124]:
# Combined settlement+street key: the two codes converted to text and concatenated.
# NOTE(review): there is no separator between the codes, so two different
# pairs could yield the same key (e.g. '30'+'01' and '300'+'1') — confirm the
# code widths rule this out. The same construction is applied to addr_point
# and to the institutions table, so any change must be made in all three.
town_street_code_to_find['town_str_code']=town_street_code_to_find['SETL_CODE'].astype(str)+town_street_code_to_find['STR_CODE'].astype(str)

In [125]:
# Add the same concatenated settlement+street key to the address points
# (a new column on addr_point itself).
# NOTE(review): the text form depends on the column dtypes (e.g. '3000' vs
# '3000.0'); keys only match the institutions' keys if dtypes agree — confirm.
addr_point['town_str_code']=addr_point['SETL_CODE'].astype(str)+addr_point['STR_CODE'].astype(str)

In [126]:
# Address points (key and geometry only) that lie on one of the streets being
# searched for.
addr_point_to_sum_by_taz=addr_point[['town_str_code','geometry']].loc[addr_point['town_str_code'].isin(list(town_street_code_to_find.town_str_code))]

In [127]:
# Spatially join the TAZ layer (left) to those address points (right), using
# gpd.sjoin's default join settings; each result row pairs a Taz_num with a
# street key.
addr_point_to_sum_by_taz=gpd.sjoin(taz[['Taz_num','geometry']],addr_point_to_sum_by_taz)

In [128]:
# Streets whose address points all fall inside a single TAZ.
#
# The result is indexed by town_str_code and has one column, 'Taz_num', holding
# the number of that single zone. Previously the column held the nunique()
# COUNT (always 1 after the filter), so the later merge with the TAZ points on
# 'Taz_num' matched every street to zone number 1 instead of its own zone.
addr_in_one_taz=(
    addr_point_to_sum_by_taz.groupby('town_str_code')['Taz_num']
    .agg(['nunique','first'])          # distinct-zone count and the zone number itself
    .query('nunique==1')[['first']]    # keep streets confined to one zone
    .rename(columns={'first':'Taz_num'})
)

In [129]:
# One representative point per TAZ, stored by make_point in a 'centroid' column.
taz_point=make_point(taz)

In [130]:
# Add the concatenated settlement+street key to the institutions that still
# have no geometry, so they can be matched to streets by that key.
# NOTE(review): no separator is used between the two codes, so distinct pairs
# can collide — confirm the code widths rule this out.
gov_edu_yeshvia_without_xy['town_str_code']=gov_edu_yeshvia_without_xy['SETL_CODE'].astype(str)+gov_edu_yeshvia_without_xy['STR_CODE'].astype(str)

In [131]:
# Attach the TAZ point ('centroid') to each single-TAZ street by joining on
# 'Taz_num' (inner join). The name addr_in_one_taz is rebound to the result,
# so re-running this cell alone operates on its own previous output.
# NOTE(review): this only picks the right zone if the left 'Taz_num' holds the
# real zone number — verify what the cell that builds addr_in_one_taz stores.
addr_in_one_taz=pd.merge(addr_in_one_taz.reset_index(),taz_point[['Taz_num','centroid']],on='Taz_num')

In [132]:
# Right-join the single-TAZ streets onto the unlocated institutions (their
# empty geometry column is dropped first) and keep the rows that received a
# TAZ point.
gov_edu_yeshvia_with_xy_of_taz=pd.merge(addr_in_one_taz,gov_edu_yeshvia_without_xy.drop(columns=['geometry']),on='town_str_code',how='right').query('~centroid.isna()')

In [133]:
# Rename the point column to 'geometry' (in place) so this subset has the same
# geometry column name as the other located subsets.
gov_edu_yeshvia_with_xy_of_taz.rename(columns={'centroid':'geometry'},inplace=True)

In [134]:
# Same right join as for the located subset, but keep the institutions that
# did NOT receive a TAZ point. The result no longer has a 'geometry' column
# and carries the merge's 'Taz_num' and 'centroid' columns.
gov_edu_yeshvia_without_xy=pd.merge(addr_in_one_taz,gov_edu_yeshvia_without_xy.drop(columns=['geometry']),on='town_str_code',how='right').query('centroid.isna()')

## עיגון על ידי יישוב שמרוכז באזור תנועה  

In [135]:
# Path of the file geodatabase that the settlement layer is read from.
town_path=r'\\FILE-SRV\Jtmt\projections_team\GIS_backround\INFO\למ_ס\אוכלוסיה\pop_cbs.gdb'

In [136]:
# Load the 'yhsuv_cbs_pop_2020' layer from that geodatabase.
town_point=up_load_gdb(town_path,'yhsuv_cbs_pop_2020')

In [137]:
# Duplicate the settlement-code column under the name 'SETL_CODE', the column
# name used by the institutions table and the address points.
town_point['SETL_CODE']=town_point['סמל_יישוב']

In [138]:
# Settlement codes (among the still-unlocated institutions) whose address
# points were all joined to a single TAZ.
town_in_one_taz=unique_id_in_one_taz(gov_edu_yeshvia_without_xy,'SETL_CODE',addr_point,taz)

In [139]:
# Give the institutions in those settlements the settlement's geometry from
# town_point (inner join on the settlement code).
gov_edu_yeshvia_with_town_xy=pd.merge(town_point[['סמל_יישוב','geometry']],gov_edu_yeshvia_without_xy.loc[gov_edu_yeshvia_without_xy['SETL_CODE'].isin(town_in_one_taz)],left_on=['סמל_יישוב'],right_on=['SETL_CODE'])

In [140]:
# Remove the institutions just located by settlement from the unlocated set.
gov_edu_yeshvia_without_xy=gov_edu_yeshvia_without_xy.loc[~gov_edu_yeshvia_without_xy['SETL_CODE'].isin(town_in_one_taz)]

## עיגון על ידי יישובים שמרוכזים באזור תנועה  

In [141]:
# Settlement codes of the remaining institutions whose town_point feature
# lies in a TAZ that contains more than one of the searched settlements.
# Note: this rebinds town_in_one_taz, replacing the list built for the
# single-TAZ settlements.
town_in_one_taz=many_towns_in_one_taz(gov_edu_yeshvia_without_xy,'SETL_CODE',town_point,taz)

In [142]:
# Give the institutions in those settlements the settlement's geometry from
# town_point (inner join on the settlement code).
gov_edu_yeshvia_with_town_xy_from_taz_withmany=pd.merge(town_point[['סמל_יישוב','geometry']],gov_edu_yeshvia_without_xy.loc[gov_edu_yeshvia_without_xy['SETL_CODE'].isin(town_in_one_taz)],left_on=['סמל_יישוב'],right_on=['SETL_CODE'])

## ייצוא של רשומות ללא עיגון 

In [143]:
# Remove the institutions located in the previous step; what remains has no
# location from any of the steps above.
gov_edu_yeshvia_without_xy=gov_edu_yeshvia_without_xy.loc[~gov_edu_yeshvia_without_xy['SETL_CODE'].isin(town_in_one_taz)]

In [144]:
# Copy the city-name column ('עיר') under the name 'city' for the export.
gov_edu_yeshvia_without_xy['city']=gov_edu_yeshvia_without_xy['עיר']

In [145]:
# Street name wrapped in literal double quotes; the column name itself also
# contains the quote characters.
gov_edu_yeshvia_without_xy['"street"']='"'+gov_edu_yeshvia_without_xy['רחוב']+'"'

In [146]:
# House number converted to text and wrapped in literal double quotes.
gov_edu_yeshvia_without_xy['"num"']='"'+gov_edu_yeshvia_without_xy['HOUSE_NUM'].astype(str)+'"'

In [147]:
# Blank out one specific non-numeric house-number value.
# NOTE(review): presumably a house number that was read as a date in the
# workbook — confirm against the source file.
gov_edu_yeshvia_without_xy.loc[gov_edu_yeshvia_without_xy['"num"']=='"2022-03-01 00:00:00"','"num"']=''

In [148]:
# Blank out house numbers equal to 0.
gov_edu_yeshvia_without_xy.loc[gov_edu_yeshvia_without_xy['"num"']=='"0"','"num"']=''

In [149]:
# Blank out one specific textual value found in the house-number column.
gov_edu_yeshvia_without_xy.loc[gov_edu_yeshvia_without_xy['"num"']=='"חורש"','"num"']=''

In [159]:
# Columns written to the address export. The name `col` is rebound later in
# the notebook to a different column list.
col=['city','"street"', '"num"']

In [160]:
# Write the addresses of the institutions that could not be located to
# 'addresses.csv' (the frame's index is written as the first column).
# DataFrame.to_csv returns None when it is given a path, so `addresses` is
# bound to the exported frame itself instead of to that None.
addresses=gov_edu_yeshvia_without_xy[col]
addresses.to_csv('addresses.csv')

## יצירת שכבה

In [152]:
# Names of the four located subsets; each name is also the value written to
# that subset's 'source' column.
lst=['gov_edu_yeshvia_with_town_xy','gov_edu_yeshvia_xy','gov_edu_yeshvia_with_xy_of_taz','gov_edu_yeshvia_with_town_xy_from_taz_withmany']

# Pair every name with its frame explicitly rather than fetching the frame
# through locals()[name]: that lookup only works while this code runs at
# module level and hides which variables the cell depends on.
frames_by_source=dict(zip(lst,[gov_edu_yeshvia_with_town_xy,gov_edu_yeshvia_xy,gov_edu_yeshvia_with_xy_of_taz,gov_edu_yeshvia_with_town_xy_from_taz_withmany]))

for i,frame in frames_by_source.items():
    frame['source']=i

In [153]:
# Columns kept in the output layer. This rebinds `col`, which earlier held the
# address-export column list.
col=['geometry','num_yeshiva_students', 'שם_העמותה', 'מס_עמותה_סניף','source']

In [156]:
# Stack the four located subsets (restricted to the same columns) into one table.
# NOTE(review): the geometries come from three different layers (address
# points, TAZ, settlements); nothing here checks that they share a CRS — confirm.
student_yeshiva=pd.concat([gov_edu_yeshvia_with_town_xy[col],gov_edu_yeshvia_xy[col],gov_edu_yeshvia_with_xy_of_taz[col],gov_edu_yeshvia_with_town_xy_from_taz_withmany[col]],axis=0)

In [169]:
# Rename the two Hebrew-named columns to 'org_id' and 'org_name' (in place).
student_yeshiva.rename(columns={'מס_עמותה_סניף':'org_id','שם_העמותה':'org_name'},inplace=True)

In [170]:
# Write the combined layer to 'student_yeshiva.shp' in the working directory.
# NOTE(review): Shapefile field names are limited to 10 characters, so
# 'num_yeshiva_students' will be shortened by the driver — confirm the
# resulting field name is acceptable.
student_yeshiva.to_file('student_yeshiva.shp')

  pd.Int64Index,
  student_yeshiva.to_file('student_yeshiva.shp')
