## קוד מבוא

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt
from matplotlib import pyplot as plt 
import contextily as cx
import folium
import fiona
from mpl_toolkits.axes_grid1 import make_axes_locatable
from shapely.geometry import Point
import adjustText as aT

In [2]:
# Lift pandas' display limits on both axes: passing None removes the cap on
# how many rows / columns are shown when a DataFrame is displayed.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

## פונקציות

In [3]:
def double_taz_num(df):
    """Return the rows of ``df`` whose ``Taz_num`` value appears more than once.

    Rows keep their original order and index. Rows whose ``Taz_num`` is
    missing are never returned, because ``groupby`` drops missing keys.
    """
    rows_per_taz = df.groupby('Taz_num').size()
    repeated_taz = rows_per_taz.index[rows_per_taz > 1]
    is_repeated = df['Taz_num'].isin(repeated_taz)
    return df.loc[is_repeated]

In [4]:
def make_point(df):
    """Return a copy of ``df`` whose geometry is one point per feature.

    The point comes from ``representative_point()`` and is stored in a column
    named ``centroid`` (despite the name, it is not computed with
    ``.centroid``). That column becomes the active geometry and the original
    ``geometry`` column is dropped. ``df`` itself is not modified.
    """
    points = df.copy()
    points['centroid'] = points.representative_point()
    points = points.set_geometry('centroid')
    return points.drop(columns=['geometry'])

In [5]:
def up_load_gdb(path,layer_name):
    """Read the layer called ``layer_name`` from the data source at ``path``.

    The layer is passed to ``gpd.read_file`` by its position in
    ``fiona.listlayers(path)``. A ``ValueError`` from ``list.index`` is raised
    when no layer has that name.
    """
    source = '{}'.format(path)
    available_layers = fiona.listlayers(source)
    layer_position = available_layers.index(layer_name)
    return gpd.read_file(source, layer=layer_position)

In [6]:
def up_load_shp(path):
    """Read the file at ``path`` with ``gpd.read_file`` and return the result."""
    source = '{}'.format(path)
    return gpd.read_file(source)

In [7]:
def unique_id_in_one_taz(df_to_geoode,unique_field,gpd_for_geocode,taz):
    """List the ``unique_field`` codes whose features fall in exactly one TAZ.

    Steps:
      1. Collect the distinct ``unique_field`` values found in ``df_to_geoode``.
      2. Keep the features of ``gpd_for_geocode`` that carry one of those codes.
      3. Spatially join the TAZ polygons (``Taz_num`` + geometry) to those
         features, using ``gpd.sjoin`` with its default settings.
      4. Return the codes that were matched to a single distinct ``Taz_num``.

    Codes whose features match no TAZ at all do not appear in the join and
    are therefore not returned.
    """
    wanted_codes = list(df_to_geoode[unique_field].unique())

    has_wanted_code = gpd_for_geocode[unique_field].isin(wanted_codes)
    candidate_features = gpd_for_geocode[[unique_field, 'geometry']].loc[has_wanted_code]

    taz_polygons = taz[['Taz_num', 'geometry']]
    matched = gpd.sjoin(taz_polygons, candidate_features)

    taz_count_per_code = matched.groupby(unique_field)['Taz_num'].nunique()
    in_single_taz = taz_count_per_code[taz_count_per_code == 1]

    return list(in_single_taz.reset_index()[unique_field])

In [8]:
def up_load_df(folder_path,file_name):
    """Load ``<folder_path>\\<file_name>.xlsx`` and drop rows that are entirely empty.

    ``file_name`` is given without the ``.xlsx`` extension. The two parts are
    joined with a backslash, so the resulting path is Windows-style.
    """
    workbook_path = r'{}\{}.xlsx'.format(folder_path, file_name)
    table = pd.read_excel(workbook_path)
    return table.dropna(how='all')


In [9]:
def logic_test_for_forecast(taz):
    """Print a list of sanity checks for a forecast table, one line per check.

    Each printed line is ``<label> <list of TAZ numbers>`` (or, for the
    ``*_unique`` lines, the distinct values of a classification column).
    The labels are printed exactly as written below, spelling included.

    Checks, in print order:
      * TAZ numbers that are <= 0, and TAZ numbers that appear more than once.
      * Distinct values of ``yosh``, ``in_jerusalem_metropolin``,
        ``jerusalem_city`` and ``sector``.
      * Any count column holding a negative value (TAZ numbers de-duplicated).
      * Household/population consistency. Rows whose ``sector`` equals
        "Palestinian" are excluded from these and from the other checks that
        use the ``not_palestinian`` mask. Note that the line labelled
        ``pop more then hh`` lists rows where ``hh_total <= pop`` does NOT hold.
      * Sum of the age columns vs ``pop`` (difference rounded to 0 decimals).
      * Age columns > 0 while ``pop`` or ``hh_total`` is <= 0. A TAZ is listed
        once per offending column/condition, so it may repeat.
      * Sum of the employment columns vs ``emp_tot`` (flagged when the rounded
        absolute difference is > 1), and employment columns > 0 while
        ``emp_tot`` is <= 0.
      * ``pop_emp_employed`` > 0 while ``pop`` is <= 0.

    Parameters:
        taz: DataFrame with the columns ``TAZ``, ``yosh``,
            ``in_jerusalem_metropolin``, ``jerusalem_city``, ``sector`` and
            every column named in ``count_cols`` below.

    Side effects:
        Adds (or overwrites) the columns ``pop_check`` and ``emp_check`` on
        the DataFrame that was passed in.

    Returns:
        None.
    """
    age_cols = [
        'age0_4', 'age5_9', 'age10_14', 'age15_19', 'age20_24', 'age25_29',
        'age30_34', 'age35_39', 'age40_44', 'age45_49', 'age50_54',
        'age55_59', 'age60_64', 'age65_69', 'age70_74', 'age75up',
    ]
    emp_cols = ['indus', 'com_hotel', 'business', 'public', 'education', 'agri']
    # Every column that must not be negative, in the order they are scanned.
    count_cols = (
        ['hh_total', 'pop']
        + age_cols
        + ['emp_tot']
        + emp_cols
        + ['student', 'univ', 'UO_Hi_Ed', 'pop_emp_employed']
    )

    def taz_ids(mask):
        # TAZ numbers of the rows selected by a boolean mask, in table order.
        return list(taz.loc[mask, 'TAZ'])

    households = taz['hh_total']
    population = taz['pop']
    emp_total = taz['emp_tot']
    # Built once and combined with `&`, so every filter is a single mask on
    # the full table instead of a chain of .loc calls on shrinking frames.
    not_palestinian = taz['sector'] != "Palestinian"

    # --- identifiers and classification columns ---------------------------
    print('taz num under 0:', taz_ids(taz['TAZ'] <= 0))
    print('taz num duplicate:', taz_ids(taz.duplicated(subset='TAZ', keep=False)))

    for name in ('yosh', 'in_jerusalem_metropolin', 'jerusalem_city', 'sector'):
        print(name + '_unique:', list(taz[name].unique()))

    # --- negative values ---------------------------------------------------
    negative_taz = [t for c in count_cols for t in taz_ids(taz[c] < 0)]
    print('taz num with minus:', list(set(negative_taz)))

    # --- households vs population -------------------------------------------
    print('pop more then hh:',
          taz_ids(not_palestinian & ~(households <= population)))
    print('hh missing pop:',
          taz_ids(not_palestinian & (households > 0) & (population <= 0)))
    print('pop missing hh:',
          taz_ids(not_palestinian & (population > 0) & (households <= 0)))

    # --- age distribution vs population -------------------------------------
    taz['pop_check'] = (taz[age_cols].sum(axis=1) - population).round(0)
    print('pop vs age dis Mistake:',
          taz_ids(not_palestinian & (taz['pop_check'] != 0)))

    age_problem_taz = []
    for c in age_cols:
        has_people = taz[c] > 0
        age_problem_taz += taz_ids(has_people & (population <= 0))
        age_problem_taz += taz_ids(has_people & (households <= 0))
    print('taz num with age dis prob:', age_problem_taz)

    # --- employment split vs employment total -------------------------------
    taz['emp_check'] = (taz[emp_cols].sum(axis=1) - emp_total).round(0).abs()
    print('taz num with emp total prob:', taz_ids(taz['emp_check'] > 1))

    emp_problem_taz = [
        t for c in emp_cols for t in taz_ids((taz[c] > 0) & (emp_total <= 0))
    ]
    print('taz num with emp dis prob:', emp_problem_taz)

    # --- employed residents without population ------------------------------
    print('taz num with pop_emp_employed worng :',
          taz_ids(not_palestinian
                  & (taz['pop_emp_employed'] > 0)
                  & (population <= 0)))

## העלאת טבלאות תחזית

מצב קיים

In [10]:
# Existing-state (2020) forecast table, read from the team's shared output folder.
folder_path = r'\\FILE-SRV\Jtmt\projections_team\כללי\פעולות\יצירת_מצב_קיים_למס\output'
file_name = '2020_jtmt_forcast_full_230720'

forecast_2020 = up_load_df(folder_path=folder_path, file_name=file_name)

מצב עתידי כל שנה

In [11]:
# Today's date as YYMMDD.
file_date = pd.Timestamp.today().strftime('%y%m%d')

# Version date that prefixes the yearly input file names.
v_date = '230720'

year = ['2025', '2030', '2035', '2040', '2045', '2050']

# All yearly files live in the same folder, so the path is set once.
folder_path = r'W:\Projects\הסעת המונים\01_שלב ה\קבצי עבודה\תחזיות_דמוגרפיות\תחזיות_2050\iplan\join_forecast'

# Load one table per year into a variable named forecast_<year>.
# NOTE: writing through locals() defines real variables only because this
# runs at the top level of the notebook, not inside a function.
for y in year:
    file_name = '{}_forecast_{}_full'.format(v_date, y)
    locals()['forecast_{}'.format(y)] = up_load_df(folder_path, file_name)

In [12]:
# Columns kept from each future-year forecast table (forecast_2025 ... forecast_2050).
col_2050 = [
    'Taz_num',
    'student_yeshiva',
    'student',
    'uni_students',
    'aprt',
    'pop_without_dorms_yeshiva',
    'emp_Education',
    'emp_okev',
    'emp_not_okev',
    'total_emp',
    'agri',
    'Indus',
    'Com_hotel',
    'Business',
    'Public',
]

In [13]:
# Columns taken from the 2020 table when it is merged onto the TAZ layer.
# The apartments column is named 'aprt_20' here, unlike 'aprt' in col_2050.
col_2020 = [
    'Taz_num',
    'aprt_20',
    'pop_without_dorms_yeshiva',
    'student',
    'uni_students',
    'student_yeshiva',
    'total_emp',
    'emp_okev',
    'emp_not_okev',
    'emp_Education',
    'agri',
    'Indus',
    'Com_hotel',
    'Business',
    'Public',
]

שכבת אזורי תנועה

In [14]:

# Traffic-analysis-zone (TAZ) layer.
shp = up_load_shp(r'W:\Model Versions\4.0\TAZ\TAZ_V4_230518_Published.shp')

# Table listing the TAZ numbers whose sector is to be overridden.
folder_path = r'W:\Projects\הסעת המונים\01_שלב ה\קבצי עבודה\תחזיות_דמוגרפיות\תרחישי_מלל\מרכיבי_תחזית'
taz_sector_change = up_load_df(folder_path, '230704_taz_num_convert_from_jewish_to_u_orthodox')
lst_change_sector = list(taz_sector_change['Taz_num'])

# Only listed zones that are also of zone type 'Judea and Samaria' get the new sector.
is_listed_for_change = shp['Taz_num'].isin(lst_change_sector)
is_judea_and_samaria = shp['zonetype'] == 'Judea and Samaria'
shp.loc[is_listed_for_change & is_judea_and_samaria, 'main_secto'] = 'U_Orthodox'

# Attribute columns of the TAZ layer carried into the joined output.
col_taz = [
    'Taz_num',
    'Taz_name',
    'main_secto',
    'Muni_Heb',
    'jeru_metro',
    'zonetype',
    'in_jerusal',
    'SCHN_NAME',
]


חיבור הכל

In [15]:
# Start the combined table: TAZ attributes with the 2020 values attached.
# A left join keeps every TAZ even when the 2020 table has no row for it.
df = shp[col_taz].merge(
    right=forecast_2020[col_2020],
    how='left',
    on='Taz_num',
)

שלב ראשון: צמצמתי כל טבלת תחזית לעמודות הנדרשות בלבד (col_2050)

In [16]:
# Replace each forecast_<year> table with a version holding only the col_2050 columns.
for y in year:
    table_name = 'forecast_{}'.format(y)
    locals()[table_name] = locals()[table_name][col_2050]

שלב שני: לייצר לכל שנה רשימה חדשה של שמות השדות, בתוספת סיומת השנה (למשל student_2025)

In [17]:
# Value columns that receive a year suffix; 'Taz_num' stays unsuffixed
# because it is the join key.
col = [
    'student_yeshiva',
    'student',
    'uni_students',
    'aprt',
    'pop_without_dorms_yeshiva',
    'emp_Education',
    'emp_okev',
    'emp_not_okev',
    'total_emp',
    'agri',
    'Indus',
    'Com_hotel',
    'Business',
    'Public',
]

# Build col_<year> = ['Taz_num', '<column>_<year>', ...] for every forecast year.
# NOTE(review): for y == '2050' this overwrites the `col_2050` list defined
# earlier in the notebook, so the cell that subsets the forecast tables by
# `col_2050` cannot be re-run after this one without re-defining that list.
for y in year:
    x = ['Taz_num'] + [i + '_{}'.format(y) for i in col]
    locals()['col_{}'.format(y)] = x

שלב שלישי לקחת את השמות החדשים ולשנות את העמודות של כל אחד

In [18]:
# Give each forecast_<year> table the year-suffixed column names from col_<year>.
# The names are assigned by position, so both must list the columns in the same order.
for y in year:
    forecast_table = locals()['forecast_{}'.format(y)]
    forecast_table.columns = locals()['col_{}'.format(y)]

שלב אחרון לחבר את כולם אל הקיים

In [19]:
# Attach every yearly table to the combined table; left joins keep all TAZ rows.
for y in year:
    yearly_forecast = locals()['forecast_{}'.format(y)]
    df = df.merge(yearly_forecast, how='left', on='Taz_num')
    

# Scratch helper: print one column name followed by its year-suffixed
# variants, e.g. ['Public', 'Public_2025', ..., 'Public_2050'].
x = 'Public'
start_year = 2025
end_year = 2050
step = 5

# The loop variable is deliberately NOT called `year`: in this notebook
# `year` is the list of forecast-year strings that the `for y in year` cells
# iterate over, and rebinding it to an int here would break any re-run of them.
my_list = [x] + [
    '{}_{}'.format(x, forecast_year)
    for forecast_year in range(start_year, end_year + 1, step)
]

print(my_list)


In [20]:
# Column order of the exported table: the TAZ attributes first, then for each
# measure its base column followed by its 2025 ... 2050 columns.
_id_columns = [
    'zonetype',
    'jeru_metro',
    'Muni_Heb',
    'main_secto',
    'in_jerusal',
    'SCHN_NAME',
    'Taz_num',
    'Taz_name',
]

_measure_stems = [
    'aprt',
    'pop_without_dorms_yeshiva',
    'student',
    'uni_students',
    'student_yeshiva',
    'total_emp',
    'emp_okev',
    'emp_not_okev',
    'emp_Education',
    'agri',
    'Indus',
    'Com_hotel',
    'Business',
    'Public',
]

# The base column normally carries the bare stem; apartments is the one
# exception ('aprt_20').
_base_column_names = {'aprt': 'aprt_20'}

_forecast_years = range(2025, 2051, 5)

col_order = list(_id_columns)
for _stem in _measure_stems:
    col_order.append(_base_column_names.get(_stem, _stem))
    col_order.extend('{}_{}'.format(_stem, _yr) for _yr in _forecast_years)


In [21]:
# Write the combined table, in col_order column order, to a workbook whose
# name starts with file_date (YYMMDD).
save_file_path = r'W:\Projects\הסעת המונים\01_שלב ה\קבצי עבודה\תחזיות_דמוגרפיות\תחזיות_2050\iplan\join_forecast'
save_excel_path = r'{}\{}_forecast_2020_till_2050_iplan.xlsx'.format(save_file_path, file_date)

ordered_output = df[col_order]
ordered_output.to_excel(save_excel_path, index=False)