In [1]:
import pandas as pd
from math import radians, sin, cos, sqrt, atan2
import json

In [2]:
tama_df = pd.read_csv('engineered_tama_data.csv')
tama_df

Unnamed: 0,hebrew_city_name,city_code,year,total_open_requests,total_requests,total_approved_requests,longitude,latitude
0,אור יהודה,2400,2005,0,0.0,0.0,34.852394,32.030971
1,אור יהודה,2400,2006,0,0.0,0.0,34.852394,32.030971
2,אור יהודה,2400,2007,0,0.0,0.0,34.852394,32.030971
3,אור יהודה,2400,2008,0,1.0,0.0,34.852394,32.030971
4,אור יהודה,2400,2009,1,0.0,0.0,34.852394,32.030971
...,...,...,...,...,...,...,...,...
1075,תל אביב יפו,5000,2018,687,164.0,83.0,34.781806,32.085300
1076,תל אביב יפו,5000,2019,768,116.0,149.0,34.781806,32.085300
1077,תל אביב יפו,5000,2020,735,238.0,115.0,34.781806,32.085300
1078,תל אביב יפו,5000,2021,858,240.0,124.0,34.781806,32.085300


In [3]:
from geopy.geocoders import Nominatim
def get_city_coordinates(city):
    # Create a geocoder instance
    geolocator = Nominatim(user_agent="my_app")
    # Geocode the city to get the location information
    location = geolocator.geocode(city, language="he", timeout=10)
    # Extract the latitude and longitude from the location data
    if location:
        latitude = location.latitude
        longitude = location.longitude
        return longitude, latitude
    return None, None

In [4]:
def calculate_distance(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers

    # Convert latitude and longitude to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Calculate the differences in latitude and longitude
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Apply the Haversine formula
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c

    return distance

In [5]:
def get_nearest_city(longitude, latitude, existing_cities, tama_df):
    tama_df = tama_df[tama_df['city_code'].isin(existing_cities)]
    # Calculate the distance from the reference point for each row in the dataframe
    tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)

    # Find the row with the minimum distance
    nearest_point = tama_df.loc[tama_df['distance'].idxmin()]

    # Print the nearest point
    nearest_city = nearest_point['hebrew_city_name']
    print('nearest city', nearest_city)
    return nearest_city

In [6]:
def zoom_out(city, tama_df):
    with open("sir_model.json") as sir_nodel_file:
        sir_models = json.load(sir_nodel_file)
        existing_cities = [model['city'] for model in sir_models]
    if city not in existing_cities:
        print('zoom out')
        city_df = tama_df[tama_df['hebrew_city_name'] == city]
        if city_df.empty:
            longitude, latitude = get_city_coordinates(city)
        else:
            longitude, latitude = city_df['longitude'].iloc[0], city_df['latitude'].iloc[0]
        return get_nearest_city(longitude, latitude, existing_cities, tama_df)
    return city

In [7]:
zoom_out('בית שמש', tama_df)

zoom out
nearest city ירושלים


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


'ירושלים'

In [8]:
zoom_out('באר שבע', tama_df)

zoom out
nearest city אשדוד


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


'אשדוד'

In [13]:
zoom_out('אור יהודה', tama_df)

zoom out
nearest city קרית אונו


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


'קרית אונו'

In [9]:
def get_city_code(city):
    city_map_name_to_code = tama_df[['hebrew_city_name', 'city_code']].drop_duplicates()
    city_map_name_to_code = city_map_name_to_code[city_map_name_to_code['hebrew_city_name'] == city]['city_code']
    return int(city_map_name_to_code)

In [14]:
get_city_code('קרית אונו')

2620

In [10]:
get_city_code('רמת גן')

8600

In [11]:
def get_city_name(city):
    city_map_name_to_code = tama_df[['hebrew_city_name', 'city_code']].drop_duplicates()
    city_map_name_to_code = city_map_name_to_code.loc[city_map_name_to_code['city_code'] == city, 'hebrew_city_name']
    return list(city_map_name_to_code.values)[0]

In [12]:
get_city_name(8600)

'רמת גן'