In [1]:
import pandas as pd
import numpy as np
from scipy.integrate import odeint
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the dataframe
tama_df = pd.read_csv('engineered_tama_data.csv')
tama_df

Unnamed: 0,hebrew_city_name,city_code,year,total_open_requests,total_requests,total_approved_requests,longitude,latitude
0,אור יהודה,2400,2005,0,0.0,0.0,34.852394,32.030971
1,אור יהודה,2400,2006,0,0.0,0.0,34.852394,32.030971
2,אור יהודה,2400,2007,0,0.0,0.0,34.852394,32.030971
3,אור יהודה,2400,2008,0,1.0,0.0,34.852394,32.030971
4,אור יהודה,2400,2009,1,0.0,0.0,34.852394,32.030971
...,...,...,...,...,...,...,...,...
1075,תל אביב יפו,5000,2018,687,164.0,83.0,34.781806,32.085300
1076,תל אביב יפו,5000,2019,768,116.0,149.0,34.781806,32.085300
1077,תל אביב יפו,5000,2020,735,238.0,115.0,34.781806,32.085300
1078,תל אביב יפו,5000,2021,858,240.0,124.0,34.781806,32.085300


In [2]:
point_in_time = 2020

In [3]:
train_df = tama_df[tama_df['year'] <= point_in_time]
test_df = tama_df[tama_df['year'] > point_in_time]

In [4]:
# Save the model
import json
def save_sir_params(city, beta, gamma):
    model = {
        'city': int(city),
        'beta': float(beta),
        'gamma': float(gamma),
    }
    sir_models = [model]
    with open("sir_model.json", "r") as sir_model_file:
        sir_models = json.load(sir_model_file)
        sir_models.append(model)
    with open("sir_model.json", 'w') as sir_model_file:
        json.dump(sir_models, sir_model_file,
                  indent=4,
                  separators=(',', ': '))

In [5]:
def SIR(X, t, beta, gamma, N):
    """
    SIR model for tama requests prediction.
    S = X[0], I=X[1], R=X[2]
    """
    dSdt = -beta * X[0] * X[1] / N
    dIdt = beta * X[0] * X[1] / N - gamma * X[1]
    dRdt = gamma * X[1]
    return [dSdt, dIdt, dRdt]
    
def infectedSIR(time_seq, beta, gamma):
    N = 10000
    I0 = 1
    X0 = [N-I0, I0, 0]
    return odeint(SIR, X0, time_seq, args=(beta, gamma, N))[:,1]
    
def min_diff(params, total_requests_data, time_data):
            beta, gamma = params
            guess = infectedSIR(time_data, beta, gamma)
            return np.sum(np.square(guess - total_requests_data))
            
            
def build_sir_models(train_df):
    cities = list(set(train_df['city_code']))
    for city in cities:
        current_city = train_df[train_df['city_code'] == city].sort_values(by='year', ignore_index=True)
        if current_city['total_requests'].sum() >= 15:
            current_city['year'] = current_city['year'] - 2005
            bounds=((0.001, None), (0.001, 10))
            result = minimize(min_diff, [0.5, 0.1], 
            args=(current_city['total_requests'].values, current_city['year'].values), 
            method='Nelder-Mead', bounds=bounds, tol=1e-6)
            beta_opt, gamma_opt = result.x
            save_sir_params(city, beta_opt, gamma_opt)

In [6]:
build_sir_models(train_df)

In [7]:
import json
from zoom_out import zoom_out, get_city_code, tama_df as tama_for_zoom_out
def predict_total_requests(city, year, handle_missing_city='min'):
    with open("sir_model.json") as sir_nodel_file:
        sir_models = json.load(sir_nodel_file)
        city_model = [model for model in sir_models if model['city'] == city]
        normalized_year = year - 2005
        if not city_model and handle_missing_city == 'zoom_out':
            city_model = [model for model in sir_models if model['city'] == get_city_code(zoom_out(city, tama_for_zoom_out), tama_for_zoom_out)]
            print(get_city_code(zoom_out(city)))
        if city_model:
            city_model = city_model[0]
            guess = infectedSIR(np.linspace(0, normalized_year, normalized_year + 1), city_model['beta'], city_model['gamma'])
            return guess[normalized_year]
        if hasattr(np, handle_missing_city):
            operation = getattr(np, handle_missing_city)
        else:
            raise Exception('Invalid operation')
        existing_cities = [model['city'] for model in sir_models]
        guesses_existing_cities = []
        for city in existing_cities:
            city_model = [model for model in sir_models if model['city'] == city][0]
            guesses_existing_cities += [infectedSIR(np.linspace(0, normalized_year, normalized_year + 1), city_model['beta'], city_model['gamma'])[normalized_year]]
        return operation(np.array(guesses_existing_cities))            

In [8]:
predict_total_requests(city=8700, year=2012)

27.649363838313782

In [9]:
predict_total_requests(city=2400, year=2012)

1.3121101100028465

In [10]:
predict_total_requests(city=2400, year=2012, handle_missing_city='zoom_out')

zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


nearest city קרית שמונה
zoom out
nearest city קרית שמונה


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tama_df['distance'] = tama_df.apply(lambda row: calculate_distance(latitude, longitude, row['latitude'], row['longitude']), axis=1)


TypeError: zoom_out() missing 1 required positional argument: 'tama_df'

In [None]:
tama_df['predicted_total_requests'] = tama_df.apply(lambda row: predict_total_requests(row['city_code'], row['year']), axis=1)

In [None]:
example_city_plot = tama_df[tama_df['city_code'] == 7200]
example_city_plot.set_index('year')[['total_requests', 'predicted_total_requests']].plot()

In [None]:
example_city_plot = tama_df[tama_df['hebrew_city_name'] == 'תל אביב יפו']
example_city_plot.set_index('year')[['total_requests', 'predicted_total_requests']].plot()

In [None]:
example_city_plot = tama_df[tama_df['hebrew_city_name'] == 'ירושלים']
example_city_plot.set_index('year')[['total_requests', 'predicted_total_requests']].plot()

In [None]:
from matplotlib import pyplot as plt
def plot_sir(city):
    with open("sir_model.json") as sir_nodel_file:
        sir_models = json.load(sir_nodel_file)
        city_model = [model for model in sir_models if model['city'] == city]
    if not city_model:
        return
    city_model = city_model[0] 
    N = 10000
    I0 = 1
    X0 = [N-I0, I0, 0]
    t_data = np.linspace(0, 30, 30)
    S = odeint(SIR, X0, t_data, args=(city_model['beta'], city_model['gamma'], N))[:,0]
    I = odeint(SIR, X0, t_data, args=(city_model['beta'], city_model['gamma'], N))[:,1]
    R = odeint(SIR, X0, t_data, args=(city_model['beta'], city_model['gamma'], N))[:,2]
    N = S + I + R
    plt.plot(t_data, S, label = "Sensitive - Eligible to apply a Tama request", color='gray')
    plt.plot(t_data, I, label = "Infected - New Requests", color='red')
    plt.plot(t_data, R, label = "Recoverd - Approved Requests", color='green')
    plt.plot(t_data, N, label = "Population, N = S + I + R", color='black')
    plt.legend()

In [None]:
plot_sir(3000)

In [None]:
example_city_plot = tama_df[tama_df['city_code'] == 2800]
example_city_plot.set_index('year')[['total_requests', 'predicted_total_requests']].plot()

In [None]:
tama_df = tama_df.dropna(subset='predicted_total_requests')
train_df = tama_df[tama_df['year'] <= point_in_time]
test_df = tama_df[tama_df['year'] > point_in_time]
np.sqrt(mean_squared_error(train_df['total_requests'], train_df['predicted_total_requests']))

In [None]:
np.sqrt(mean_squared_error(test_df['total_requests'], test_df['predicted_total_requests']))