### import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn import linear_model
import matplotlib as plt
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import seaborn as sbn
plt.style.use('seaborn-poster')
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pickle

  plt.style.use('seaborn-poster')


### prepare data

In [21]:
tama_df = pd.read_csv('mix_tama_data.csv')
real_tama_df = tama_df[tama_df['is_real']]
synthetic_tama_df = tama_df[~tama_df['is_real']].head(len(real_tama_df) * 3)

### split to train and test

In [22]:
train_tama_df, test_tama_df = train_test_split(real_tama_df, test_size=0.3)
train_tama_df = pd.concat([real_tama_df, synthetic_tama_df], ignore_index=True)

### choose features model

In [23]:
chosen_features = ['ShnatBakasha', 'latitude', 'longitude'] # consider to add features

In [24]:
X_train, y_train = train_tama_df[chosen_features], train_tama_df['years_to_be_approved'].values
X_test, y_test = test_tama_df[chosen_features], test_tama_df['years_to_be_approved'].values

In [17]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14952 entries, 0 to 14951
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ShnatBakasha  14952 non-null  float64
 1   latitude      14952 non-null  float64
 2   longitude     14952 non-null  float64
dtypes: float64(3)
memory usage: 350.6 KB


### build the model

In [25]:
svr = SVR(kernel = 'poly', epsilon=1, degree=10)
svr.fit(X_train, y_train)

### evaluate and predict train data

In [26]:
mean_squared_error(y_train, svr.predict(X_train))

2.894242852892006

### evaluate and predict test data

In [27]:
mean_squared_error(y_test, svr.predict(X_test))

3.4893461603050624

### save the model to disk

In [10]:
pickle.dump(svr, open('SVR_trained_by_synthetic_data_model.sav', 'wb'))

### Example for getting prediction for user new tama request

In [11]:
from geopy.geocoders import GoogleV3


def get_gocode_address(address):
    geolocator = GoogleV3(api_key='AIzaSyCZwOnR2be6E0ThHtouRGS1pw7MEbYCeQc')
    location = geolocator.geocode(address, language='he', timeout=50)
    if location is not None:
        return (location.latitude, location.longitude)


def how_many_years_get_approval(user_input):
    svr = pickle.load(open('SVR_trained_by_synthetic_data_model.sav', 'rb'))
    location = get_gocode_address(user_input['address'])
    if location is not None:
        return round(svr.predict([[user_input['ShnatBakasha'], location[0], location[1]]])[0])

In [12]:
user_input = {'ShnatBakasha': '2014', 'address': 'גבעת פנחס 7, בני ברק'}
how_many_years_get_approval(user_input)



3

### take a look to learn about the SVR params:
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR.predict