In [1]:
import pandas as pd
import numpy as np
import pycountry
from scipy.optimize import least_squares
import random
import statsmodels
from scipy.optimize import minimize
from scipy.optimize import fsolve
import datetime
import statsmodels.api as sm

  from pandas.core import datetools


In [2]:
resident_foreigners_norm = pd.read_table("Data_final/resident_foreigners_norm.csv")

#regions = pd.read_table("Data_final/regioni.csv")
#regions_info = pd.read_table("Data_final/region_info.csv")

#prov_info = pd.read_table("Data_final/prov_cap_info.csv")

province_capitals = ['Ancona', "Valle d'Aosta / Vallée d'Aoste", "L'Aquila", 'Bari', 'Bologna', 'Cagliari', 'Campobasso', 'Catanzaro', 'Firenze', 'Genova', 'Milano', 'Napoli', 'Palermo', 'Perugia', 'Potenza', 'Roma', 'Torino', 'Trento', 'Trieste', 'Venezia']
years = list(range(2003, 2008))

In [3]:
data = resident_foreigners_norm.copy()
data = resident_foreigners_norm[(resident_foreigners_norm["Province"].isin(province_capitals)) & 
                                (resident_foreigners_norm["Year"].isin(years))]
data = data.groupby(["Province", "Country", "Year"], as_index=False).sum()

In [4]:
print("Romania", pycountry.countries.get(name="Romania").alpha_3)
print("Albania", pycountry.countries.get(name="Albania").alpha_3)
print("Morocco", pycountry.countries.get(name="Morocco").alpha_3)

Romania ROU
Albania ALB
Morocco MAR


In [5]:
data_rou = data[data["Country"] == "ROU"]
del data_rou["Country"]
data_alb = data[data["Country"] == "ALB"]
del data_alb["Country"]
data_mar = data[data["Country"] == "MAR"]
del data_mar["Country"]

In [6]:
# Get the foreigners stock value
def n_it(data_, i, t):
    return(data_[(data_["Province"] == i) & (data_["Year"] == t)]["Value"].values[0])

In [7]:
# Fill with 0 missing values
def missing_values(data_, times, territories):
    for i in territories:
        for t in times:
            try:
                temp = data_[(data_["Province"] == i) & (data_["Year"] == t)]["Value"].values[0]
            except IndexError:
                data_ = data_.append({"Province": i, "Year": t, "Value": 0}, ignore_index=True)
    return(data_)

In [8]:
# Handle missing values
data_rou = missing_values(data_rou, years, province_capitals)
data_alb = missing_values(data_alb, years, province_capitals)
data_mar = missing_values(data_mar, years, province_capitals)

In [9]:
'''# Random initialization of the parameters

# scalar
beta_initial = 3
# vector - dimension: # of territories (here, len(province_capitals))
a_i_initial = [random.random()*len(province_capitals) for p in province_capitals]

#param_init = np.random.uniform(0, 1, len(province_capitals)+1)
param_init = [0 for i in range(len(province_capitals)+1)]

# Mezzogiorno dummy
mezzogiorno = {'Abruzzo': 1, 'Lazio': 0, 'Umbria': 0, 'Provincia Autonoma Trento': 0, 
               'Friuli-Venezia Giulia': 0, 'Molise': 1, 'Calabria': 1, 
               "Valle d'Aosta / Vallée d'Aoste": 0, 'Lombardia': 0, 'Liguria': 0, 
               'Emilia-Romagna': 0, 'Sicilia': 1, 
               'Provincia Autonoma Bolzano / Bozen': 0, 'Puglia': 1, 'Campania': 1, 
               'Piemonte': 0, 'Toscana': 0, 'Sardegna': 1, 'Marche': 0, 
               'Basilicata': 1, 'Veneto': 0}'''

'# Random initialization of the parameters\n\n# scalar\nbeta_initial = 3\n# vector - dimension: # of territories (here, len(province_capitals))\na_i_initial = [random.random()*len(province_capitals) for p in province_capitals]\n\n#param_init = np.random.uniform(0, 1, len(province_capitals)+1)\nparam_init = [0 for i in range(len(province_capitals)+1)]\n\n# Mezzogiorno dummy\nmezzogiorno = {\'Abruzzo\': 1, \'Lazio\': 0, \'Umbria\': 0, \'Provincia Autonoma Trento\': 0, \n               \'Friuli-Venezia Giulia\': 0, \'Molise\': 1, \'Calabria\': 1, \n               "Valle d\'Aosta / Vallée d\'Aoste": 0, \'Lombardia\': 0, \'Liguria\': 0, \n               \'Emilia-Romagna\': 0, \'Sicilia\': 1, \n               \'Provincia Autonoma Bolzano / Bozen\': 0, \'Puglia\': 1, \'Campania\': 1, \n               \'Piemonte\': 0, \'Toscana\': 0, \'Sardegna\': 1, \'Marche\': 0, \n               \'Basilicata\': 1, \'Veneto\': 0}'

In [10]:
# reference province
I = "Roma"
# Random normal error
w_i = {p: np.random.normal(0, 1) for p in province_capitals}
e_i = {p: np.random.normal(0, 1) for p in province_capitals}
xi_i = {p: np.random.normal(0, 1) for p in province_capitals}

In [11]:
def model_I(param, data_, w_):
    log_lik = 0
    lik = 1
    beta = param[0]
    a = param[1:]
    for i in province_capitals:
        for t in years[1:]:
            f = np.log(n_it(data_, i, t)/n_it(data_, I, t)) - beta*(np.log(n_it(data_, i, t-1)/n_it(data_, I, t-1))) - a[province_capitals.index(i)] - w_[i] + w_[I]
            # We min the log-likelihood min --> take the opposite
            log_lik -= np.log(f)
            #print(f)
            #f.append(np.log(n_it(data_, i, t)/n_it(data_, I, t)) - beta*(np.log(n_it(data_, i, t-1)/n_it(data_, I, t-1))) - a)
    #log_lik = -log_lik
    return(log_lik)
    #return(lik)

In [12]:
def model_I(param, data_, w_):
    log_lik = 0
    lik = 0
    beta = param[0]
    a = param[1:]
    for i in province_capitals:
        for t in years[1:]:
            f = np.log(n_it(data_, i, t)/n_it(data_, I, t)) - beta*(np.log(n_it(data_, i, t-1)/n_it(data_, I, t-1))) - a[province_capitals.index(i)] - w_[i] + w_[I]
            # We min the log-likelihood min --> take the opposite
            #log_lik -= np.log(f)
            lik += -f
            #print(f)
            #f.append(np.log(n_it(data_, i, t)/n_it(data_, I, t)) - beta*(np.log(n_it(data_, i, t-1)/n_it(data_, I, t-1))) - a)
    #log_lik = -log_lik
    #return(log_lik)
    return(lik)

In [13]:
initial_time = datetime.datetime.now()
print ("Current time: " + str(initial_time.strftime('%H:%M:%S') ))
param_init = [.5 for i in range(len(province_capitals)+1)]
#param_init = [-10, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 
#param_init = np.random.uniform(0, 1, len(province_capitals)+1)
res =  minimize(model_I, param_init, args = (data_rou, w_i), method='CG')
#print(model_I([b, a], data_rou), b, a)
print(res.x)
print(res.message)
print(res.fun)
final_time = datetime.datetime.now() 
print ("Current time: " + str(final_time.strftime('%H:%M:%S')))
print("Computational time: " + str((final_time - initial_time)))

Current time: 12:41:46
[  1.89635362e+08  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06
  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06
  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06
  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06
  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06  -2.73639457e+06
  -2.73639457e+06]
Optimization terminated successfully.
-52786714888.764114
Current time: 12:43:52
Computational time: 0:02:05.992045


In [18]:
data_ = data_rou
# Step I results and validation
beta_hat = res.x[0]
a_hat = res.x[1:]
y_hat = []
y = []
for i in province_capitals:
    for t in years[1:]:
        y.append(np.log(n_it(data_, i, t)/n_it(data_, I, t)))
        y_hat.append(beta_hat*(np.log(n_it(data_, i, t-1)/n_it(data_, I, t-1))) + a_hat[province_capitals.index(i)])

y_mean = np.mean(y)

R2 = 1 - sum(np.subtract(y, y_hat)**2) / sum((y - y_mean)**2)
print(R2)

-1.89391436199e+17


In [17]:
1 - (res.fun / sum((y - y_mean)**2))

231543238.42596889

In [23]:
initial_time = datetime.datetime.now()
print ("Current time: " + str(initial_time.strftime('%H:%M:%S') ))

param_init = np.random.uniform(0, 1, len(province_capitals)+1)
res =  minimize(model_I, param_init, args = (data_rou, w_i), method='BFGS')
#print(model_I([b, a], data_rou), b, a)
print(res.x)
print(res.message)
print(res.fun)
final_time = datetime.datetime.now() 
print ("Current time: " + str(final_time.strftime('%H:%M:%S')))
print("Computational time: " + str((final_time - initial_time)))

Current time: 14:56:36
[  3.19693567e+08  -4.61405637e+06  -4.61405613e+06  -4.61405658e+06
  -4.61405598e+06  -4.61405576e+06  -4.61405639e+06  -4.61405614e+06
  -4.61405643e+06  -4.61405572e+06  -4.61405601e+06  -4.61288540e+06
  -4.61288545e+06  -4.61288524e+06  -4.61288505e+06  -4.61405622e+06
  -4.61756875e+06  -4.61288535e+06  -4.61405596e+06  -4.61405666e+06
  -4.61288507e+06]
Optimization terminated successfully.
-88989652536.70308
Current time: 14:58:25
Computational time: 0:01:48.554006


In [271]:
initial_time = datetime.datetime.now()
print ("Current time: " + str(initial_time.strftime('%H:%M:%S') ))

param_init = np.random.uniform(0, 1, len(province_capitals)+1)
res =  minimize(model_I, param_init, args = (data_rou, w_i), method='nelder-mead')
#print(model_I([b, a], data_rou), b, a)
print(res.x)
print(res.message)
print(res.fun)
final_time = datetime.datetime.now() 
print ("Current time: " + str(final_time.strftime('%H:%M:%S')))
print("Computational time: " + str((final_time - initial_time)))

Current time: 12:19:18


  if __name__ == '__main__':


[ 0.67569057  0.84245122  0.76489211  0.12377188  0.22210985  0.27280543
  0.45649639  0.26794596  0.65101987  0.12854509  0.06021894  0.26087093
  0.24403805  0.52957743  0.32443317  0.26878723  0.69010356  0.07427455
  0.63518135  0.83457974  0.84931738]
Maximum number of function evaluations has been exceeded.
Current time: 12:40:21
Computational time: 0:21:02.346795


In [None]:
def model_I_step_II(par, a_hat):
    log_lik = 0
    for i in province_capitals:
        f = a_hat[province_capitals.index(i)] - np.dot(np.subtract(x_i, x_I), param) - e_i[province_capitals.index(i)] - xi_i[province_capitals.index(i)]
        log_like -= np.log(f)
    return(log_like)

In [None]:
# Examples

In [245]:
from scipy.optimize import fsolve

def equations(p):
    x, y = p
    f = []
    f.append(y - x**2 -7 + 5*x)
    f.append(4*y - 8*x + 21)

    return(f)

x, y =  fsolve(equations, (5, 5))

print(equations((x, y)), x, y)


def equations(p):
    x, y = p
    return (y - x**2 -7 + 5*x, 4*y - 8*x + 21)

x, y =  fsolve(equations, (5, 5))

print(equations((x, y)), x, y)


import numpy as np, numpy.random, scipy.optimize
def residuals(p, dRA, dDE, RA, DEC):
    ex,ey,ez = p
    f1 = dRA-(ex*np.sin(DEC)*np.cos(RA)+ey*np.sin(DEC)*np.sin(RA)-ez*np.cos(DEC))
    f2 = dDE-(-ex*np.sin(RA)+ey*np.cos(RA))
    err = np.concatenate((f1,f2))
    return err    
ex, ey, ez = 0.2, 0.3, 0.4
N = 100
err = 1e-3
p0 = [0, 0., 0.]
ra, dec = np.random.uniform(0,1,N), np.random.uniform(0,.5,N)
dra = (ex*np.sin(dec)*np.cos(ra)+ey*np.sin(dec)*np.sin(ra)-ez*np.cos(dec))+np.random.normal(size=N)*err
ddec = (-ex*np.sin(ra)+ey*np.cos(ra))+np.random.normal(size=N)*err
print(scipy.optimize.leastsq(residuals, p0, args=(dra, ddec, ra, dec)))

[0.0, 0.0] 3.50000004142 1.75000008284


In [24]:
# import the packages
import numpy as np
from scipy.optimize import minimize
import scipy.stats as stats
import time

# Set up your x values
x = np.linspace(0, 100, num=100)

# Set up your observed y values with a known slope (2.4), intercept (5), and sd (4)
yObs = 5 + 2.4*x + np.random.normal(0, 4, 100)

# Define the likelihood function where params is a list of initial parameter estimates
def regressLL(params):
    # Resave the initial parameter guesses
    b0 = params[0]
    b1 = params[1]
    sd = params[2]

    # Calculate the predicted values from the initial parameter guesses
    yPred = b0 + b1*x

    # Calculate the negative log-likelihood as the negative sum of the log of a normal
    # PDF where the observed values are normally distributed around the mean (yPred)
    # with a standard deviation of sd
    logLik = -np.sum( stats.norm.logpdf(yObs, loc=yPred, scale=sd) )

    # Tell the function to return the NLL (this is what will be minimized)
    return(logLik)

# Make a list of initial parameter guesses (b0, b1, sd)    
initParams = [1, 1, 1]

# Run the minimizer
results = minimize(regressLL, initParams, method='nelder-mead')

# Print the results. They should be really close to your actual values
print (results.x)

[ 5.84476511  2.39111051  3.77154789]
