In [1]:
import sys
import warnings

# Silence all warnings for the rest of the session, but only when no -W
# option was passed to the interpreter (sys.warnoptions is then empty), so an
# explicit command-line warning configuration is still respected.
# Placed ahead of the remaining imports so the filter is installed before
# those modules are loaded.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    
import numpy as np
from statsmodels.datasets import grunfeld
from linearmodels.panel  import PanelOLS
import pandas as pd
import build_data_functions as bdf
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import model_functions as mf
import plot_model_functions as pmf
import panelOLS_models 
import statsmodels.api as sm
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression

In [2]:
# Years passed to the models below: 2005 through 2015 inclusive.
years = [*range(2005, 2016)]

In [3]:
# Target variable: read the tab-separated table (first column as index), sum
# "Value" within each (Province, Country, Year) group, then reshape it with
# the project helper bdf.pivot (its exact output layout is not visible here).
y = bdf.pivot(
    pd.read_table(
        "/home/sara/Documents/Immigration/Shared_models/Data/resident_foreigners_norm.csv",
        sep="\t",
        index_col=0,
    )
    .groupby(["Province", "Country", "Year"], as_index=False)["Value"]
    .sum(),
    "Country",
    "Value",
)

## Zone level

In [4]:
# Zone-level regressors: tab-separated table indexed by (Province, Year).
xs = pd.read_table(
    "/home/sara/Documents/Immigration/Shared_models/Data/x_zones.csv",
    sep="\t",
    index_col=["Province", "Year"],
)

#### Feature selection
Select features according to the k highest scores. The score function used is mutual information.

### ROMANIA

In [5]:
# Panel regression for Romania, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Romania",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.980167.
Adjusted R-squared 0.979793.
---------------- Training Results ----------------
R-squared 0.971723.
Adjusted R-squared 0.971065.
---------------- Overall Results ----------------
R-squared 0.980161.
Adjusted R-squared 0.979787.
---------------- Training Results ----------------
R-squared 0.965665.
Adjusted R-squared 0.964761.
---------------- Overall Results ----------------
R-squared 0.980101.
Adjusted R-squared 0.979726.
-------------------- Previous 2 Times --------------------
R-squared 0.981551.
Adjusted R-squared 0.981203.
---------------- Training Results ----------------
R-squared 0.974041.
Adjusted R-squared 0.973438.
---------------- Overall Results ----------------
R-squared 0.981545.
Adjusted R-squared 0.981196.
---------------- Training Results ----------------
R-squared 0.975025.
Adjusted R-squared 0.974367.
---------------- Overall Results ----------------
R-squared 0.973356.
Adjusted R-square

For Romania, using more than roughly 7 features leads to overfitting.

### MOROCCO

In [9]:
# Panel regression for Morocco, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Morocco",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.995943.
Adjusted R-squared 0.995867.
---------------- Training Results ----------------
R-squared 0.995506.
Adjusted R-squared 0.995401.
---------------- Overall Results ----------------
R-squared 0.995897.
Adjusted R-squared 0.995820.
---------------- Training Results ----------------
R-squared 0.994830.
Adjusted R-squared 0.994694.
---------------- Overall Results ----------------
R-squared 0.995890.
Adjusted R-squared 0.995812.
-------------------- Previous 2 Times --------------------
R-squared 0.996324.
Adjusted R-squared 0.996254.
---------------- Training Results ----------------
R-squared 0.995969.
Adjusted R-squared 0.995875.
---------------- Overall Results ----------------
R-squared 0.996284.
Adjusted R-squared 0.996214.
---------------- Training Results ----------------
R-squared 0.997282.
Adjusted R-squared 0.997211.
---------------- Overall Results ----------------
R-squared 0.989840.
Adjusted R-square

### ALBANIA

In [10]:
# Panel regression for Albania, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Albania",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.996919.
Adjusted R-squared 0.996861.
---------------- Training Results ----------------
R-squared 0.996594.
Adjusted R-squared 0.996515.
---------------- Overall Results ----------------
R-squared 0.996884.
Adjusted R-squared 0.996825.
---------------- Training Results ----------------
R-squared 0.996106.
Adjusted R-squared 0.996004.
---------------- Overall Results ----------------
R-squared 0.996844.
Adjusted R-squared 0.996784.
-------------------- Previous 2 Times --------------------
R-squared 0.997736.
Adjusted R-squared 0.997693.
---------------- Training Results ----------------
R-squared 0.997797.
Adjusted R-squared 0.997746.
---------------- Overall Results ----------------
R-squared 0.997713.
Adjusted R-squared 0.997670.
---------------- Training Results ----------------
R-squared 0.998728.
Adjusted R-squared 0.998694.
---------------- Overall Results ----------------
R-squared 0.995739.
Adjusted R-square

### TUNISIA

In [11]:
# Panel regression for Tunisia, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Tunisia",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.990285.
Adjusted R-squared 0.990102.
---------------- Training Results ----------------
R-squared 0.988953.
Adjusted R-squared 0.988696.
---------------- Overall Results ----------------
R-squared 0.990278.
Adjusted R-squared 0.990095.
---------------- Training Results ----------------
R-squared 0.987958.
Adjusted R-squared 0.987641.
---------------- Overall Results ----------------
R-squared 0.990240.
Adjusted R-squared 0.990056.
-------------------- Previous 2 Times --------------------
R-squared 0.990403.
Adjusted R-squared 0.990222.
---------------- Training Results ----------------
R-squared 0.989114.
Adjusted R-squared 0.988861.
---------------- Overall Results ----------------
R-squared 0.990399.
Adjusted R-squared 0.990218.
---------------- Training Results ----------------
R-squared 0.993407.
Adjusted R-squared 0.993233.
---------------- Overall Results ----------------
R-squared 0.956998.
Adjusted R-square

### EGYPT

In [33]:
# Panel regression for Egypt, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Egypt",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.981399.
Adjusted R-squared 0.981048.
---------------- Training Results ----------------
R-squared 0.978226.
Adjusted R-squared 0.977720.
---------------- Overall Results ----------------
R-squared 0.980864.
Adjusted R-squared 0.980503.
---------------- Training Results ----------------
R-squared 0.976349.
Adjusted R-squared 0.975726.
---------------- Overall Results ----------------
R-squared 0.980205.
Adjusted R-squared 0.979832.
-------------------- Previous 2 Times --------------------
R-squared 0.981457.
Adjusted R-squared 0.981107.
---------------- Training Results ----------------
R-squared 0.978689.
Adjusted R-squared 0.978194.
---------------- Overall Results ----------------
R-squared 0.980568.
Adjusted R-squared 0.980201.
---------------- Training Results ----------------
R-squared 0.977508.
Adjusted R-squared 0.976916.
---------------- Overall Results ----------------
R-squared 0.969831.
Adjusted R-square

### ECUADOR

In [13]:
# Panel regression for Ecuador, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Ecuador",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.989610.
Adjusted R-squared 0.989414.
---------------- Training Results ----------------
R-squared 0.987261.
Adjusted R-squared 0.986964.
---------------- Overall Results ----------------
R-squared 0.989603.
Adjusted R-squared 0.989407.
---------------- Training Results ----------------
R-squared 0.985442.
Adjusted R-squared 0.985059.
---------------- Overall Results ----------------
R-squared 0.989562.
Adjusted R-squared 0.989365.
-------------------- Previous 2 Times --------------------
R-squared 0.992690.
Adjusted R-squared 0.992552.
---------------- Training Results ----------------
R-squared 0.992006.
Adjusted R-squared 0.991820.
---------------- Overall Results ----------------
R-squared 0.992662.
Adjusted R-squared 0.992524.
---------------- Training Results ----------------
R-squared 0.993083.
Adjusted R-squared 0.992901.
---------------- Overall Results ----------------
R-squared 0.990642.
Adjusted R-square

### PERU

In [40]:
# Panel regression for Peru, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Peru",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.995936.
Adjusted R-squared 0.995860.
---------------- Training Results ----------------
R-squared 0.995463.
Adjusted R-squared 0.995357.
---------------- Overall Results ----------------
R-squared 0.995882.
Adjusted R-squared 0.995804.
---------------- Training Results ----------------
R-squared 0.994662.
Adjusted R-squared 0.994522.
---------------- Overall Results ----------------
R-squared 0.995794.
Adjusted R-squared 0.995715.
-------------------- Previous 2 Times --------------------
R-squared 0.995995.
Adjusted R-squared 0.995919.
---------------- Training Results ----------------
R-squared 0.995682.
Adjusted R-squared 0.995582.
---------------- Overall Results ----------------
R-squared 0.995922.
Adjusted R-squared 0.995845.
---------------- Training Results ----------------
R-squared 0.994875.
Adjusted R-squared 0.994740.
---------------- Overall Results ----------------
R-squared 0.995776.
Adjusted R-square

### CHINA

In [15]:
# Panel regression for China, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "China",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.994832.
Adjusted R-squared 0.994735.
---------------- Training Results ----------------
R-squared 0.994564.
Adjusted R-squared 0.994437.
---------------- Overall Results ----------------
R-squared 0.994828.
Adjusted R-squared 0.994731.
---------------- Training Results ----------------
R-squared 0.993949.
Adjusted R-squared 0.993790.
---------------- Overall Results ----------------
R-squared 0.994777.
Adjusted R-squared 0.994679.
-------------------- Previous 2 Times --------------------
R-squared 0.994864.
Adjusted R-squared 0.994767.
---------------- Training Results ----------------
R-squared 0.994575.
Adjusted R-squared 0.994449.
---------------- Overall Results ----------------
R-squared 0.994855.
Adjusted R-squared 0.994758.
---------------- Training Results ----------------
R-squared 0.994654.
Adjusted R-squared 0.994513.
---------------- Overall Results ----------------
R-squared 0.992443.
Adjusted R-square

### PHILIPPINES

In [41]:
# Panel regression for the Philippines, with save and show both disabled.
# NOTE(review): the list presumably holds the candidate numbers of selected
# features (k) -- confirm against panelOLS_models.panel_regression.
panelOLS_models.panel_regression(
    y,
    xs,
    years,
    "Philippines",
    [3, 5, 7, 10, 15],
    save=False,
    show=False,
)

--------------------- Previous  Time ---------------------
R-squared 0.994925.
Adjusted R-squared 0.994829.
---------------- Training Results ----------------
R-squared 0.996565.
Adjusted R-squared 0.996485.
---------------- Overall Results ----------------
R-squared 0.994864.
Adjusted R-squared 0.994768.
---------------- Training Results ----------------
R-squared 0.996044.
Adjusted R-squared 0.995940.
---------------- Overall Results ----------------
R-squared 0.994807.
Adjusted R-squared 0.994709.
-------------------- Previous 2 Times --------------------
R-squared 0.995188.
Adjusted R-squared 0.995097.
---------------- Training Results ----------------
R-squared 0.996605.
Adjusted R-squared 0.996526.
---------------- Overall Results ----------------
R-squared 0.995010.
Adjusted R-squared 0.994916.
---------------- Training Results ----------------
R-squared 0.996045.
Adjusted R-squared 0.995941.
---------------- Overall Results ----------------
R-squared 0.994847.
Adjusted R-square