In [1]:
import pandas as pd
import numpy as np

### Reading data

In [2]:
# Load the WPP2019 total-population (both sexes) CSV as latin-1 text.
# NOTE(review): the path is absolute and machine-specific; adjust it before running elsewhere.
pop = pd.read_csv(r"C:\Users\IMBS\Downloads\programming\Data\pop_data\WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.csv", encoding='latin-1')

#### Creating time series panel data 

In [3]:
def get_df_name(df):
    """Return the first module-level name that is bound to exactly this object.

    The lookup is by identity (``is``), not equality. Raises ``IndexError``
    when no global name refers to ``df``.
    """
    namespace = globals()
    matches = []
    for key, value in namespace.items():
        if value is df:
            matches.append(key)
    return matches[0]

def panel(df):
    """Reshape the wide population table into a year-by-country panel of digit strings.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the metadata columns ``Index``, ``Type``, ``Variant``,
        ``Notes``, ``Country code`` and ``Parent code``, a
        ``Region, subregion, country or area`` column, and one column per year.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per year column and one column per country/region (the columns
        index is named ``country``). Every value is a string with the blanks
        removed, e.g. ``'2 536 431'`` becomes ``'2536431'``.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    """
    metadata_columns = ['Index', 'Type', 'Variant', 'Notes', 'Country code', 'Parent code']
    tidy = (
        df.drop(columns=metadata_columns)
        # remove all rows that still contain a NaN cell
        .dropna(axis=0)
        .rename(columns={'Region, subregion, country or area': 'country'})
    )
    tidy['country'] = tidy['country'].replace({'Iran, Islamic Rep.': 'Iran'})

    # Index by country before transposing so the result does not depend on the
    # country column happening to be the first one left after the drop.
    dft = tidy.set_index('country').T

    # '...' is a placeholder that survives dropna; discard every country that has one.
    dft = dft.loc[:, ~(dft == '...').any()]

    # Build a new frame instead of assigning into a slice column by column;
    # astype(str) keeps the .str accessor from failing on values that are not text.
    return dft.apply(lambda column: column.astype(str).str.replace(' ', '', regex=False))

In [4]:
# NOTE: this rebinds `panel` from the function to the reshaped DataFrame. Later cells read
# the DataFrame through this name; re-running this cell without first re-running the
# definition cell raises TypeError because a DataFrame is not callable.
panel = panel(pop)
panel

country,WORLD,More developed regions,Less developed regions,Least developed countries,"Less developed regions, excluding least developed countries","Less developed regions, excluding China",Land-locked Developing Countries (LLDC),Small Island Developing States (SIDS),High-income countries,Middle-income countries,...,Luxembourg,Monaco,Netherlands,Switzerland,NORTHERN AMERICA,Bermuda,Canada,Greenland,Saint Pierre and Miquelon,United States of America
1950,2536431,814819,1721612,195428,1526184,1157420,103803,23771,694989,1703596,...,296,20,10042,4668,172603,37,13733,23,5,158804
1951,2584034,824004,1760031,199180,1560850,1179933,105870,24209,703004,1741086,...,298,19,10168,4722,175017,38,14078,23,5,160872
1952,2630862,833720,1797142,203015,1594126,1203963,108079,24685,711534,1777129,...,300,19,10298,4781,177779,38,14445,24,5,163266
1953,2677609,843788,1833822,206986,1626836,1229440,110423,25187,720436,1812536,...,302,19,10432,4842,180813,39,14835,24,5,165910
1954,2724847,854060,1870786,211133,1659653,1256303,112894,25710,729596,1847973,...,303,19,10570,4900,184052,40,15245,25,5,168736
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,7464022,1260479,6203543,963520,5240024,4758019,485277,69515,1240426,5520843,...,579,38,16981,8380,359525,63,36383,56,6,323016
2017,7547859,1264146,6283713,986385,5297327,4831087,496988,70158,1246648,5580423,...,592,38,17021,8456,361942,63,36732,56,6,325085
2018,7631091,1267559,6363532,1009691,5353841,4904155,508906,70792,1252546,5639084,...,604,39,17060,8526,364296,63,37075,57,6,327096
2019,7713468,1270630,6442838,1033389,5409449,4977204,520973,71429,1258043,5696667,...,616,39,17097,8591,366601,63,37411,57,6,329065


### Estimating population for chosen countries for years [a,b] when year 0 is 1950

In [5]:

def autoreg(df,list_country,a,b):
    """Fit one AR(1) model per selected column and return formatted predictions.

    Parameters
    ----------
    df : pandas.DataFrame
        Year-by-country panel whose values can be converted to ``int``.
    list_country : list of str
        Column names to estimate. The result follows the column order of
        ``df``, not the order of this list; names absent from ``df`` are ignored.
    a, b : int
        First and last observation offsets to predict (inclusive), where
        offset 0 corresponds to the year 1950.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``years`` (``a + 1950`` .. ``b + 1950``), one column per
        selected country. Values are strings: the rounded prediction times
        1000, written with thousands separators.

    Notes
    -----
    As a side effect the model object (not the fitted results) of each column
    is pickled to ``model.pkl`` in the working directory, so the file ends up
    holding only the last column's model.
    """
    # Kept function-local: no other cell imports these at module level.
    from statsmodels.tsa.ar_model import AutoReg
    import pickle

    def _as_persons(value):
        # Round to a whole number, scale by 1000 (presumably the source figures
        # are in thousands - TODO confirm) and insert the thousands separator.
        return f"{int('%.0f' % value) * 1000:,}"

    # Use the frame that was passed in rather than the notebook-level `panel` global.
    data = df.loc[:, df.columns.isin(list_country)]
    data = data.astype(int).reset_index(drop=True)

    predictions = {}
    for col in data.columns:
        # fit model
        model_AutoReg = AutoReg(data.loc[:, col], lags=1, old_names=False)
        model_fit = model_AutoReg.fit()
        # Context manager so the file handle is closed on every iteration.
        with open('model.pkl', 'wb') as model_file:
            pickle.dump(model_AutoReg, model_file)
        # make prediction
        predictions[col] = model_fit.predict(a, b).map(_as_persons)

    yhat = pd.DataFrame(predictions)

    # creating column year
    yhat['years'] = range(a + 1950, b + 1951)
    return yhat.set_index(['years'])

In [8]:
# Example: estimate Bermuda and WORLD for offsets 75..85 from 1950, i.e. the years 2025-2035.
s=autoreg(panel,['Bermuda', 'WORLD'],75,85)
s

Unnamed: 0_level_0,WORLD,Bermuda
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2025,8263074000,63000
2026,8358441000,63000
2027,8454387000,63000
2028,8550915000,63000
2029,8648029000,63000
2030,8745733000,63000
2031,8844029000,63000
2032,8942922000,64000
2033,9042415000,64000
2034,9142512000,64000


## Running the model using Flask

In [11]:
# For this app, the index.html, layout.html and view.html files are saved in the templates
# folder and the CSS file in the static folder, both within the main folder that contains this app (.py).
import pandas as pd
from flask import Flask, flash, redirect, render_template, request, url_for
import json

app = Flask(__name__)
# pickle is not used here because different variables would have different autoregression
# models; otherwise a single model could be created by the cell above and loaded in this app with:
#model = pickle.load(open('model.pkl', 'rb'))

@app.route('/')
def index():
    """Render the landing page, passing every column name of `panel` to the template."""
    selectable = list(panel.columns)
    return render_template('index.html', data=selectable)

@app.route("/test" , methods=['GET', 'POST'])
def test():
    """Estimate the submitted countries over the submitted year range and render the table.

    Reads the form fields ``countries`` (multi-valued), ``from`` and ``to``
    (calendar years). A request that is not a POST carries no form data and is
    redirected to the index page. Missing or non-integer years, a start year
    before 1950, or an end year earlier than the start year give a 400
    response instead of an unhandled exception.
    """
    if request.method != 'POST':
        # The form fields only exist on a submission; there is nothing to estimate on a GET.
        return redirect(url_for('index'))

    list_ = request.form.getlist('countries')
    try:
        start_year = int(request.form.get('from', ''))  # starting year of time period
        end_year = int(request.form.get('to', ''))      # ending year of time period
    except ValueError:
        return "Both 'from' and 'to' must be whole-number years.", 400

    # autoreg counts observations from 1950 (offset 0), so earlier years have no offset.
    if start_year < 1950 or end_year < start_year:
        return "'from' must be 1950 or later and 'to' must not be earlier than 'from'.", 400

    data_estimate = autoreg(panel, list_, start_year - 1950, end_year - 1950)

    return render_template('view.html',tables=[data_estimate.to_html()],
    titles = ['na','Estimation of your selected data is:'])


if __name__=='__main__':
    # When running from a Jupyter notebook, use_reloader should be False.
    app.run(debug=True, use_reloader=False) 

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [21/Jul/2021 22:25:44] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Jul/2021 22:25:44] "[33mGET /static/css/main.css HTTP/1.1[0m" 404 -
127.0.0.1 - - [21/Jul/2021 22:25:57] "[37mPOST /test HTTP/1.1[0m" 200 -


In [13]:
heroku login


SyntaxError: invalid syntax (<ipython-input-13-6f5b512ddcb2>, line 1)