# Data Prep - Visitors
Geoff Pidcock | PacifImpact
## Scope
Model data for the following countries
- Visitors for Fiji (FJ), the Cook Islands, and the Solomon Islands
- TBC

Document Assumptions <br>

Save to Postgres <br>

## Reference 
- [modelling/survey gsheet](https://docs.google.com/spreadsheets/d/1qKgOixdJtwYD0jB1ouN4-pUIqrEXEPJzAqWfYbIY06g/edit#gid=0&fvid=1204782497)

In [12]:
# Setup
import os
from urllib.parse import quote

from dotenv import load_dotenv, find_dotenv

# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# Database settings read from the environment. All five are concatenated into
# the connection string later in this cell, so each of them is required.
DBUNAME = os.environ.get("DBUNAME")
DBPASSWORD = os.environ.get("DBPASSWORD")
DBHOST = os.environ.get("DBHOST")
DBPORT = os.environ.get("DBPORT")
DBNAME = os.environ.get("DBNAME")

# os.environ.get returns None for an unset variable, which would otherwise
# surface as an opaque TypeError during string concatenation. Fail here
# instead, naming the variables that are missing (values are never printed).
_missing_db_vars = [
    _name
    for _name, _value in (
        ("DBUNAME", DBUNAME),
        ("DBPASSWORD", DBPASSWORD),
        ("DBHOST", DBHOST),
        ("DBPORT", DBPORT),
        ("DBNAME", DBNAME),
    )
    if _value is None
]
if _missing_db_vars:
    raise RuntimeError(
        "Missing database environment variable(s): "
        + ", ".join(_missing_db_vars)
        + " - check the .env file"
    )

import sqlalchemy
import psycopg2
import pandas as pd
import numpy as np

# SQLAlchemy database URL for Postgres via the psycopg2 driver.
# The user name and password are percent-encoded (safe='' also encodes '/')
# so that characters such as '@', ':', '/' or '%' in the credentials cannot
# be mistaken for URL delimiters. Credentials without special characters
# produce exactly the same string as plain concatenation.
connection_str = (
    'postgresql+psycopg2://'
    + quote(DBUNAME, safe='') + ':' + quote(DBPASSWORD, safe='')
    + '@' + DBHOST + ':' + DBPORT + '/' + DBNAME
)

In [9]:
# Dataset 1 under evaluation: the Fiji visitor arrival table (sheet 'T1').
# source - https://www.statsfiji.gov.fj/index.php/component/advlisting/?view=download&format=raw&fileId=2148
# The layout arguments (column letters, rows skipped, rows read) were worked
# out by hand against the workbook, so they are tied to this exact file.
_fj_columns = [
    'year',
    'month',
    'arrivals-resident',
    'arrivals-visitor',
    'arrivals-total',
    'departures-resident',
    'departures-visitors',
    'departures-total',
    'total-in-transit',
    'passenger-movement',
]

fj_data_1 = pd.read_excel(
    '../data/raw/fj/Visitor-Arrivals-Tables.xlsx',
    sheet_name='T1',
    header=None,            # the sheet's own header rows are skipped; names are supplied below
    names=_fj_columns,
    usecols='B:F,H:J,L,N',  # ten spreadsheet columns, one per name above
    skiprows=15,
    nrows=32,
)

# Show both ends of the frame to sanity-check the extraction.
display(fj_data_1.head(10), fj_data_1.tail(10))

Unnamed: 0,year,month,arrivals-resident,arrivals-visitor,arrivals-total,departures-resident,departures-visitors,departures-total,total-in-transit,passenger-movement
0,2018.0,January,19324.0,62648.0,81972.0,15040.0,72627.0,87667.0,14484.0,184123.0
1,,February,9650.0,48798.0,58448.0,12016.0,45491.0,57507.0,9385.0,125340.0
2,,March,9676.0,60058.0,69734.0,12445.0,50698.0,63143.0,8304.0,141181.0
3,,April,13739.0,63535.0,77274.0,13666.0,62345.0,76011.0,7699.0,160984.0
4,,May,12975.0,67290.0,80265.0,12155.0,63679.0,75834.0,8210.0,164309.0
5,,June,12681.0,81653.0,94334.0,13107.0,70851.0,83958.0,7813.0,186105.0
6,,July,11502.0,95061.0,106563.0,14026.0,92072.0,106098.0,9648.0,222309.0
7,,August,10418.0,88693.0,99111.0,14133.0,88552.0,102685.0,8007.0,209803.0
8,,September,10198.0,81437.0,91635.0,13729.0,76513.0,90242.0,9700.0,191577.0
9,,October,12667.0,79077.0,91744.0,12341.0,81194.0,93535.0,9200.0,194479.0


Unnamed: 0,year,month,arrivals-resident,arrivals-visitor,arrivals-total,departures-resident,departures-visitors,departures-total,total-in-transit,passenger-movement
22,,October,10396.0,77467.0,87863.0,12610.0,80129.0,92739.0,9594.0,190196.0
23,,November,9688.0,69123.0,78811.0,17822.0,68917.0,86739.0,7350.0,172900.0
24,,December,18465.0,73740.0,92205.0,22026.0,61528.0,83554.0,13034.0,188793.0
25,,,,,,,,,,
26,2020.0,January,18238.0,65386.0,83624.0,15434.0,76195.0,91629.0,12571.0,187824.0
27,,February,10448.0,46343.0,56791.0,11748.0,43748.0,55496.0,8366.0,120653.0
28,,March,8166.0,27972.0,36138.0,7608.0,34782.0,42390.0,4064.0,82592.0
29,,April,510.0,678.0,1188.0,532.0,1488.0,2020.0,28.0,3236.0
30,,May,410.0,709.0,1119.0,259.0,940.0,1199.0,0.0,2318.0
31,,June,628.0,413.0,1041.0,650.0,645.0,1295.0,0.0,2336.0


## The above is incomplete - skipping to the final step

In [14]:
# Open the database engine and a connection used by the cells below.
try:
    engine = sqlalchemy.create_engine(connection_str)
    conn = engine.connect()
except Exception:
    # Print the hint, then re-raise: swallowing the error would leave
    # `engine` / `conn` undefined and make later cells fail with a
    # misleading NameError. Catching Exception (not a bare except) also
    # lets KeyboardInterrupt through untouched.
    print('Database connection error - check creds')
    raise

In [15]:
# confirm tables are present as expected
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
sqlalchemy.inspect(engine).get_table_names()

['test']

In [16]:
# Write the Fiji arrivals frame to table 'test2' over the open connection.
# An existing 'test2' table is replaced, the DataFrame index is not written,
# and method='multi' sends multiple rows per INSERT statement.
fj_data_1.to_sql(
    name='test2',
    con=conn,
    if_exists='replace',
    index=False,
    method='multi',
)