CREATE THE STRUCTURE OF THE DATABASE

In [19]:
# import packages
from sqlalchemy import create_engine
from psycopg2 import ( 
        connect
)
import requests
import json
import pandas as pd
import geopandas as gpd
import sqlalchemy as sqlA
#import geoalchemy as geoA
from shapely.wkb import dumps as wkb_dumps

In [37]:
#open the configuration parameter from a txt file the table
myFile = open('dbConfig.txt')
connStr = myFile.readline()
myFile.close()

In [21]:
#create the structure of the database
commands = (
        
        #table for the registrantion of PA
        """ 
            CREATE TABLE pa_user(
                postal_code VARCHAR(5) PRIMARY KEY,
                municipality VARCHAR(255) NOT NULL,
                password VARCHAR(255) NOT NULL  
        )
        """,
        
        # table of the bin
        """ 
        CREATE TABLE bin(
                bin_id SERIAL PRIMARY KEY,
                bin_date TIMESTAMP DEFAULT NOW(),
                lat DOUBLE PRECISION NOT NULL,
                lon DOUBLE PRECISION NOT NULL,
                infographic BOOLEAN NOT NULL DEFAULT 'False',
                infographic_date TIMESTAMP DEFAULT NOW(),
                geom geometry(POINT)
        )
        """,
        """ 
            CREATE TABLE gardbage_collector(
                personal_code SERIAL PRIMARY KEY,
                username VARCHAR(255) NOT NULL,
                password VARCHAR(255) NOT NULL
        )
        """,
        """ 
            CREATE TABLE bin_status(
                bin_id INTEGER UNIQUE NOT NULL,
                GC_code INTEGER UNIQUE NOT NULL,
                date TIMESTAMP DEFAULT NOW(),
                overfull BOOLEAN NOT NULL DEFAULT 'False',
                PRIMARY KEY(bin_id, GC_code),
                
                CONSTRAINT fk_bin
                    FOREIGN KEY(bin_id)
                        REFERENCES bin(bin_id)
                        ON DELETE SET NULL,
                CONSTRAINT fk_gc
                    FOREIGN KEY(GC_code)
                        REFERENCES gardbage_collector(personal_code)
                        ON DELETE SET NULL
        )
        """
        )

In [22]:
#create the connection with the database
conn = connect(connStr)
cur = conn.cursor()
for command in commands :
    cur.execute(command)
cur.close()
conn.commit()
conn.close()

IMPORT DATA ABOUT THE PA CODE

In [23]:
#setup db connection (generic connection path to be update with your credentials: 'postgresql://user:password@localhost:5432/mydatabase')
engine = create_engine('postgresql://postgres:r3df0x@localhost:5432/binecoDB') 

In [25]:
# creating the datafram of the municipalities 
# data obtained from http://lab.comuni-italiani.it/download/comuni.html
# !!NOTE: i'm using the municipality of italy because i can't find a list of australian city id
#opening the file and save it in a daframe
fileTxt = open("data/listacomuni.txt")
df_patemp = pd.read_csv(fileTxt,sep=';')
fileTxt.close()

In [26]:
#df_patemp

In [27]:
df_pa = df_patemp[['Comune', 'Provincia', 'CAP']]
#df_pa

In [28]:
# write the dataframe into postgreSQL
df_pa.to_sql('pa_data', engine, if_exists = 'replace', index=False)

IMPORT DATA FROM EPICOLLECT5

In [29]:
# send the request to the API of Epicollect5
response = requests.get('https://five.epicollect.net/api/export/entries/bineco-web-application')

In [30]:
raw_data = response.text

In [31]:
# parse the raw text response 
data = json.loads(raw_data)

In [32]:
# from JSON to Pandas DataFrame
data_df = pd.json_normalize(data['data']['entries'])
len(data_df) # for a good plot it's better to extract more then 50 lines
#data_df

50

In [33]:
# from Pandas DataFrame to GeoPandas GeoDataFrame
#we add a geometry column using the numeric coordinate colums
lon = '3_Position.longitude' #NOTE they are already numeric coordinate columns 
lat = '3_Position.latitude'
data_geodf = gpd.GeoDataFrame(data_df, geometry=gpd.points_from_xy(data_df[lon], data_df[lat]))

In [34]:
#data_geodf

In [36]:
# write the dataframe into postgreSQL
data_geodf.to_postgis('litter', engine, if_exists = 'replace', index=False)

IMPORT DATA ABOUT BINS

In [38]:
# creating the dataframe of the bins
# data obtained query OSM

#opening the file and save it in a daframe
filegeojson = open("data/Waste_basket_Cairns.geojson")
bin_df = gpd.read_file(filegeojson)
fileTxt.close()

In [39]:
#extract the usefull columns
bin_df_C = bin_df[['osm_id','geometry']]
#create the columns of longitude and latitude from the geometry attribute
bin_df_C['lon'] = bin_df_C['geometry'].x
bin_df_C['lat'] = bin_df_C['geometry'].y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [40]:
# Copy the dataframe to keep the original intact
insert_gdf = bin_df_C.copy()
# Make a new field containing the WKB dumped from the geometry column, then turn it into a regular 
#this way is faster
insert_gdf["geom_wkb"] = insert_gdf["geometry"].apply(lambda x: wkb_dumps(x))

In [41]:
# creating the query command
insert_query = """
    INSERT INTO bin (bin_id,lon,lat, geom)
    VALUES (%(full_id)s,%(lon)s,%(lat)s, ST_GeomFromWKB(%(geom_wkb)s));
"""
#creating a list of parameters to be inserted into values
params_list = [
    {
        "full_id": i,
        "lon": row["lon"],
        "lat": row["lat"],
        "geom_wkb": row["geom_wkb"]
    } for i, row in insert_gdf.iterrows()
]

In [42]:
#params_list

In [43]:
# Connect to the database and make a cursor
conn = conn = connect("dbname=binecoDB user=postgres password=r3df0x")
cur = conn.cursor()

# Iterate through the list of execution parameters and apply them to an execution of the insert query
for params in params_list:
    cur.execute(insert_query, params)
cur.close()
conn.commit()
conn.close()