In [None]:
import os

import pandas as pd

# Root of the unpacked Code-Point Open download; the paths below are built from it.
codepoint_dir = r"raw/codepo_gb"

# Get column names.
# The data CSVs are read with header=None further down, so their column names are taken
# from the separate headers document. The path is derived from codepoint_dir (rather
# than repeated as a literal) so that changing the directory above is enough.
column_headers_path = os.path.join(codepoint_dir, "Doc", "Code-Point_Open_Column_Headers.csv")
column_headers_df = pd.read_csv(column_headers_path)

# read_csv consumes the document's first line as its own header, so .loc[0] is the
# document's second line.
# NOTE(review): presumably that second line holds the long-form column names — confirm
# against the file.
headers = [h.lower() for h in column_headers_df.loc[0]]

In [None]:
# Iterate through the CSVs in Code-Point Open, collecting one DataFrame per file so
# that they can be concatenated together into one big table.
import os

csv_dir = os.path.join(codepoint_dir, "Data/CSV")

# os.listdir returns entries in arbitrary order; sorting keeps the row order of the
# combined table reproducible between runs.
files = sorted(os.listdir(csv_dir))

dfs = []
for f in files:
    # Test the file name's extension only. A substring test on the whole path would
    # also accept names such as "x.csv.bak" or any path containing ".csv".
    if not f.lower().endswith(".csv"):
        continue
    this_file = os.path.join(csv_dir, f)
    # header=None: the first line of each file is treated as data, not column names.
    this_df = pd.read_csv(this_file, header=None)
    dfs.append(this_df)

In [None]:
# Stack the per-file frames into a single table. ignore_index=True gives the result a
# fresh 0..n-1 index; without it every file's own 0-based row labels are repeated, which
# makes label-based lookups on final_df ambiguous.
final_df = pd.concat(dfs, ignore_index=True)

# The files were read with header=None, so the columns are still numbered; replace them
# with the names loaded from the headers document.
final_df.columns = headers

In [None]:
# Number of rows in the combined table, shown as the cell's output.
final_df.shape[0]

In [None]:
# Extend the module search path with '..' before importing from db_connections.
# '..' is a relative entry, so it is resolved against the kernel's working directory.
import sys
sys.path.append('..')

# NOTE(review): presumably db_connections is a project package one directory up — confirm.
from db_connections.connections import get_engine
# NOTE(review): `engine` is not referenced by any of the cells visible here; check whether
# later code relies on it before removing.
engine = get_engine()

In [None]:
# Write the combined table to disk without the index column, with the column names as
# the first line. The psql \copy cell below loads this file by the same name.
final_df.to_csv(path_or_buf="all_postcodes.csv", index=False)

In [None]:
# (Re)create the destination table.
# IF EXISTS lets this cell run against a database where all_postcodes has not been
# created yet; a bare DROP TABLE raises an error in that case and aborts the cell.
sql = """
DROP TABLE IF EXISTS all_postcodes;
CREATE TABLE all_postcodes
(
  postcode text,
  positional_quality_indicator bigint,
  eastings bigint,
  northings bigint,
  country_code text,
  nhs_regional_ha_code text,
  nhs_ha_code text,
  admin_county_code text,
  admin_district_code text,
  admin_ward_code text
);
"""
from db_connections.connections import get_conn
conn = get_conn()
# NOTE(review): assuming get_conn() returns a psycopg2-style connection, `with conn`
# commits on success and rolls back on an exception, and the cursor block closes the
# cursor on exit — confirm against db_connections.
with conn:
    with conn.cursor() as cur:
        cur.execute(sql)
    

In [None]:
import os
import datetime
import subprocess
from db_connections.connections import host, username, dbname

# Bulk-load all_postcodes.csv into the all_postcodes table with psql's \copy.
# Raw string: "\c" is not a valid escape sequence in a normal string literal, and the
# backslash has to reach psql unchanged.
command = r"\copy all_postcodes FROM 'all_postcodes.csv' DELIMITER ',' CSV HEADER"

# Pass psql an argument list instead of a formatted shell string: no shell is involved,
# so quotes or spaces in host/username/dbname cannot break or alter the command line.
psql_args = [
    "psql",
    "-p", "5432",
    "--host", str(host),
    "--username", str(username),
    "--dbname", str(dbname),
    "--command", command,
]

# Capture stderr as well as stdout so that `error` actually holds psql's diagnostics
# (with only stdout piped, communicate() returns None for it).
process = subprocess.Popen(psql_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

output, error = process.communicate()

# Fail loudly: the following cells assume the table has been populated.
if process.returncode != 0:
    raise RuntimeError(
        "psql exited with status {}: {}".format(
            process.returncode, error.decode(errors="replace").strip()
        )
    )
 

In [None]:
# Create geometry column for the points including a spatial index for efficient querying.
# The points are built numerically with ST_MakePoint + ST_SetSRID rather than by
# concatenating WKT text and parsing it back with ST_GeomFromText: the result is the same
# 2D point in SRID 27700, without a text round-trip for every row.
# The index is created after the UPDATE so it is built once over the populated column.
sql = """
SELECT AddGeometryColumn ('all_postcodes', 'geom', 27700, 'POINT', 2);
UPDATE all_postcodes SET geom = ST_SetSRID(ST_MakePoint(eastings, northings), 27700);
CREATE INDEX idx_geom_all_postcodes ON all_postcodes USING gist(geom);
"""

conn = get_conn()
with conn:
    with conn.cursor() as cur:
        cur.execute(sql)
    

In [None]:
# Add lat/lng columns and fill them from geom reprojected to SRID 4326
# (WGS 84 longitude/latitude): ST_X gives lng, ST_Y gives lat.
sql = """
ALTER TABLE all_postcodes ADD lat float, ADD lng float;

UPDATE all_postcodes SET
    lng = ST_X(ST_TRANSFORM(geom, 4326)),
    lat = ST_Y(ST_TRANSFORM(geom,4326));

"""
conn = get_conn()
# Both statements are sent in one execute call, inside one `with conn` block.
with conn, conn.cursor() as cur:
    cur.execute(sql)
    

In [None]:

# B-tree index on the postcode column of all_postcodes.
sql = """
CREATE INDEX idx_all_postcodes_postcode
  ON all_postcodes
  USING btree
  (postcode);

"""
conn = get_conn()
with conn, conn.cursor() as cur:
    cur.execute(sql)