### Database Troubleshooting

Resources, tips, and utilities for all those times when I choke the database.

[Finding and killing long running queries on PostgreSQL](https://medium.com/little-programming-joys/finding-and-killing-long-running-queries-on-postgres-7c4f0449e86d)

In [1]:
# IMPORTS
import geopandas as gpd
import pandas as pd

# import os
# import urllib.request
# import requests
# import shutil
# from pathlib import Path
# from zipfile import ZipFile

import matplotlib.pyplot as plt
from matplotlib import pyplot

# import folium

# from shapely.geometry import Point, Polygon

# from geopandas.tools import overlay

# from geopy.geocoders import Nominatim # for geocoding

# a few more imports specific to the database process
# import geoalchemy2 
import getpass

import psycopg2
import numpy
from psycopg2.extensions import adapt, register_adapter, AsIs

from sqlalchemy import create_engine


In [2]:
# pip install GeoAlchemy2

In [3]:
# Install GeoAlchemy2 pinned to version 0.10.2 from inside the notebook session.
pip install GeoAlchemy2==0.10.2

Note: you may need to restart the kernel to use updated packages.


In [4]:
import geoalchemy2 

In [5]:
# get user password for connecting to the db
# (prompted interactively so the password is not written into the notebook source)
mypasswd = getpass.getpass()

········


In [6]:
# open the psycopg2 connection using the password collected above
conn = psycopg2.connect(
    database='cappsds_psmd39',
    user='psmd39',
    host='pgsql.dsa.lan',
    password=mypasswd,
)

# cursor used by the queries in the following cells
cursor = conn.cursor()

# relkind='r' selects ordinary tables ("rel" is short for relation);
# the regex filters out names starting with pg_ or sql_
cursor.execute("""SELECT relname FROM pg_class WHERE relkind='r'
                  AND relname !~ '^(pg_|sql_)';""")

# first column of every row, alphabetically ordered
tables = sorted(row[0] for row in cursor.fetchall())
tables


['country_borders',
 'gadm_admin_borders',
 'geonames_feature',
 'msorfailedgeocoding',
 'msorfailedgeocodingv2',
 'spatial_ref_sys',
 'stlchildcare',
 'stlnonrestrictedresidential',
 'stlnonrestrictedresparcels',
 'stlpubschools',
 'stlpvtschools',
 'stlresparcels',
 'stlrestrictedflat',
 'stlsexoffenders',
 'stlzoning']

In [8]:
# count the rows of pg_stat_activity, grouped by their state
activity_sql = """SELECT count(*),state FROM pg_stat_activity GROUP BY 2;"""
cursor.execute(activity_sql)

# alternative diagnostic queries, kept commented out for quick reuse
# cursor.execute("""SELECT * FROM pg_stat_activity WHERE state = 'active';""")
# cursor.execute("""SELECT * FROM pg_stat_activity WHERE state = 'idle';""")
# cursor.execute("""SELECT pid, query, state FROM pg_stat_activity WHERE state = 'idle in transaction';""")

# queries whose start time is more than one minute in the past
# cursor.execute("""SELECT
#                       pid,
#                       now() - pg_stat_activity.query_start AS duration,
#                       query,
#                       state
#                     FROM pg_stat_activity
#                     WHERE (now() - pg_stat_activity.query_start) > interval '1 minutes';""")

# terminate / cancel a backend by pid (17876 is an example pid)
# cursor.execute("""SELECT pg_terminate_backend(17876) FROM pg_stat_activity""")
# cursor.execute("""SELECT pg_cancel_backend(17876);""")

cursor.fetchall()




[(13, None), (1, 'active')]

In [None]:
# Grant read access on the shared admin-borders table to user psmd39.
cursor.execute("""GRANT SELECT ON geospatial.gadm_admin_borders TO psmd39;""")
# GRANT returns no result set, so calling cursor.fetchall() here raises
# psycopg2.ProgrammingError ("no results to fetch"). Commit instead:
# psycopg2 runs statements inside a transaction, and the privilege change
# is only persisted once that transaction is committed.
conn.commit()

In [None]:
# Rename msorfailedgeocodingv2 to msorfailedgeocodingv2_basic.
cursor.execute("""ALTER TABLE msorfailedgeocodingv2 RENAME TO msorfailedgeocodingv2_basic;""")
# Persist the DDL change; psycopg2 does not commit automatically by default.
conn.commit()
# NOTE: a bare `msorfailedgeocoding_basic` identifier used to follow here; it is
# not a defined Python name (NameError), so it is kept only as this note.

In [None]:
# RENAME A TABLE

# URL template (substitute the real password):
# engine = create_engine('postgresql://psmd39:<INSERT PASSWORD>@pgsql.dsa.lan:5432/cappsds_psmd39', echo=False)

# build the connection URL from the password entered earlier
db_url = f'postgresql://psmd39:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39'
engine = create_engine(db_url, echo=False)

# stlsexoffenders -> stlsexoffenders_basic
rename_sql = 'ALTER TABLE stlsexoffenders RENAME TO stlsexoffenders_basic;'
result = engine.execute(rename_sql)

In [None]:
# PLACEHOLDER TO DROP A TABLE IF NECESSARY

# build the connection URL from the password entered earlier
db_url = f'postgresql://psmd39:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39'
engine = create_engine(db_url, echo=False)

# IF EXISTS keeps the statement from failing when the table is already gone
drop_table_sql = 'DROP TABLE IF EXISTS public.stlsexoffenders;'
result = engine.execute(drop_table_sql)

In [None]:
# PLACEHOLDER TO DROP AN INDEX IF NECESSARY

engine = create_engine(f'postgresql://psmd39:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39', echo=False)
# PostgreSQL's DROP INDEX takes an optionally schema-qualified index name;
# a "table.index" prefix is read as a schema name and fails. Qualify with the
# public schema (the one used for the table in the DROP TABLE cell) and add
# IF EXISTS so the statement does not fail when the index is already gone.
result = engine.execute('DROP INDEX IF EXISTS public.idx_stlsexoffenders_geometry;')

In [None]:
# SQL syntax reminder (not Python, so kept as a comment to keep the cell runnable).
# PostgreSQL form -- the qualifier is a schema, not a table:
#   DROP INDEX [IF EXISTS] schema_name.index_name;


In [None]:
# re-list the ordinary tables (relkind='r'; "rel" is short for relation),
# skipping names that start with pg_ or sql_
cursor.execute("""SELECT relname FROM pg_class WHERE relkind='r'
                  AND relname !~ '^(pg_|sql_)';""")

# first column of every row, alphabetically ordered
tables = sorted(row[0] for row in cursor.fetchall())
tables

In [9]:
# close the psycopg2 connection opened for the troubleshooting queries
conn.close()

In [None]:
# test to make sure the load actually worked
# query the table and read the rows into a pandas DataFrame
# Read through a SQLAlchemy engine instead of the psycopg2 `conn`: that
# connection is closed by the conn.close() cell, so passing it to pandas
# fails with "connection already closed".
engine = create_engine(f'postgresql://psmd39:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39', echo=False)
sql = "select * from stlsexoffenders;"
db_test_out = pd.read_sql(sql, engine)
db_test_out


In [None]:
# write the query result to CSV without the DataFrame index
# NOTE(review): db_test_out is read from stlsexoffenders in the cell above, but
# the file name refers to msorfailedgeocodingv2_basic -- confirm which is intended.
csv_name = 'msorfailedgeocodingv2_basic.csv'
db_test_out.to_csv(csv_name, index=False)


## Read data from CSV, convert to geodataframe, load to PostGIS

In [None]:
# load the CSV into a DataFrame and preview the first rows
csv_path = 'data/stlsexoffenders.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# import kept so Point / Polygon stay available to other cells
from shapely.geometry import Point, Polygon

# create the 'geometry' column for the geodataframe
# points_from_xy builds all points in one vectorised call (x = long, y = lat)
# instead of constructing one shapely Point per row in a Python loop
geometry = gpd.points_from_xy(df['long'], df['lat'])
# generate the geodataframe using the msor df + the geometry info
# set the CRS (EPSG:4326, in degrees) as part of this process
msor_db = gpd.GeoDataFrame(df, geometry=geometry, crs=4326)

In [None]:
# preview the first rows of the GeoDataFrame
msor_db.head()

In [None]:
# Set up database connection engine
# FORMAT: engine = create_engine('postgresql://user:password@host:5432/')
db_url = f'postgresql://psmd39:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39'
engine = create_engine(db_url, echo=False)

# GeoDataFrame to PostGIS; if_exists='replace' overwrites an existing table of that name
msor_db.to_postgis(name="stlsexoffenders_detailed", con=engine, if_exists='replace')


## Working with Dr. Scott - Mar 19

In [None]:
# load the CSV into a DataFrame and preview the first rows
csv_path = 'data/stlsexoffenders.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# import kept so Point / Polygon stay available to other cells
from shapely.geometry import Point, Polygon

# create the 'geometry' column for the geodataframe
# points_from_xy builds all points in one vectorised call (x = long, y = lat)
# instead of constructing one shapely Point per row in a Python loop
geometry = gpd.points_from_xy(df['long'], df['lat'])
# generate the geodataframe using the msor df + the geometry info
# set the CRS (EPSG:4326, in degrees) as part of this process
msor_db = gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
msor_db.shape

In [None]:
# take the first two rows and index them by the 'randomid' column
msor_db_toptwo = msor_db.head(2).set_index('randomid')
msor_db_toptwo

In [None]:
# GeoDataFrame to PostGIS; if_exists='replace' overwrites an existing table of that name
msor_db.to_postgis(name="stlsexoffenders", con=engine, if_exists='replace')

In [None]:
# read the freshly loaded table back to verify the upload
sql = "select * from stlsexoffenders;"
# Use the SQLAlchemy `engine` (the same one the to_postgis call uses) rather
# than the psycopg2 `conn`, which is closed by the conn.close() cell and
# would fail with "connection already closed".
db_test_out = pd.read_sql(sql, engine)
db_test_out


In [None]:
import os

# database user; the password comes from the earlier getpass prompt
pg_user = 'psmd39'

# SQLAlchemy engine for the cappsds_psmd39 database
db_url = f'postgresql://{pg_user}:{mypasswd}@pgsql.dsa.lan:5432/cappsds_psmd39'
cnx = create_engine(db_url, echo=False)

In [None]:
# example of an alternative SQLAlchemy connectable (SQLite):
# cnx = create_engine('sqlite:///students.db').connect()

# read the whole 'stlchildcare' table into a DataFrame and print it
df = pd.read_sql_table(table_name='stlchildcare', con=cnx)
print(df)