# SQL-Alchemy 
## PostGres from Python

### Install packages 

In [8]:
#!pip install SQLAlchemy



python library for managing all kinds of relational databases

In [5]:
import sqlalchemy

In [7]:
#!pip install psycopg2



low-level python library that actually manages the communication with a PostgreSQL DB

### Warmup:
In order to connect to any database, we need **5 things**:

In [10]:
#!pip install cred
import cred

Collecting cred
  Downloading cred-0.2.3-py3-none-any.whl (20 kB)
Collecting holidaycal
  Downloading holidaycal-0.0.1-py3-none-any.whl (12 kB)
Installing collected packages: holidaycal, cred
Successfully installed cred-0.2.3 holidaycal-0.0.1


In [11]:
DATABASE = 'northwind'
PORT = '5432'
USER = cred.USER
PASSWORD = cred.PASSWORD
HOST = 'localhost'

AttributeError: module 'cred' has no attribute 'USER'

- Q: Should we be writting passwords in documents we are going to push to git?

- Q: What are alternatives?


##### Create connection string

In [None]:
conn_string = f'postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}'

In [None]:
# For those using Mac it is not neccesary to give the USER and PASSWORD

### 1. Create an Engine

In [None]:
engine = sqlalchemy.create_engine(conn_string,echo=False)
#turn on echo=True for a more verbose output to see the raw SQL being executed for you under the hood!

In [None]:
engine

Engine(postgresql://postgres:***@localhost:5432/northwind)

### 2. Pass SQL queries to execute
query: is a string with the SQL commands

#### Write Data --> to sql

##### Create a table

In [12]:
# eg. Table greeting with columns language and word 
query = "CREATE table greeting(language VARCHAR(50),word VARCHAR(50));"

In [None]:
# engine.execute(query)

##### Insert rows

In [None]:
insert_query = "INSERT INTO greeting VALUES ('english' ,'hello!');"

In [None]:
engine.execute(insert_query)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1043d3c40>

In [14]:
# Inserting multiple values into a table using one query
query = "INSERT INTO greeting VALUES ('malayam','namaskaram'),('hindi','namsate'),('German','Hallo !')"

In [15]:
engine.execute(query)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1043d3f10>

In [16]:
engine.execute("SELECT * from greeting").fetchall()

[('english', 'hello!'),
 ('malayam', 'namaskaram'),
 ('hindi', 'namsate'),
 ('German', 'Hallo !'),
 ('english', 'hello!'),
 ('malayam', 'namaskaram'),
 ('hindi', 'namsate'),
 ('German', 'Hallo !'),
 ('english', 'hello!'),
 ('malayam', 'namaskaram'),
 ('hindi', 'namsate'),
 ('German', 'Hallo !')]

#### Read in Data <-- from sql 

In [17]:
# Display content in greetings table
result = engine.execute("SELECT * from greeting")

In [18]:
result # by default the return is a compressed iteratir object, not displayed

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1043d3e50>

In [19]:
type(result.fetchall())

list

##### Save results directly into a dataframe

In [20]:
import pandas as pd

In [21]:
result = engine.execute('SELECT * FROM greeting;')

In [22]:
result_df = pd.DataFrame(result, columns = ['language', 'word'])

In [23]:
result_df

Unnamed: 0,language,word
0,english,hello!
1,malayam,namaskaram
2,hindi,namsate
3,German,Hallo !
4,english,hello!
5,malayam,namaskaram
6,hindi,namsate
7,German,Hallo !
8,english,hello!
9,malayam,namaskaram


### 3. Inspect sql elements of your databse

In [24]:
from sqlalchemy import inspect

In [25]:
inspector = inspect(engine) #we remember the engine is bound to a specific database (with its host, port, user, password)

#### Get all tables names

In [26]:
inspector.get_table_names() #like `\d` in `psql`

['greeting',
 'customers',
 'categories',
 'regions',
 'products',
 'employee_territories',
 'orders',
 'suppliers',
 'shippers',
 'territories',
 'employees',
 'order_details']

#### Inspect columns of certain tables

In [27]:
dics_columns = inspector.get_columns('greeting')

In [28]:
dics_columns

[{'name': 'language',
  'type': VARCHAR(length=50),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'word',
  'type': VARCHAR(length=50),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [29]:
col_names = []
for dic in dics_columns:
    col_names.append(dic['name'])
col_names 

['language', 'word']

---

## Combine with pandas `df.to_sql()`

In [30]:
import pandas as pd
import seaborn as sns

In [31]:
#### read in penguins data from seaborn
df = sns.load_dataset('penguins')

In [32]:
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [33]:
# send the df to DB
df.to_sql('penguin',engine,if_exists='replace') # replace to rewrite the table, append to append to a table

344

In [34]:
engine.execute('select * from penguin').fetchall()

[(0, 'Adelie', 'Torgersen', 39.1, 18.7, 181.0, 3750.0, 'Male'),
 (1, 'Adelie', 'Torgersen', 39.5, 17.4, 186.0, 3800.0, 'Female'),
 (2, 'Adelie', 'Torgersen', 40.3, 18.0, 195.0, 3250.0, 'Female'),
 (3, 'Adelie', 'Torgersen', None, None, None, None, None),
 (4, 'Adelie', 'Torgersen', 36.7, 19.3, 193.0, 3450.0, 'Female'),
 (5, 'Adelie', 'Torgersen', 39.3, 20.6, 190.0, 3650.0, 'Male'),
 (6, 'Adelie', 'Torgersen', 38.9, 17.8, 181.0, 3625.0, 'Female'),
 (7, 'Adelie', 'Torgersen', 39.2, 19.6, 195.0, 4675.0, 'Male'),
 (8, 'Adelie', 'Torgersen', 34.1, 18.1, 193.0, 3475.0, None),
 (9, 'Adelie', 'Torgersen', 42.0, 20.2, 190.0, 4250.0, None),
 (10, 'Adelie', 'Torgersen', 37.8, 17.1, 186.0, 3300.0, None),
 (11, 'Adelie', 'Torgersen', 37.8, 17.3, 180.0, 3700.0, None),
 (12, 'Adelie', 'Torgersen', 41.1, 17.6, 182.0, 3200.0, 'Female'),
 (13, 'Adelie', 'Torgersen', 38.6, 21.2, 191.0, 3800.0, 'Male'),
 (14, 'Adelie', 'Torgersen', 34.6, 21.1, 198.0, 4400.0, 'Male'),
 (15, 'Adelie', 'Torgersen', 36.6, 17.

In [35]:
inspector = inspect(engine) #we remember the engine is bound to a specific database (with its host, port, user, password)

In [36]:
inspector.get_table_names() 

['greeting',
 'customers',
 'categories',
 'regions',
 'products',
 'employee_territories',
 'orders',
 'suppliers',
 'shippers',
 'territories',
 'employees',
 'order_details',
 'penguin']

#### Drop table

In [37]:
engine.execute('DROP TABLE penguin;')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x119cac520>

### Recap from the queries lesson: 
Try out some of the queries from yesterday

### References: 

This is a good source when you have a bit more time to read on SQLAlchemy:

- https://docs.sqlalchemy.org/en/13/core/engines.html



#### Challenge "Read all northwind files" and load it into database

In [38]:
pwd

'/Users/arjunharidas/Documents/stationary-sriracha-encounter-notes/week_5'

In [39]:
ls

02-data_modeling.md         large_countries_2015.csv
[34m__pycache__[m[m/                [34mnorthwind[m[m/
cred.py                     postgres_from_python.ipynb
joins_exercise.sql          sql_queries.ipynb
joins_fkeys_class.sql


In [40]:
import os
data_dir = 'northwind/'
os.listdir(data_dir)
# view all files under data_dir


['customers.csv',
 'categories.csv',
 'regions.csv',
 'products.csv',
 'employee_territories.csv',
 'orders.csv',
 '.DS_Store',
 'suppliers.csv',
 'README.md',
 'shippers.csv',
 'territories.csv',
 'employees.csv',
 'order_details.csv']

In [41]:
pd.read_csv('northwind/customers.csv')

Unnamed: 0,customerID,companyName,contactName,contactTitle,address,city,region,postalCode,country,phone,fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67
...,...,...,...,...,...,...,...,...,...,...,...
86,WARTH,Wartian Herkku,Pirkko Koskitalo,Accounting Manager,Torikatu 38,Oulu,,90110,Finland,981-443655,981-443655
87,WELLI,Wellington Importadora,Paula Parente,Sales Manager,Rua do Mercado 12,Resende,SP,08737-363,Brazil,(14) 555-8122,
88,WHITC,White Clover Markets,Karl Jablonski,Owner,305 - 14th Ave. S. Suite 3B,Seattle,WA,98128,USA,(206) 555-4112,(206) 555-4115
89,WILMK,Wilman Kala,Matti Karttunen,Owner/Marketing Assistant,Keskuskatu 45,Helsinki,,21240,Finland,90-224 8858,90-224 8858


In [42]:
# Read sql tables using pandas 
# check if_exists and what happens if you don't lower the column names to select data

for file in os.listdir(data_dir):
    if file.endswith('.csv'):
        table_name = file.split('.')[0]
        df = pd.read_csv(data_dir+file)
        #df.columns = df.columns.str.lower()
        df.to_sql(table_name,engine,if_exists='replace',method='multi',chunksize=1000)
    


In [43]:
engine.execute('''SELECT customerid from customers''').fetchall()

[('ALFKI',),
 ('ANATR',),
 ('ANTON',),
 ('AROUT',),
 ('BERGS',),
 ('BLAUS',),
 ('BLONP',),
 ('BOLID',),
 ('BONAP',),
 ('BOTTM',),
 ('BSBEV',),
 ('CACTU',),
 ('CENTC',),
 ('CHOPS',),
 ('COMMI',),
 ('CONSH',),
 ('DRACD',),
 ('DUMON',),
 ('EASTC',),
 ('ERNSH',),
 ('FAMIA',),
 ('FISSA',),
 ('FOLIG',),
 ('FOLKO',),
 ('FRANK',),
 ('FRANR',),
 ('FRANS',),
 ('FURIB',),
 ('GALED',),
 ('GODOS',),
 ('GOURL',),
 ('GREAL',),
 ('GROSR',),
 ('HANAR',),
 ('HILAA',),
 ('HUNGC',),
 ('HUNGO',),
 ('ISLAT',),
 ('KOENE',),
 ('LACOR',),
 ('LAMAI',),
 ('LAUGB',),
 ('LAZYK',),
 ('LEHMS',),
 ('LETSS',),
 ('LILAS',),
 ('LINOD',),
 ('LONEP',),
 ('MAGAA',),
 ('MAISD',),
 ('MEREP',),
 ('MORGK',),
 ('NORTS',),
 ('OCEAN',),
 ('OLDWO',),
 ('OTTIK',),
 ('PARIS',),
 ('PERIC',),
 ('PICCO',),
 ('PRINI',),
 ('QUEDE',),
 ('QUEEN',),
 ('QUICK',),
 ('RANCH',),
 ('RATTC',),
 ('REGGC',),
 ('RICAR',),
 ('RICSU',),
 ('ROMEY',),
 ('SANTG',),
 ('SAVEA',),
 ('SEVES',),
 ('SIMOB',),
 ('SPECD',),
 ('SPLIR',),
 ('SUPRD',),
 ('THEBI',),

In [46]:
# eg. Read customers table in postgresql DB using pandas 

pd.read_sql('orders',engine)

Unnamed: 0,index,orderid,customerid,employeeid,orderdate,requireddate,shippeddate,shipvia,freight,shipname,shipaddress,shipcity,shipregion,shippostalcode,shipcountry
0,0,10248,VINET,5,1996-07-04 00:00:00.000,1996-08-01 00:00:00.000,1996-07-16 00:00:00.000,3,32.38,Vins et alcools Chevalier,59 rue de l'Abbaye,Reims,,51100,France
1,1,10249,TOMSP,6,1996-07-05 00:00:00.000,1996-08-16 00:00:00.000,1996-07-10 00:00:00.000,1,11.61,Toms Spezialitäten,Luisenstr. 48,Münster,,44087,Germany
2,2,10250,HANAR,4,1996-07-08 00:00:00.000,1996-08-05 00:00:00.000,1996-07-12 00:00:00.000,2,65.83,Hanari Carnes,Rua do Paço 67,Rio de Janeiro,RJ,05454-876,Brazil
3,3,10251,VICTE,3,1996-07-08 00:00:00.000,1996-08-05 00:00:00.000,1996-07-15 00:00:00.000,1,41.34,Victuailles en stock,2 rue du Commerce,Lyon,,69004,France
4,4,10252,SUPRD,4,1996-07-09 00:00:00.000,1996-08-06 00:00:00.000,1996-07-11 00:00:00.000,2,51.30,Suprêmes délices,Boulevard Tirou 255,Charleroi,,B-6000,Belgium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
825,825,11073,PERIC,2,1998-05-05 00:00:00.000,1998-06-02 00:00:00.000,,2,24.95,Pericles Comidas clásicas,Calle Dr. Jorge Cash 321,México D.F.,,05033,Mexico
826,826,11074,SIMOB,7,1998-05-06 00:00:00.000,1998-06-03 00:00:00.000,,2,18.44,Simons bistro,Vinbæltet 34,Kobenhavn,,1734,Denmark
827,827,11075,RICSU,8,1998-05-06 00:00:00.000,1998-06-03 00:00:00.000,,2,6.19,Richter Supermarkt,Starenweg 5,Genève,,1204,Switzerland
828,828,11076,BONAP,4,1998-05-06 00:00:00.000,1998-06-03 00:00:00.000,,2,38.28,Bon app',12 rue des Bouchers,Marseille,,13008,France
