In [1]:
import pandas as pd
import psycopg2 as ps
from getpass import getpass

# 1. Set up Database Credentials

This is optional if you wish to use an already existing database and user.

If you'd like to create a dummy user and database to work with just for this session, run the commands below in your terminal. Both of these, along with all other objects created will be removed at the end.

```bash
 createuser dummy_user --createdb -P
 createdb testing_psycopg
```

# 2. Connecting to the Postgres Server

In [2]:
# Ammend as necessary if using a different role/user
conn = ps.connect(dbname='testing_psycopg', user='dummy_user', password=getpass(),
                   host='localhost', port='5432')

# Initialise cursor
cur = conn.cursor()

········


# 3. Basic Database Operations

## 3.1 Creating tables

In [3]:
# Creating a table of UTF-8 characters
cur.execute(""" CREATE TABLE utf8_chars ( 
                    code_point   integer, 
                    character    text 
                    ); """)

# Creating a table to hold sample export data
cur.execute("""CREATE TABLE exports (
                    Type       text,
                    Commodity  text,
                    KGs        numeric,
                    Value      money,
                    Date       timestamp
                ); """)

conn.commit()

## 3.2 Inserting data 

In [4]:
# Inserting data to utf8_chars table
cur.execute("INSERT INTO utf8_chars VALUES (1, '\x01')")

for code_point in range(2,129):
    cur.execute("INSERT INTO utf8_chars VALUES (%s, %s)",
                (code_point, chr(code_point)))

# Inserting data into exports table
export_data = pd.read_csv('data.csv', parse_dates=['Date']).fillna(0)

for index in range(len(export_data)):
    cur.execute('INSERT INTO exports VALUES (%s, %s, %s, %s, %s);',
                 tuple(export_data.iloc[index])) 

conn.commit()
export_data.head()

Unnamed: 0,Type,Commodity,KGs,Value_of_Goods_Exported,Date
0,Cut-flowers,Anthurium leaves,3769.74,1707653.0,2015-01-25
1,Cut-flowers,Cut foliage,9040.37,4476063.0,2015-01-25
2,Cut-flowers,Fillers,58.0,18938.0,2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,121856.0,2015-01-25
4,Cut-flowers,Ranunculus,104.0,34827.0,2015-01-25


## 3.3 Fetching data

### 3.3.1 Select All

In [5]:
cur.execute('SELECT * FROM utf8_chars;')

one = cur.fetchone()
print(f'one: {one}')

next_ten = cur.fetchmany(10)
print(f'ten: {next_ten}')

all_remaining = cur.fetchall()
len(all_remaining)

one: (1, '\x01')
ten: [(2, '\x02'), (3, '\x03'), (4, '\x04'), (5, '\x05'), (6, '\x06'), (7, '\x07'), (8, '\x08'), (9, '\t'), (10, '\n'), (11, '\x0b')]


117

In [6]:
cur.execute('SELECT * FROM utf8_chars;')
everything = cur.fetchall() 

len(everything)

128

### 3.3.2 Specifying columns

In [7]:
# Fetching column names
cur.execute("SELECT column_name FROM information_schema.columns \
             WHERE table_name='exports';")
columns = cur.fetchall()
columns

[('kgs',), ('value',), ('date',), ('type',), ('commodity',)]

In [8]:
# Retrieving data in the database as a dataframe
columns = [i[0].title() for i in columns]

cur.execute('SELECT type, value FROM exports;')
export_data2 = pd.DataFrame(cur.fetchall(), columns=['Type', 'Value'])
export_data2.head()

Unnamed: 0,Type,Value
0,Cut-flowers,"Ksh1,707,653.00"
1,Cut-flowers,"Ksh4,476,063.00"
2,Cut-flowers,"Ksh18,938.00"
3,Cut-flowers,"Ksh121,856.00"
4,Cut-flowers,"Ksh34,827.00"


## 3.4 Deleting rows

In [9]:
# Deleting rows
cur.execute('DELETE FROM utf8_chars WHERE (code_point < 48 OR code_point >= 122);')
cur.execute("SELECT code_point FROM utf8_chars LIMIT 5;")
cur.fetchall()

[(48,), (49,), (50,), (51,), (52,)]

In [10]:
# Removing rows with zero export amount
cur.execute('DELETE FROM exports where kgs<=0.0;')
cur.execute('SELECT kgs FROM exports LIMIT 5;')
cur.fetchall()

[(Decimal('3769.74'),),
 (Decimal('9040.37'),),
 (Decimal('58.0'),),
 (Decimal('309.72'),),
 (Decimal('104.0'),)]

In [11]:
columns

['Kgs', 'Value', 'Date', 'Type', 'Commodity']

## 3.5 Basic operations and aggregation

In [12]:
# evaluating 'price_per_kg'
cur.execute("""SELECT commodity,kgs,value,(value/kgs) AS price_per_kg
               FROM exports 
               ORDER BY price_per_kg DESC;""")
               
pd.DataFrame(cur.fetchall(),
             columns=['commodity', 'kgs', 'value', 'price_per_kg']).head()

Unnamed: 0,commodity,kgs,value,price_per_kg
0,Geranium cuttings,609.0,"Ksh1,291,615,579.00","Ksh2,120,879.44"
1,Asters,3.0,"Ksh408,938.00","Ksh136,312.67"
2,Geranium cuttings,3045.0,"Ksh265,013,924.00","Ksh87,032.49"
3,Phlox,1910.57,"Ksh153,927,591.00","Ksh80,566.32"
4,Geranium cuttings,4206.74,"Ksh313,812,245.00","Ksh74,597.49"


In [13]:
cur.execute('''SELECT max(value), min(value), max(kgs), min(kgs)
               FROM exports;''')
cur.fetchall()

[('Ksh5,715,510,707.00', 'Ksh0.00', Decimal('11049185.48'), Decimal('0.47'))]

In [14]:
# Getting types of commodities
cur.execute('SELECT DISTINCT type FROM exports;')
cur.fetchall()

[('Fruits',), ('Cut-flowers',), ('Vegetables',)]

In [15]:
cur.execute("SELECT DISTINCT commodity FROM exports WHERE type='Vegetables';")
cur.fetchmany(10)

[('Aloe vera',),
 ('Amaranthus leaves',),
 ('Arrow roots(nduma)',),
 ('Asparagus',),
 ('Aubergines',),
 ('Baby corn',),
 ('Basil',),
 ('Bean processed',),
 ('Beetroot',),
 ('Broccoli',)]

In [16]:
# Basic statistics
cur.execute("""SELECT commodity, sum(kgs), max(kgs), round(avg(kgs), 4),
               sum(value) as product_total_value
               FROM exports GROUP BY commodity ORDER BY product_total_value DESC;""")
pd.DataFrame(cur.fetchall(),
             columns=['commodity', 'total_mass(kg)', 'largest_shipment(kg)',
                      'average_mass', 'total_value'])

Unnamed: 0,commodity,total_mass(kg),largest_shipment(kg),average_mass,total_value
0,Roses,105164149.83,11049185.48,8763679.1525,"Ksh44,205,855,724.00"
1,Mixed vegetables,28129937.97,3565356.17,2344161.4975,"Ksh9,122,920,193.00"
2,Cuttings,2609221.55,1176784.0,217435.1292,"Ksh8,710,295,551.00"
3,Fine beans,23090599.07,3003347.17,1924216.5892,"Ksh7,109,839,896.00"
4,Avocado,31225859.63,5302741.82,2602154.9692,"Ksh4,278,199,690.00"
...,...,...,...,...,...
143,Sweet potatoes,32.0,32.0,32.0000,Ksh464.00
144,Gerbera,1.70,1.23,0.8500,Ksh369.00
145,Cucumber,10.0,10.0,10.0000,Ksh50.00
146,Celery,5.0,5.0,5.0000,Ksh30.00


# 4. Deleting Objects & Clean Up

In [17]:
# Deleting the created tables
cur.execute('DROP TABLE utf8_chars;')
cur.execute('DROP TABLE exports;')

In [18]:
# Removing everything owned by our 'dummy user' 
cur.execute('DROP OWNED BY dummy_user;')

In [19]:
# Closing the connection to database
cur.close()
conn.close()

In [20]:
%%bash
# Deleting the 'testing_psycopg' database
dropdb testing_psycopg

In [21]:
%%bash
# Deleting the 'dummy user'
dropuser dummy_user