In [1]:
import pandas as pd
import psycopg2 as ps

# 1. Creating a User & Database - Optional

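If you'd like a dedicated `user` and `database` just for this session, run the following commands in your terminal. (The user, the database, and every other object created in this notebook are removed at the end.) `createuser -P` prompts for a password: enter the one used in the connection cell below (`Dinosaur123`), or amend that cell to match.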
```bash
$ createuser "Dinosaur" --createdb -P
$ createdb testing_psycopg
```

# 2. Connecting to the Postgres Server

In [2]:
# Connection settings for the role and database used throughout this notebook.
# Amend as necessary if using a different role/user.
# Keyword arguments are used instead of a hand-built DSN string so that no
# line-continuation whitespace ends up inside the connection string.
# NOTE: this is a throwaway tutorial password; never hardcode a real credential
# in a notebook (read it from the environment or prompt for it instead).
conn = ps.connect(
    dbname="testing_psycopg",
    user="Dinosaur",
    password="Dinosaur123",
    host="localhost",
    port=5432,
)

# Initialising the cursor; every statement below is sent through it.
cur = conn.cursor()

# 3. Basic Database Operations

## 3.1 Creating Tables

In [3]:
# DDL for the two demo tables, bound to names so each statement is easy to find.

# Table of (code point, character) pairs.
utf8_chars_ddl = """
            CREATE TABLE utf8_chars ( 
                    code_point integer, 
                    character text 
                    );
            """

# Table that receives the sample export data loaded from the CSV file.
exports_ddl = """ 
            CREATE TABLE exports (
                    Type text,
                    Commodity text,
                    KGs numeric,
                    Value money,
                    Date timestamp
                );
            """

# Create the tables in the same order as listed: utf8_chars, then exports.
for ddl in (utf8_chars_ddl, exports_ddl):
    cur.execute(ddl)

## 3.2 Inserting Data into the Database

In [4]:
# Inserting data into the utf8_chars table: code points 1..128, each paired with
# its character, sent as one parameterised batch.
# Code point 0 is left out because PostgreSQL text values cannot contain NUL.
cur.executemany(
    "INSERT INTO utf8_chars VALUES (%s, %s)",
    [(code_point, chr(code_point)) for code_point in range(1, 129)],
)

# Inserting data into the exports table.
# Missing values are replaced with 0 before the rows are sent to the database.
# The Date column is left as text here; PostgreSQL converts it when the value
# is stored in the timestamp column.
export_data = pd.read_csv('data.csv').fillna(0)

# itertuples(index=False, name=None) yields one plain tuple per row, in column
# order, without building an intermediate Series for every row.
cur.executemany(
    'INSERT INTO exports VALUES (%s, %s, %s, %s, %s);',
    list(export_data.itertuples(index=False, name=None)),
)

# Preview of the data that was just loaded.
export_data.head()

Unnamed: 0,Type,Commodity,KGs,Value_of_Goods_Exported,Date
0,Cut-flowers,Anthurium leaves,3769.74,1707653.0,01/25/2015 12:00:00 AM
1,Cut-flowers,Cut foliage,9040.37,4476063.0,01/25/2015 12:00:00 AM
2,Cut-flowers,Fillers,58.0,18938.0,01/25/2015 12:00:00 AM
3,Cut-flowers,Leather leaves/fern,309.72,121856.0,01/25/2015 12:00:00 AM
4,Cut-flowers,Ranunculus,104.0,34827.0,01/25/2015 12:00:00 AM


## 3.3 Fetching Data from the Database

In [5]:
# Run the query once; each fetch call below continues from where the
# previous one stopped in the same result set.
cur.execute('SELECT * FROM utf8_chars;')

one = cur.fetchone()        # the first row only
ten = cur.fetchmany(10)     # the next 10 rows
remaining = cur.fetchall()  # every row that has not been fetched yet

# One item per line: a label, the single row, a second label, then the ten rows.
print(
    '--one: ',
    one,
    '--ten:',
    *ten,
    sep='\n',
)

--one: 
(1, '\x01')
--ten:
(2, '\x02')
(3, '\x03')
(4, '\x04')
(5, '\x05')
(6, '\x06')
(7, '\x07')
(8, '\x08')
(9, '\t')
(10, '\n')
(11, '\x0b')


In [6]:
# Re-run the query so that fetchall() starts again from the first row,
# then keep the complete result in 'everything'.
cur.execute('SELECT * FROM utf8_chars;')
everything = cur.fetchall()

# Row counts: what was left after the earlier partial fetches vs. the whole table.
(len(remaining), len(everything))

(117, 128)

In [7]:
# Fetching the column names of the 'exports' table from the catalogue.
# ORDER BY ordinal_position returns them in table-definition order; without it
# the row order is unspecified and the names could be attached to the wrong
# columns when they are later paired with the result of SELECT *.
cur.execute("""
    SELECT column_name
    FROM information_schema.columns
    WHERE table_name = 'exports'
    ORDER BY ordinal_position;
""")

# Each row is a 1-tuple holding a single column name.
columns = cur.fetchall()

In [8]:
# Retrieving data in the database as a dataframe
cur.execute('SELECT * FROM exports;')

# Take the column names from the cursor's description of this very result set:
# the first field of each entry is the column name, in the same order as the
# row values. Building the list this way (instead of rebinding 'columns' from
# its own previous value) keeps the cell safe to re-run.
columns = [column[0].title() for column in cur.description]

export_data2 = pd.DataFrame(cur.fetchall(), columns=columns)
export_data2.head()

Unnamed: 0,Type,Commodity,Kgs,Value,Date
0,Cut-flowers,Anthurium leaves,3769.74,"Ksh1,707,653.00",2015-01-25
1,Cut-flowers,Cut foliage,9040.37,"Ksh4,476,063.00",2015-01-25
2,Cut-flowers,Fillers,58.0,"Ksh18,938.00",2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,"Ksh121,856.00",2015-01-25
4,Cut-flowers,Ranunculus,104.0,"Ksh34,827.00",2015-01-25


## 3.4 Miscellaneous SQL Queries

In [9]:
# Deleting rows: only code points 48..121 are kept; everything below 48 and
# everything from 122 upwards is removed.
delete_outside_range = 'DELETE FROM utf8_chars WHERE (code_point < 48 OR code_point >= 122);'
cur.execute(delete_outside_range)

# List the code points that are still in the table.
cur.execute("SELECT code_point FROM utf8_chars")
surviving_code_points = cur.fetchall()
print(surviving_code_points)

[(48,), (49,), (50,), (51,), (52,), (53,), (54,), (55,), (56,), (57,), (58,), (59,), (60,), (61,), (62,), (63,), (64,), (65,), (66,), (67,), (68,), (69,), (70,), (71,), (72,), (73,), (74,), (75,), (76,), (77,), (78,), (79,), (80,), (81,), (82,), (83,), (84,), (85,), (86,), (87,), (88,), (89,), (90,), (91,), (92,), (93,), (94,), (95,), (96,), (97,), (98,), (99,), (100,), (101,), (102,), (103,), (104,), (105,), (106,), (107,), (108,), (109,), (110,), (111,), (112,), (113,), (114,), (115,), (116,), (117,), (118,), (119,), (120,), (121,)]


In [10]:
# Labels for the result columns, in the order the query selects them.
price_columns = ['commodity','kgs','value','price_per_kg']

# Remove rows whose mass is zero or negative first, so the division by kgs
# in the next query never hits a zero.
cur.execute('DELETE FROM exports where kgs<=0.0;')

# Price per kilogram for every remaining row, highest price first.
price_query = """SELECT commodity,kgs,value,(value/kgs) AS price_per_kg
               FROM exports 
               ORDER BY price_per_kg DESC;"""
cur.execute(price_query)

price_rows = cur.fetchall()
pd.DataFrame(price_rows, columns=price_columns).head()

Unnamed: 0,commodity,kgs,value,price_per_kg
0,Geranium cuttings,609.0,"Ksh1,291,615,579.00","Ksh2,120,879.44"
1,Asters,3.0,"Ksh408,938.00","Ksh136,312.67"
2,Geranium cuttings,3045.0,"Ksh265,013,924.00","Ksh87,032.49"
3,Phlox,1910.57,"Ksh153,927,591.00","Ksh80,566.32"
4,Geranium cuttings,4206.74,"Ksh313,812,245.00","Ksh74,597.49"


In [11]:
# Distinct values of the 'type' column, i.e. the categories of commodities.
distinct_types_query = 'SELECT DISTINCT type FROM exports;'
cur.execute(distinct_types_query)

commodity_types = cur.fetchall()
commodity_types

[('Fruits',), ('Cut-flowers',), ('Vegetables',)]

In [12]:
# Distinct commodity names whose type is 'Vegetables'; only the first 10 are shown.
vegetables_query = "SELECT DISTINCT commodity FROM exports WHERE type='Vegetables';"
cur.execute(vegetables_query)

first_ten_vegetables = cur.fetchmany(10)
first_ten_vegetables

[('Aloe vera',),
 ('Amaranthus leaves',),
 ('Arrow roots(nduma)',),
 ('Asparagus',),
 ('Aubergines',),
 ('Baby corn',),
 ('Basil',),
 ('Bean processed',),
 ('Beetroot',),
 ('Broccoli',)]

In [13]:
# Basic statistics per commodity: total mass, largest single shipment,
# average mass (rounded to 4 decimals) and total value.
stats_columns = ['commodity','total_mass(kg)','largest_shipment(kg)',
                 'average_mass','total_value']
stats_query = """SELECT commodity, sum(kgs), max(kgs), round(avg(kgs), 4), sum(value)
               FROM exports GROUP BY commodity;"""
cur.execute(stats_query)

commodity_stats = pd.DataFrame(cur.fetchall(), columns=stats_columns)

# Show the commodities with the heaviest single shipment first.
commodity_stats.sort_values(by='largest_shipment(kg)', ascending=False)

Unnamed: 0,commodity,total_mass(kg),largest_shipment(kg),average_mass,total_value
72,Roses,105164149.83,11049185.48,8763679.1525,"Ksh44,205,855,724.00"
118,Avocado,31225859.63,5302741.82,2602154.9692,"Ksh4,278,199,690.00"
98,Mango,14047648.26,4655128.34,1170637.3550,"Ksh1,612,449,629.00"
101,Mixed vegetables,28129937.97,3565356.17,2344161.4975,"Ksh9,122,920,193.00"
50,Fine beans,23090599.07,3003347.17,1924216.5892,"Ksh7,109,839,896.00"
...,...,...,...,...,...
26,Dried carnations,6.96,6.96,6.9600,"Ksh5,181.00"
78,Melissa,6.0,6.0,6.0000,"Ksh2,724.00"
94,Celery,5.0,5.0,5.0000,Ksh30.00
121,Gerbera,1.70,1.23,0.8500,Ksh369.00


# 4. Deleting Created Objects & Users

In [14]:
# Deleting the tables: utf8_chars first, then exports.
for drop_statement in ('DROP TABLE utf8_chars;', 'DROP TABLE exports;'):
    cur.execute(drop_statement)

In [15]:
# Removing everything owned by the user "Dinosaur"
cur.execute('DROP OWNED BY "Dinosaur";')

# psycopg2 runs statements inside a transaction that only becomes permanent on
# commit(); closing the connection without it discards the pending changes.
# Commit here so the clean-up statements actually take effect.
conn.commit()

# Release the Python-side objects created during the session.
del export_data, export_data2, one, ten, remaining, everything

In [16]:
# Closing the connection to the database: the cursor first, then the
# connection it was created from.
for handle in (cur, conn):
    handle.close()

In [17]:
#Deleting the 'testing_psycopg' database (runs the dropdb command in a shell)
!dropdb testing_psycopg

In [18]:
#Deleting the "Dinosaur" user (runs the dropuser command in a shell)
!dropuser 'Dinosaur'