In [1]:
import os
from getpass import getpass

import pandas as pd
import psycopg2 as ps

# 1. Creating a PostgreSQL User/Role

First, create a user (e.g. one named Chimp) with privileges to create databases and other users by entering the following in a terminal/command line:

```createuser "Chimp" --createrole --createdb -P```

and assigning a password as prompted.

**Note:** This could have been run directly in this notebook using ```!createuser "Chimp" --createrole --createdb -P```, but responding to the interactive password prompts from a notebook cell would have been awkward.

# 2. Creating a Database

To create a database called experiments, either:
- type `createdb experiments` in a terminal/command line, or
- prefix the command with '!' and run it in the Jupyter notebook, as shown below.

In [2]:
#Creating a database called experiments with the createdb shell command
#The leading '!' makes the notebook run the rest of the line as a shell command
!createdb experiments

# 3. Connecting to a Database

In [3]:
#Logging in to database 'experiments' as user 'Chimp'
#Amend DB_NAME / DB_USER as necessary if using a different database or role
DB_NAME = "experiments"
DB_USER = "Chimp"

#The password is deliberately not written into the notebook: it is taken from the
#PGPASSWORD environment variable if set, otherwise typed in at a hidden prompt
db_password = os.environ.get("PGPASSWORD") or getpass("Password for role {}: ".format(DB_USER))

conn = ps.connect(dbname=DB_NAME, user=DB_USER, password=db_password, host="localhost")

#Cursor to perform database operations
cur = conn.cursor()

# 4. Basic Database Operations

In [4]:
#Defining the utf8_chars table: an integer code_point column and a text character column
create_utf8_chars_sql = """
            CREATE TABLE utf8_chars ( 
                        code_point integer, 
                        character text 
                    );
            """
cur.execute(create_utf8_chars_sql)

#Committing so the new table is persisted
conn.commit()

In [5]:
#Adding three starter rows with a single multi-row INSERT
seed_rows_sql = "INSERT INTO utf8_chars VALUES (1,'a'), (2, 'b'), (3,'c');"
cur.execute(seed_rows_sql)
conn.commit()

In [6]:
#Reading back every row currently stored in utf8_chars
select_all_sql = 'SELECT * FROM utf8_chars;'
cur.execute(select_all_sql)
cur.fetchall()

[(1, 'a'), (2, 'b'), (3, 'c')]

In [7]:
#Bulk insertion: one (code_point, character) pair per code point from 33 to 128
char_rows = [(code, chr(code)) for code in range(33,129)]
cur.executemany("INSERT INTO utf8_chars VALUES (%s, %s)", char_rows)
conn.commit()

#Reading the table back to see the newly added rows
cur.execute('SELECT * FROM utf8_chars;')
cur.fetchall()

[(1, 'a'),
 (2, 'b'),
 (3, 'c'),
 (33, '!'),
 (34, '"'),
 (35, '#'),
 (36, '$'),
 (37, '%'),
 (38, '&'),
 (39, "'"),
 (40, '('),
 (41, ')'),
 (42, '*'),
 (43, '+'),
 (44, ','),
 (45, '-'),
 (46, '.'),
 (47, '/'),
 (48, '0'),
 (49, '1'),
 (50, '2'),
 (51, '3'),
 (52, '4'),
 (53, '5'),
 (54, '6'),
 (55, '7'),
 (56, '8'),
 (57, '9'),
 (58, ':'),
 (59, ';'),
 (60, '<'),
 (61, '='),
 (62, '>'),
 (63, '?'),
 (64, '@'),
 (65, 'A'),
 (66, 'B'),
 (67, 'C'),
 (68, 'D'),
 (69, 'E'),
 (70, 'F'),
 (71, 'G'),
 (72, 'H'),
 (73, 'I'),
 (74, 'J'),
 (75, 'K'),
 (76, 'L'),
 (77, 'M'),
 (78, 'N'),
 (79, 'O'),
 (80, 'P'),
 (81, 'Q'),
 (82, 'R'),
 (83, 'S'),
 (84, 'T'),
 (85, 'U'),
 (86, 'V'),
 (87, 'W'),
 (88, 'X'),
 (89, 'Y'),
 (90, 'Z'),
 (91, '['),
 (92, '\\'),
 (93, ']'),
 (94, '^'),
 (95, '_'),
 (96, '`'),
 (97, 'a'),
 (98, 'b'),
 (99, 'c'),
 (100, 'd'),
 (101, 'e'),
 (102, 'f'),
 (103, 'g'),
 (104, 'h'),
 (105, 'i'),
 (106, 'j'),
 (107, 'k'),
 (108, 'l'),
 (109, 'm'),
 (110, 'n'),
 (111, 'o'),
 (112,

In [8]:
#Deleting every row whose code point lies outside the 48-122 range
prune_sql = 'DELETE FROM utf8_chars WHERE (code_point < 48 OR code_point > 122);'
cur.execute(prune_sql)
conn.commit()

#Showing the rows that remain
cur.execute('SELECT * FROM utf8_chars;')
cur.fetchall()

[(48, '0'),
 (49, '1'),
 (50, '2'),
 (51, '3'),
 (52, '4'),
 (53, '5'),
 (54, '6'),
 (55, '7'),
 (56, '8'),
 (57, '9'),
 (58, ':'),
 (59, ';'),
 (60, '<'),
 (61, '='),
 (62, '>'),
 (63, '?'),
 (64, '@'),
 (65, 'A'),
 (66, 'B'),
 (67, 'C'),
 (68, 'D'),
 (69, 'E'),
 (70, 'F'),
 (71, 'G'),
 (72, 'H'),
 (73, 'I'),
 (74, 'J'),
 (75, 'K'),
 (76, 'L'),
 (77, 'M'),
 (78, 'N'),
 (79, 'O'),
 (80, 'P'),
 (81, 'Q'),
 (82, 'R'),
 (83, 'S'),
 (84, 'T'),
 (85, 'U'),
 (86, 'V'),
 (87, 'W'),
 (88, 'X'),
 (89, 'Y'),
 (90, 'Z'),
 (91, '['),
 (92, '\\'),
 (93, ']'),
 (94, '^'),
 (95, '_'),
 (96, '`'),
 (97, 'a'),
 (98, 'b'),
 (99, 'c'),
 (100, 'd'),
 (101, 'e'),
 (102, 'f'),
 (103, 'g'),
 (104, 'h'),
 (105, 'i'),
 (106, 'j'),
 (107, 'k'),
 (108, 'l'),
 (109, 'm'),
 (110, 'n'),
 (111, 'o'),
 (112, 'p'),
 (113, 'q'),
 (114, 'r'),
 (115, 's'),
 (116, 't'),
 (117, 'u'),
 (118, 'v'),
 (119, 'w'),
 (120, 'x'),
 (121, 'y'),
 (122, 'z')]

# 5. Saving & Retrieving Data with Pandas

## 5.1 Saving a Data-frame to the Database

In [9]:
#Loading the CSV, parsing the column at position 4 as dates
raw_exports = pd.read_csv('data.csv', parse_dates=[4])

#Replacing missing values with 0 before the data is written to the database
df = raw_exports.fillna(0)
df.head()

Unnamed: 0,Type,Commodity,KGs,Value_of_Goods_Exported,Date
0,Cut-flowers,Anthurium leaves,3769.74,1707653.0,2015-01-25
1,Cut-flowers,Cut foliage,9040.37,4476063.0,2015-01-25
2,Cut-flowers,Fillers,58.0,18938.0,2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,121856.0,2015-01-25
4,Cut-flowers,Ranunculus,104.0,34827.0,2015-01-25


In [10]:
#Summary of the loaded data-frame: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 5 columns):
Type                       1800 non-null object
Commodity                  1800 non-null object
KGs                        1800 non-null float64
Value_of_Goods_Exported    1800 non-null float64
Date                       1800 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 70.4+ KB


In [11]:
#Defining the exports table that will receive the data-frame rows, one column per data-frame column
create_exports_sql = """ 
            CREATE TABLE exports (
                    Type text,
                    Commodity text,
                    KGs numeric,
                    Value money,
                    Date timestamp
                );
            """
cur.execute(create_exports_sql)

#Committing so the new table is persisted
conn.commit()

In [12]:
#Populating the table
#itertuples(index=False, name=None) yields each row as a plain tuple of values,
#which avoids building a pandas Series per row (as df.iloc[x] does) and hands
#psycopg2 native Python scalars rather than numpy ones
export_rows = list(df.itertuples(index=False, name=None))

#The same parameterised INSERT is executed once for every row tuple
cur.executemany('INSERT INTO exports VALUES (%s,%s,%s,%s,%s);', export_rows)
conn.commit()

## 5.2 Retrieving Stored Data

In [13]:
#Pulling every stored row back out of the exports table
cur.execute('SELECT * FROM exports;')
stored_data = cur.fetchall()

#No column names are supplied here, so pandas labels the columns by position
pd.DataFrame(data=stored_data)

Unnamed: 0,0,1,2,3,4
0,Cut-flowers,Anthurium leaves,3769.74,"Ksh1,707,653.00",2015-01-25
1,Cut-flowers,Cut foliage,9040.37,"Ksh4,476,063.00",2015-01-25
2,Cut-flowers,Fillers,58.0,"Ksh18,938.00",2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,"Ksh121,856.00",2015-01-25
4,Cut-flowers,Ranunculus,104.0,"Ksh34,827.00",2015-01-25
...,...,...,...,...,...
1795,Vegetables,Turnips,63.0,"Ksh42,647.00",2015-12-25
1796,Vegetables,Valore,0.0,Ksh0.00,2015-12-25
1797,Vegetables,Bean processed,770650.88,"Ksh115,660,099.00",2015-12-25
1798,Vegetables,Snow/mangetout,260.0,"Ksh125,520.00",2015-12-25


In [14]:
#Fetching column names
#ORDER BY ordinal_position returns the names in table-definition order, so they
#line up with the positional columns of the SELECT * rows; without it the row
#order of this query is unspecified. The schema filter avoids also picking up
#the columns of a same-named table in another schema.
cur.execute("""SELECT column_name FROM information_schema.columns
                    WHERE table_name='exports'
                      AND table_schema=current_schema()
                    ORDER BY ordinal_position;
            """)
cols=cur.fetchall()
cols

[('type',), ('commodity',), ('kgs',), ('value',), ('date',)]

In [15]:
#Title-casing the column names (e.g. 'kgs' -> 'Kgs')
#cols is overwritten with its own transformation, so both the 1-tuples returned
#by fetchall() and already-converted strings are accepted; this keeps the cell
#safe to run more than once
cols=[(entry if isinstance(entry, str) else entry[0]).title() for entry in cols]
cols

['Type', 'Commodity', 'Kgs', 'Value', 'Date']

In [16]:
#Rebuilding the data-frame from the fetched rows, labelled with the recovered column names
df2 = pd.DataFrame(data=stored_data, columns=cols)
df2.head(n=5)

Unnamed: 0,Type,Commodity,Kgs,Value,Date
0,Cut-flowers,Anthurium leaves,3769.74,"Ksh1,707,653.00",2015-01-25
1,Cut-flowers,Cut foliage,9040.37,"Ksh4,476,063.00",2015-01-25
2,Cut-flowers,Fillers,58.0,"Ksh18,938.00",2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,"Ksh121,856.00",2015-01-25
4,Cut-flowers,Ranunculus,104.0,"Ksh34,827.00",2015-01-25


In [17]:
#Restoring 'Kgs' column to numeric data type
df2['Kgs']=df2['Kgs'].astype(float)

#Removing currency label and commas, then restoring 'Value' column to numeric data type
#Everything except digits, '.' and '-' is dropped, so the currency prefix and the
#thousands separators disappear in one pass. The dtype guard skips the conversion
#when the column is already numeric, so the cell can be re-run without raising.
if not pd.api.types.is_numeric_dtype(df2['Value']):
    df2['Value']=(df2['Value']
                  .astype(str)
                  .str.replace(r'[^0-9.\-]', '', regex=True)
                  .astype(float))

df2.head()

Unnamed: 0,Type,Commodity,Kgs,Value,Date
0,Cut-flowers,Anthurium leaves,3769.74,1707653.0,2015-01-25
1,Cut-flowers,Cut foliage,9040.37,4476063.0,2015-01-25
2,Cut-flowers,Fillers,58.0,18938.0,2015-01-25
3,Cut-flowers,Leather leaves/fern,309.72,121856.0,2015-01-25
4,Cut-flowers,Ranunculus,104.0,34827.0,2015-01-25


In [18]:
#Summary of the restored data-frame, for comparison with df.info() on the original data
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 5 columns):
Type         1800 non-null object
Commodity    1800 non-null object
Kgs          1800 non-null float64
Value        1800 non-null float64
Date         1800 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 70.4+ KB


# 6. Creating Other Users

In [19]:
#Creating user Owl who can create databases and other users
#The password is not hardcoded: it is taken from the OWL_PASSWORD environment
#variable if set, otherwise typed in at a hidden prompt. It is passed as a query
#parameter so that psycopg2 quotes it as a string literal.
owl_password = os.environ.get("OWL_PASSWORD") or getpass('Password for new role "Owl": ')
cur.execute('CREATE USER "Owl" CREATEROLE CREATEDB PASSWORD %s;', (owl_password,))

#Creating user Gorilla who can only create databases
cur.execute('CREATE USER "Gorilla" CREATEDB;')

conn.commit()

In [20]:
#Checking if the users were created
#The new roles are first assigned to Chimp; presumably this is what makes them
#appear in enabled_roles for the current session (verify against the PostgreSQL docs)
grant_sql = 'GRANT "Gorilla","Owl" to "Chimp";'
cur.execute(grant_sql)

#Listing the role names reported by information_schema.enabled_roles
roles_sql = "SELECT role_name FROM information_schema.enabled_roles;"
cur.execute(roles_sql)
cur.fetchall()

[('Chimp',), ('Owl',), ('Gorilla',)]

# 7. Deleting Created Objects & Users

In [21]:
#Dropping both tables created earlier in this notebook, then committing once
for drop_table_sql in ('DROP TABLE utf8_chars;', 'DROP TABLE exports;'):
    cur.execute(drop_table_sql)
conn.commit()

In [22]:
#Dropping the two roles created earlier (Owl, then Gorilla), then committing once
for drop_role_sql in ('DROP ROLE "Owl"', 'DROP ROLE "Gorilla"'):
    cur.execute(drop_role_sql)
conn.commit()

In [23]:
#Dropping whatever is still owned by the Chimp role
drop_owned_sql = 'DROP OWNED BY "Chimp";'
cur.execute(drop_owned_sql)
conn.commit()

In [24]:
#Shutting down the database session: the cursor is closed first, then the connection
for handle in (cur, conn):
    handle.close()

In [25]:
#Deleting database 'experiments'
#Run via the shell ('!' prefix), after the notebook's connection was closed in the previous cell
!dropdb experiments

In [26]:
#Deleting user Chimp
#Run via the shell ('!' prefix) with the dropuser command-line tool
!dropuser 'Chimp'