In [1]:
import sqlite3
import pandas as pd

In [2]:
# DDL: Data Definition Language
# DML: Data Manipulation Language

In [44]:
def db_ddl(cmd):
    conn = sqlite3.connect("./Data/database.db")
    c = conn.cursor()
    c.execute(cmd)
    data = c.fetchall()
    conn.close()
    return data

def db_dml(cmd, value=0):
    conn = sqlite3.connect("./Data/database.db")
    c = conn.cursor()
    if value:
        c.execute(cmd, value)
    else: 
        c.execute(cmd)
    conn.commit()
    conn.close()
    
def drop_table(table):
    db_ddl("drop table '{}'".format(table))
    
def read_table(table):
    conn = sqlite3.connect("./Data/database.db")
    data = pd.read_sql_query("select * from {}".format(table), conn)
    conn.close()
    return data

def check_data(data_list):
    return len(set([len(i) for i in data_list]))

In [4]:
#initialize db
db_dml("")

In [5]:
# db definitions
tables = ['EMPLOYEE', 'DEPARTMENT', 'DEPT_LOCATIONS', 'PROJECT', 'WORKS_ON', 'DEPENDENT']
employee = "('Fname' varchar(255), 'Minit' int, 'Lname' varchar(255), 'Ssn' int, 'Bdate' date, 'Address' varchar(255), 'Sex' varchar(255), 'Salary' float, 'Super_ssn' int, 'Dno' int, primary key ('Ssn'))"
department = "('Dname' varchar(255), 'Dnumber' int, 'Mgr_ssn' int, 'Mgr_start_date' date, primary key ('Dnumber'))"
dept_locations = "('Dnumber' int, 'Dlocation' varchar(255), primary key(Dnumber, Dlocation))"
project = "('Pname' varchar(255), 'Pnumber' int, 'Plocation' varchar(255), 'Dnum' int, primary key ('Pnumber'))"
works_on = "('Essn' int, 'Pno' int, 'Hours' float, primary key ('Essn', 'Pno'))"
dependent = "('Essn' int, 'Dependent_name' varchar(255), 'Sex' varchar(255), 'Bdate' date, 'Relationship' varchar(255), primary key(Essn, Dependent_name))"
fields = [employee, department, dept_locations, project, works_on, dependent]

In [6]:
for i in range(len(tables)):
    db_ddl("create table if not exists {} {}".format(tables[i], fields[i]))

In [7]:
# check tables in database
conn = sqlite3.connect("./Data/database.db")
print(pd.read_sql_query("select name from sqlite_master where type='table'", conn))

             name
0        EMPLOYEE
1      DEPARTMENT
2  DEPT_LOCATIONS
3         PROJECT
4        WORKS_ON
5       DEPENDENT


In [8]:
# check table definitions
for i in tables:
    print(i, ' TABLE\n', pd.read_sql_query("select * from pragma_table_info('{}')".format(i), conn), '\n')

EMPLOYEE  TABLE
    cid       name          type  notnull dflt_value  pk
0    0      Fname  varchar(255)        0       None   0
1    1      Minit           int        0       None   0
2    2      Lname  varchar(255)        0       None   0
3    3        Ssn           int        0       None   1
4    4      Bdate          date        0       None   0
5    5    Address  varchar(255)        0       None   0
6    6        Sex  varchar(255)        0       None   0
7    7     Salary         float        0       None   0
8    8  Super_ssn           int        0       None   0
9    9        Dno           int        0       None   0 

DEPARTMENT  TABLE
    cid            name          type  notnull dflt_value  pk
0    0           Dname  varchar(255)        0       None   0
1    1         Dnumber           int        0       None   1
2    2         Mgr_ssn           int        0       None   0
3    3  Mgr_start_date          date        0       None   0 

DEPT_LOCATIONS  TABLE
    cid       nam

In [9]:
for i in tables:
    print(i, ' TABLE\n', list(pd.read_sql_query("select name from pragma_table_info('{}')".format(i), conn)['name']), '\n')

EMPLOYEE  TABLE
 ['Fname', 'Minit', 'Lname', 'Ssn', 'Bdate', 'Address', 'Sex', 'Salary', 'Super_ssn', 'Dno'] 

DEPARTMENT  TABLE
 ['Dname', 'Dnumber', 'Mgr_ssn', 'Mgr_start_date'] 

DEPT_LOCATIONS  TABLE
 ['Dnumber', 'Dlocation'] 

PROJECT  TABLE
 ['Pname', 'Pnumber', 'Plocation', 'Dnum'] 

WORKS_ON  TABLE
 ['Essn', 'Pno', 'Hours'] 

DEPENDENT  TABLE
 ['Essn', 'Dependent_name', 'Sex', 'Bdate', 'Relationship'] 



In [10]:
# EMPLOYEE table
Fname = ['John', 'Franklin', 'Alicia', 'Jennifer', 'Ramesh', 'Joyce', 'Ahmad', 'James']
Minit = ['B', 'T', 'J', 'S', 'K', 'A', 'V', 'E']
Lname = ['Smith', 'Wong', 'Zelaya', 'Wallace', 'Narayan', 'English', 'Jabbar', 'Borg']
Ssn = [123456789, 333445555, 999887777, 9876554321, 666884444, 453453453, 987987987, 888665555]
Bdate = ['1965-01-09', '1955-12-08', '1968-01-19', '1941-06-20', '1962-09-15', '1972-07-31', '1969-03-29', '1937-11-10']
Address = ['731 F, Houston, TX', '638 V, Houston, TX', '3321 C, Spring, TX', '291 B, Bellaire, TX', '975 F, Humble, Tx', '5631 R, Houston, TX', '980 D, Houston, TX', '450 S, Houston, TX']
Sex = ['M', 'M', 'F', 'F', 'M', 'F', 'M', 'M']
Salary = [30000, 40000, 25000, 43000, 38000, 25000, 25000, 55000]
Super_ssn = [333445555, 888665555, 987654321, 888665555, 333445555, 333445555, 987654321, (None)]
Dno = [5, 5, 4, 4, 5, 5, 4, 1]
employee_table = [Fname, Minit, Lname, Ssn, Bdate, Address, Sex, Salary, Super_ssn, Dno]

In [11]:
for i in range(len(Fname)):
    db_dml("insert or ignore into employee ('Fname', 'Minit', 'Lname', 'Ssn', 'Bdate', 'Address', 'Sex', 'Salary', 'Super_ssn', 'Dno') values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (Fname[i], Minit[i], Lname[i], Ssn[i], Bdate[i], Address[i], Sex[i], Salary[i], Super_ssn[i], Dno[i]))

In [12]:
# Check table values
read_table('EMPLOYEE')

Unnamed: 0,Fname,Minit,Lname,Ssn,Bdate,Address,Sex,Salary,Super_ssn,Dno
0,John,B,Smith,123456789,1965-01-09,"731 F, Houston, TX",M,30000.0,333445555.0,5
1,Franklin,T,Wong,333445555,1955-12-08,"638 V, Houston, TX",M,40000.0,888665555.0,5
2,Alicia,J,Zelaya,999887777,1968-01-19,"3321 C, Spring, TX",F,25000.0,987654321.0,4
3,Jennifer,S,Wallace,9876554321,1941-06-20,"291 B, Bellaire, TX",F,43000.0,888665555.0,4
4,Ramesh,K,Narayan,666884444,1962-09-15,"975 F, Humble, Tx",M,38000.0,333445555.0,5
5,Joyce,A,English,453453453,1972-07-31,"5631 R, Houston, TX",F,25000.0,333445555.0,5
6,Ahmad,V,Jabbar,987987987,1969-03-29,"980 D, Houston, TX",M,25000.0,987654321.0,4
7,James,E,Borg,888665555,1937-11-10,"450 S, Houston, TX",M,55000.0,,1


In [13]:
# DEPARTMENT Table
Dname = ['Research', 'Administration', 'Headquarters']
Dnumber = [5, 4, 1]
Mgr_ssn = [333445555, 987654321, 888665555]
Mgr_start_date = ['1988-05-22', '1995-01-01', '1981-06-19']

In [14]:
for i in range(len(Dname)):
    db_dml("insert or ignore into department ('Dname', 'Dnumber', 'Mgr_ssn', 'Mgr_start_date') values (?, ?, ?, ?)", (Dname[i], Dnumber[i], Mgr_ssn[i], Mgr_start_date[i]))

In [15]:
read_table('DEPARTMENT')

Unnamed: 0,Dname,Dnumber,Mgr_ssn,Mgr_start_date
0,Research,5,333445555,1988-05-22
1,Administration,4,987654321,1995-01-01
2,Headquarters,1,888665555,1981-06-19


In [16]:
# DEPT_LOCATIONS table
Dnumber = [1, 4, 5, 5, 5]
Dlocation = ['Houston', 'Stafford', 'Bellaire', 'Sugarland', 'Houston']

In [17]:
for i in range(len(Dnumber)):
    db_dml("insert or ignore into dept_locations ('Dnumber', 'Dlocation') values (?, ?)", (Dnumber[i], Dlocation[i]))

In [18]:
read_table('dept_locations')

Unnamed: 0,Dnumber,Dlocation
0,1,Houston
1,4,Stafford
2,5,Bellaire
3,5,Sugarland
4,5,Houston


In [22]:
# WORKS_ON table
Essn = [123456789, 123456789, 666884444, 453453453, 453453453, 333445555, 333445555, 333445555, 333445555, 999887777, 999887777, 987987987, 987987987, 987654321, 987654321, 888665555]
Pno = [1, 2, 3, 1, 2, 2, 3, 10, 20, 30, 10, 10, 30, 30, 20, 20]
Hours = [32.5, 7.5, 40, 20, 20, 10, 10, 10, 10, 30, 10, 35, 5, 20, 15, None]
works_on_table = [Essn, Pno, Hours]

In [None]:
check_data

In [27]:
for i in range(len(Essn)):
    db_dml("insert or ignore into works_on ('Essn', 'Pno', 'Hours') values (?, ?, ?)", (Essn[i], Pno[i], Hours[i]))

In [28]:
read_table("works_on")

Unnamed: 0,Essn,Pno,Hours
0,123456789,1,32.5
1,123456789,2,7.5
2,666884444,3,40.0
3,453453453,1,20.0
4,453453453,2,20.0
5,333445555,2,10.0
6,333445555,3,10.0
7,333445555,10,10.0
8,333445555,20,10.0
9,999887777,30,30.0


In [30]:
# PROJECT table
Pname = ['ProductX', 'ProductY', 'ProductZ', 'Computerization', 'Reorganization', 'Newbenefits']
Pnumber = [1, 2, 3, 10, 20, 30]
Plocation = ['Bellaire', 'Sugarland', 'Houston', 'Stafford', 'Houston', 'Stafford']
Dnum = [5, 5, 5, 4, 1, 4]
project_table = [Pname, Pnumber, Plocation, Dnum]

In [31]:
for i in range(len(Pname)):
    db_dml("insert or ignore into project ('Pname', 'Pnumber', 'Plocation', 'Dnum') values (?, ?, ?, ?)", (Pname[i], Pnumber[i], Plocation[i], Dnum[i]))

In [32]:
read_table('project')

Unnamed: 0,Pname,Pnumber,Plocation,Dnum
0,ProductX,1,Bellaire,5
1,ProductY,2,Sugarland,5
2,ProductZ,3,Houston,5
3,Computerization,10,Stafford,4
4,Reorganization,20,Houston,1
5,Newbenefits,30,Stafford,4


In [36]:
# DEPENDENT
Essn = [333445555, 333445555, 333445555, 987654321, 123456789, 123456789, 123456789]
Dependent_name = ['Alice', 'Theodore', 'Joy', 'Abner', 'Michael', 'Alice', 'Elizebeth']
Sex = ['F', 'M', 'F', 'M', 'M', 'F', 'F']
Bdate = ['1986-04-05', '1983-10-25', '1958-05-03', '1942-02-28', '1988-01-04', '1988-12-30', '1967-05-05']
Relationship = ['Daughter', 'Son', 'Spouse', 'Spouse', 'Son', 'Daughter', 'Spouse']
dependent_table = [Essn, Dependent_name, Sex, Bdate, Relationship]

In [45]:
check_data(dependent_table)

1

In [None]:
for i in tables:
    print(read_table(i))