# Part V: Additional SELECT queries and SQL commands via psycopg2:

# Overview:

## The following code inserts additional rows of data into the companies table, and then performs several SELECT queries, using some more advanced methods, such as: 1.) boolean operators; 2.) aggregate methods such as COUNT, SUM, AVG and clauses that re-arrange aggregations, such as GROUP BY and ORDER BY.

# Insert additional rows of data into the companies table: currently, all data is for Redwood City companies, so let's insert some data for companies located in SF:

In [15]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct  9 02:18:21 2018

@author: kevinallen
"""

'''This code inserts additional rows of data into the existing companies table: one row for each of
five companies located in San Francisco. Note these are the column names and types of data
for each column: "company_name varchar, id serial PRIMARY KEY, n_employees (i.e., the # of employees) integer, location varchar"'''

import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

# Rows to insert, in the table's column order:
# (company_name, id, n_employees, location).
# Note: there are 2 sources for this table's data.
# 1.) For the Redwood City data, the source is a 2016 Redwood City government report on the largest companies located in Redwood City:
#     URL: <https://www.redwoodcity.org/business/economic-indicators-dashboard/top-private-employers>
# 2.) For the SF data, the source is the SFCED: <https://sfced.org/wp-content/uploads/2016/06/Largest-Employers-Jun-2016.pdf>
sf_companies = [
    ('Wells Fargo', 11, 8245, 'San Francisco'),
    ('Salesforce', 12, 5870, 'San Francisco'),
    ('Uber', 13, 1980, 'San Francisco'),
    ('Yelp', 14, 1717, 'San Francisco'),
    ('Google', 15, 1500, 'San Francisco'),
]

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # One parameterised INSERT executed once per row; all five inserts share a
    # single transaction, so they are saved (or discarded) together.
    cursor.executemany("INSERT INTO companies VALUES (%s, %s, %s, %s);", sf_companies)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails
    # (e.g. re-running this cell violates the primary key on id).
    if connection is not None:
        # Do NOT keep any partial changes on account of the error.
        connection.rollback()
    print("Database error:", error)

else:
    # Commit (i.e., save the changes to the database) only when nothing failed.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

# Find the average number of employees per company for companies located within San Francisco:

In [27]:
import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # Average n_employees over the rows whose location is 'San Francisco'.
    cursor.execute(""" SELECT AVG(n_employees) FROM companies WHERE location='San Francisco';
                   --Returns the average # of employees for the companies located in SF;
                   """)

    # fetchall() returns a list of tuples; an aggregate without GROUP BY
    # yields exactly one row holding the average.
    results = cursor.fetchall()
    for res in results:
        print(res)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails.
    if connection is not None:
        connection.rollback()
    print("Database error:", error)

else:
    # A SELECT has nothing to save; commit() simply ends the transaction
    # that psycopg2 opened implicitly for the query.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

(Decimal('3862.4000000000000000'),)


### On average, there are ~3,862 employees per company in San Francisco.

# Select companies whose names either start with 'S' or whose names end with 'e':

##  I.e., use a single SELECT...WHERE query with 2 LIKE conditions joined by the OR boolean operator:

In [32]:
import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # Matches company names that start with an upper-case 'S' OR end with a
    # lower-case 'e' (LIKE is case-sensitive in PostgreSQL); a name that
    # satisfies both conditions is still returned only once.
    cursor.execute("""SELECT * FROM companies WHERE company_name LIKE 'S%' OR company_name LIKE '%e';
                   --Selects companies whose names start with the upper-case letter S;
                   """)

    # fetchall() returns a list of tuples, one per matching row.
    results = cursor.fetchall()
    for res in results:
        print(res)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails.
    if connection is not None:
        connection.rollback()
    print("Database error:", error)

else:
    # A SELECT has nothing to save; commit() simply ends the transaction
    # that psycopg2 opened implicitly for the query.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

('Oracle', 1, 6781, 'Redwood City')
('Stanford Hospital', 2, 750, 'Redwood City')
('Zazzle', 3, 190, 'Redwood City')
('SUMO LOGIC', 4, 234, 'Redwood City')
('Shuttterfly', 9, 481, 'Redwood City')
('Starvista', 10, 330, 'Redwood City')
('Salesforce', 12, 5870, 'San Francisco')
('Google', 15, 1500, 'San Francisco')


There are several companies that fit 1 or both of these criteria. 

#  What is the total number of employees for companies that have more than 1,000 employees? Order the query results by company name.

# I.e.: Do a SELECT query to perform a sum aggregate on companies that have >1000 employees; group the results by company name (i.e., do a GROUP BY and ORDER BY):


In [23]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov  5 21:08:51 2018

@author: kevinallen
"""

'''This code runs a SELECT query that uses the SUM aggregate function together with the GROUP BY, HAVING and ORDER BY clauses'''

import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # Groups rows by company_name, sums n_employees within each group, keeps
    # only the groups whose sum exceeds 1000 (HAVING filters after
    # aggregation), and sorts the result alphabetically by company name.
    cursor.execute("""SELECT SUM(n_employees), company_name FROM companies GROUP BY company_name HAVING SUM(n_employees)>1000 ORDER BY company_name;
                   --Returns the sum of the number of employees and the company name for companies that have more than 1000 employees;           
    """)

    # fetchall() returns a list of (sum, company_name) tuples.
    results = cursor.fetchall()
    for res in results:
        print(res)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails.
    if connection is not None:
        connection.rollback()
    print("Database error:", error)

else:
    # A SELECT has nothing to save; commit() simply ends the transaction
    # that psycopg2 opened implicitly for the query.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

(2367, 'Electronic Arts')
(1500, 'Google')
(6781, 'Oracle')
(5870, 'Salesforce')
(1980, 'Uber')
(8245, 'Wells Fargo')
(1717, 'Yelp')


### Notice that Wells Fargo is the largest company in the table. All of the other large companies, however, are tech companies, such as Oracle and Salesforce.

# SELECT query: What is the total number of companies located in Redwood City that are saved in the companies table? Use the COUNT() function:

In [7]:
import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # Counts the Redwood City rows. Note that COUNT(n_employees) counts only
    # rows whose n_employees is not NULL; COUNT(*) would count every matching
    # row regardless of NULLs.
    cursor.execute("""SELECT COUNT(n_employees) FROM companies WHERE location = 'Redwood City';
                   --Returns the number of companies (i.e., from the table) that are located within Redwood City;                   --Returns the sum of the number of employees and the company name for companies that have more than 1000 employees;           
    """)

    # fetchall() returns a list of tuples; an aggregate without GROUP BY
    # yields exactly one row holding the count.
    results = cursor.fetchall()
    for res in results:
        print(res)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails.
    if connection is not None:
        connection.rollback()
    print("Database error:", error)

else:
    # A SELECT has nothing to save; commit() simply ends the transaction
    # that psycopg2 opened implicitly for the query.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

(10,)


### There are 10 companies from the table that are located within Redwood City. 

# Query on all companies in the table whose number of employees ranges from 500 to 2,200. Return the company name and number of employees:

In [24]:
import psycopg2

# Both handles start as None so the cleanup code at the bottom can tell
# whether they were ever opened.
connection = None
cursor = None

try:
    connection = psycopg2.connect(database="learning", user="postgres", password="five", host="localhost", port="5433")

    # The cursor is the object through which Python executes PostgreSQL commands.
    cursor = connection.cursor()

    # BETWEEN is inclusive at both ends, so companies with exactly 500 or
    # exactly 2200 employees are also returned.
    cursor.execute("""SELECT company_name, n_employees FROM companies WHERE n_employees BETWEEN 500 AND 2200;
                   --Selects data for companies whose number of employees range from 500 to 2200; """)

    # fetchall() returns a list of (company_name, n_employees) tuples.
    results = cursor.fetchall()
    for res in results:
        print(res)

except psycopg2.DatabaseError as error:
    # Raised when connecting fails or when the SQL itself fails.
    if connection is not None:
        connection.rollback()
    print("Database error:", error)

else:
    # A SELECT has nothing to save; commit() simply ends the transaction
    # that psycopg2 opened implicitly for the query.
    connection.commit()

finally:
    # Release resources whether or not an error occurred; the cursor is
    # closed before the connection it belongs to.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

('Stanford Hospital', 750)
('Kaiser Foundation', 773)
('Uber', 1980)
('Yelp', 1717)
('Google', 1500)
