<h1>Instructions</h1>
Normalize the following block of data by copying the data below, except the triple quotes, <br>
and placing the data in a file called employee_data.csv<br>
1 - Import the file using core python<br>
2 - Normalize the data and separate into component tables<br>
3 - Use SQL to create the tables and populate them<br>
4 - Answer the following questions using SQL<br>
- Provide a count of the employees by gender<br>
- Provide a count of the employees by dept name<br>
- Provide a list of employees in last name, first name order<br>
- Provide a list of employees in tenure order (active employees only) in descending order<br>
- Provide a list of employees by manager name in alphabetical order by last name<br>
- Provide a list of employees by region<br>
- Provide a list of employees who have exceeded their allotment of sick days<br>

In [320]:
import sqlite3
employee_data = []
with open('employee_data.csv', 'r') as file:
    for line in file:
        employee_data.append(line.replace("'",'').split(',')[1:])

connection = sqlite3.connect('employee_data.db')
cursor = connection.cursor()
cursor.execute('''create table if not exists employees (
    marital_status text, 
    last_name text, 
    first_name text, 
    hire_date text, 
    gender text, 
    dept_id text, 
    dept_name text, 
    manager_id int, 
    status text, 
    term_date text, 
    region text, 
    sick_days_accumulated text, 
    sick_day1 text, 
    sick_day2 text, 
    sick_day3 text
)''')

for employee in employee_data[1:]:
    cursor.execute('''insert into employees values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', (employee))

connection.commit()
connection.close()

In [321]:
import sqlite3

male_employees = []
female_employees = []

connection = sqlite3.connect('employee_data.db')
cursor = connection.cursor()

# employees by name
male = list(cursor.execute("""select * from employees where gender = 'Male'"""))
female = list(cursor.execute("""select * from employees where gender = 'Female'"""))

# get department names
department_names = set(cursor.execute("""select dept_name from employees"""))
department_names = [name[0] for name in department_names]

# Count of employeess by department name
by_department={}
for department in department_names:
    by_department[department] = list(cursor.execute("select last_name,first_name from employees where dept_name = ?",(department,)))    
[print(department,by_department[department],'\n') for department in by_department]



Accounting [('Rivera', 'Jewels'), ('Freach', 'Greta'), ('Gantt', 'Jackson'), ('Davidson', 'Wilma'), ('Xi', 'Yu'), ('Martinez', 'Margarita'), ('Regis', 'Lucy')] 

Information Technology [('Jones', 'Pamala'), ('Ramos', 'Diego'), ('Sanchez', 'Jorge'), ('Sung', 'Henry'), ('Pollard', 'Franz'), ('Keller', 'Bradley'), ('Lagos', 'Leeland'), ('Holland', 'Henry'), ('Torrez', 'Lea')] 

Warehouse [('Sing', 'Hu'), ('Breen', 'Margret'), ('Opus', 'Oliver'), ('Lee', 'Tatum'), ('Jackson', 'Jessica'), ('Forrest', 'Peter'), ('Jones', 'Jessica')] 

Executive [('Saulzman', 'Randle')] 

Finance [('Davidson', 'Fred'), ('Williams', 'William'), ('Davis', 'Elizabeth'), ('Jarvis', 'Melvin'), ('Quinones', 'Maribel'), ('Nulland', 'Nicholas'), ('Saunders', 'Asia')] 

Human Resources [('Ellis', 'Francis'), ('Yee', 'Sue'), ('Tate', 'Rachael'), ('Williamson', 'Ursula'), ('French', 'Sara'), ('Ute', 'Ulsula')] 



[None, None, None, None, None, None]

In [322]:
# Employees alphabetically by last name
sorted_last_name = list(cursor.execute("select last_name,first_name from employees order by last_name asc"))    
for i in sorted_last_name:
    print(' '.join(i))

Breen Margret
Davidson Fred
Davidson Wilma
Davis Elizabeth
Ellis Francis
Forrest Peter
Freach Greta
French Sara
Gantt Jackson
Holland Henry
Jackson Jessica
Jarvis Melvin
Jones Pamala
Jones Jessica
Keller Bradley
Lagos Leeland
Lee Tatum
Martinez Margarita
Nulland Nicholas
Opus Oliver
Pollard Franz
Quinones Maribel
Ramos Diego
Regis Lucy
Rivera Jewels
Sanchez Jorge
Saulzman Randle
Saunders Asia
Sing Hu
Sung Henry
Tate Rachael
Torrez Lea
Ute Ulsula
Williams William
Williamson Ursula
Xi Yu
Yee Sue


In [323]:
# list of active employees by tenure order desc
sorted_tenure = list(cursor.execute("select last_name,first_name from employees where status = 'NON-EXEMPT' order by hire_date desc"))    
for i in sorted_tenure:
    print(' '.join(i))

Sing Hu
Yee Sue
Jackson Jessica
Martinez Margarita
Lee Tatum
French Sara
Opus Oliver
Rivera Jewels
Jones Jessica
Forrest Peter
Ute Ulsula
Williamson Ursula


In [324]:
# get manager id's
manager_ids = set(cursor.execute("""select manager_id from employees"""))
manager_ids = [name[0] for name in manager_ids]

# list of employees by manager name, in order alphabetically by managers last name (there is no manager name in provided data......?)
# sorted_manager = list(cursor.execute("select last_name,first_name from employees order by manager_id desc"))
managers={}
for manager_id in manager_ids:
    managers[manager_id] = (list(cursor.execute("select last_name,first_name from employees where manager_id = ? order by last_name asc",(manager_id,))) )
managers = dict(sorted(managers.items()))
for i in managers:
    print(i,managers[i],'\n')

1 [('French', 'Sara'), ('Tate', 'Rachael'), ('Ute', 'Ulsula'), ('Williamson', 'Ursula'), ('Yee', 'Sue')] 

2 [('Holland', 'Henry'), ('Keller', 'Bradley'), ('Lagos', 'Leeland'), ('Pollard', 'Franz'), ('Ramos', 'Diego'), ('Sanchez', 'Jorge'), ('Sung', 'Henry'), ('Torrez', 'Lea')] 

3 [('Davis', 'Elizabeth'), ('Jarvis', 'Melvin'), ('Nulland', 'Nicholas'), ('Quinones', 'Maribel'), ('Saunders', 'Asia'), ('Williams', 'William')] 

19 [('Freach', 'Greta'), ('Gantt', 'Jackson'), ('Martinez', 'Margarita'), ('Regis', 'Lucy'), ('Rivera', 'Jewels')] 

30 [('Breen', 'Margret'), ('Jackson', 'Jessica'), ('Jones', 'Jessica'), ('Lee', 'Tatum'), ('Opus', 'Oliver'), ('Sing', 'Hu')] 

37 [('Davidson', 'Fred'), ('Davidson', 'Wilma'), ('Ellis', 'Francis'), ('Forrest', 'Peter'), ('Jones', 'Pamala'), ('Saulzman', 'Randle'), ('Xi', 'Yu')] 



In [325]:
# get regions
regions = set(cursor.execute("""select region from employees"""))
regions = [region[0] for region in regions]

# lists of employees by region, alphatbetically
by_region = {}
for region in regions:
    by_region[region] = (list(cursor.execute("select last_name,first_name from employees where region = ? order by last_name asc",(region,))) )
by_region = dict(sorted(by_region.items()))
for i in by_region:
    print(i,by_region[i],'\n')

EAST [('Breen', 'Margret'), ('Keller', 'Bradley'), ('Nulland', 'Nicholas'), ('Opus', 'Oliver'), ('Pollard', 'Franz'), ('Saulzman', 'Randle')] 

NORTH EAST [('Davidson', 'Fred'), ('Ellis', 'Francis'), ('Jones', 'Pamala'), ('Ramos', 'Diego'), ('Rivera', 'Jewels'), ('Sanchez', 'Jorge'), ('Sing', 'Hu'), ('Yee', 'Sue')] 

NORTH WEST [('Jackson', 'Jessica'), ('Lee', 'Tatum'), ('Saunders', 'Asia'), ('Ute', 'Ulsula')] 

SOUTH EAST [('Forrest', 'Peter'), ('Holland', 'Henry'), ('Jones', 'Jessica'), ('Lagos', 'Leeland'), ('Martinez', 'Margarita'), ('Regis', 'Lucy'), ('Torrez', 'Lea')] 

SOUTH WEST [('Davidson', 'Wilma'), ('Davis', 'Elizabeth'), ('Freach', 'Greta'), ('Gantt', 'Jackson'), ('Jarvis', 'Melvin'), ('Sung', 'Henry'), ('Tate', 'Rachael'), ('Williams', 'William')] 

WEST [('French', 'Sara'), ('Quinones', 'Maribel'), ('Williamson', 'Ursula'), ('Xi', 'Yu')] 



In [326]:
# lists of employees exceeding their allotment of sick days
# get names
names = set(cursor.execute("""select last_name,first_name from employees order by last_name asc"""))
names = [name for name in names]

# lists of employees sick day data
sick_days = {}
for name in names:
    sick_days[' '.join(name)] = (list(cursor.execute("select sick_days_accumulated,sick_day1,sick_day2,sick_day3 from employees where last_name = ?",(name[0],))) )
by_region = dict(sorted(by_region.items()))

employees_exceeding = [] 

for employee in sick_days:
    # create key/value of employee/sick_day_data
    sick_days[employee] = list(sick_days[employee][0])
    for i in range(len(sick_days[employee])):
        try:
            # remove unwanted data
            sick_days[employee].remove('')
        except:
            pass
        # lol
        try:
            sick_days[employee].remove('\n')
        except:
            pass

    # if they used more sick days than they have
    if int(sick_days[employee][0]) < len(sick_days[employee][1:]):
        (sick_days[employee])
        employees_exceeding.append(employee)
    
for i in employees_exceeding:
    print(i)
connection.close()

Martinez Margarita
