<h1>Instructions</h1>
Normalize the following block of data by copying the data below, except the triple quotes, <br>
and placing the data in a file called employee_data.csv<br>
1 - Import the file using core python<br>
2 - Normalize the data and separate into component tables<br>
3 - Use SQL to create the tables and populate them<br>
4 - Answer the following questions using SQL<br>
- Provide a count of the employees by gender<br>
- Provide a count of the employees by dept name<br>
- Provide a list of employees in last name, first name order<br>
- Provide a list of employees in tenure order (active employees only) in descending order<br>
- Provide a list of employees by manager name in alphabetical order by last name<br>
- Provide a list of employees by region<br>
- Provide a list of employees who have exceeded their allotment of sick days<br>

In [144]:
import sqlite3
employee_data = []
with open('employee_data.csv', 'r') as file:
    for line in file:
        employee_data.append(line.replace("'",'').split(','))

# Open/Create database
connection = sqlite3.connect('employee_data.db')
cursor = connection.cursor()

# Make employee table
cursor.execute('''create table if not exists employees (
    employee_id int,
    marital_status text, 
    last_name text, 
    first_name text, 
    hire_date text, 
    gender text, 
    dept_id text, 
    dept_name text, 
    manager_id int, 
    status text, 
    term_date text, 
    region text, 
    sick_days_accumulated text, 
    sick_day1 text, 
    sick_day2 text, 
    sick_day3 text
)''')

# Marriage code table
cursor.execute('''create table if not exists marital_status_codes (
    id,
    description text
)''')

# Department table
cursor.execute('''create table if not exists departments (
    id,
    description text
)''')

# Sick day table
cursor.execute('''create table if not exists sick_days (
    emp_id text,
    day_out text
)''')


# fill employee table columns
for employee in employee_data[1:]:
    cursor.execute('''insert into employees values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', (employee))

# fill marital status table columns
for employee in employee_data[1:]:
    cursor.execute('''insert into marital_status_codes values (?,?)''', (employee[1][0],employee[1]))

# fill department table columns
department_data = []
for employee in employee_data[1:]:
    department_data.append((employee[6],employee[7]))
cursor.executemany('''insert into departments values (?,?)''', set(department_data))

# fill sick_day table columns
for employee in employee_data[1:]:
    cursor.execute('''insert into sick_days values (?,?)''', (employee[0],employee[12]))

connection.commit()
connection.close()

In [145]:
import sqlite3

male_employees = []
female_employees = []

connection = sqlite3.connect('employee_data.db')
cursor = connection.cursor()

# employees by name
male = list(cursor.execute("""select * from employees where gender = 'Male'"""))
female = list(cursor.execute("""select * from employees where gender = 'Female'"""))

# get department names
department_names = set(cursor.execute("""select dept_name from employees"""))
department_names = [name[0] for name in department_names]

# Count of employeess by department name
by_department={}
for department in department_names:
    by_department[department] = list(cursor.execute("select last_name,first_name from employees where dept_name = ?",(department,)))    
[print(department,by_department[department],'\n') for department in by_department]

Warehouse [('Sing', 'Hu'), ('Breen', 'Margret'), ('Opus', 'Oliver'), ('Lee', 'Tatum'), ('Jackson', 'Jessica'), ('Forrest', 'Peter'), ('Jones', 'Jessica'), ('Sing', 'Hu'), ('Breen', 'Margret'), ('Opus', 'Oliver'), ('Lee', 'Tatum'), ('Jackson', 'Jessica'), ('Forrest', 'Peter'), ('Jones', 'Jessica')] 

Accounting [('Rivera', 'Jewels'), ('Freach', 'Greta'), ('Gantt', 'Jackson'), ('Davidson', 'Wilma'), ('Xi', 'Yu'), ('Martinez', 'Margarita'), ('Regis', 'Lucy'), ('Rivera', 'Jewels'), ('Freach', 'Greta'), ('Gantt', 'Jackson'), ('Davidson', 'Wilma'), ('Xi', 'Yu'), ('Martinez', 'Margarita'), ('Regis', 'Lucy')] 

Human Resources [('Ellis', 'Francis'), ('Yee', 'Sue'), ('Tate', 'Rachael'), ('Williamson', 'Ursula'), ('French', 'Sara'), ('Ute', 'Ulsula'), ('Ellis', 'Francis'), ('Yee', 'Sue'), ('Tate', 'Rachael'), ('Williamson', 'Ursula'), ('French', 'Sara'), ('Ute', 'Ulsula')] 

Information Technology [('Jones', 'Pamala'), ('Ramos', 'Diego'), ('Sanchez', 'Jorge'), ('Sung', 'Henry'), ('Pollard', 'Fra

[None, None, None, None, None, None]

In [146]:
# Employees alphabetically by last name
sorted_last_name = list(cursor.execute("select last_name,first_name from employees order by last_name asc"))    
for i in sorted_last_name:
    print(' '.join(i))

Breen Margret
Breen Margret
Davidson Fred
Davidson Wilma
Davidson Fred
Davidson Wilma
Davis Elizabeth
Davis Elizabeth
Ellis Francis
Ellis Francis
Forrest Peter
Forrest Peter
Freach Greta
Freach Greta
French Sara
French Sara
Gantt Jackson
Gantt Jackson
Holland Henry
Holland Henry
Jackson Jessica
Jackson Jessica
Jarvis Melvin
Jarvis Melvin
Jones Pamala
Jones Jessica
Jones Pamala
Jones Jessica
Keller Bradley
Keller Bradley
Lagos Leeland
Lagos Leeland
Lee Tatum
Lee Tatum
Martinez Margarita
Martinez Margarita
Nulland Nicholas
Nulland Nicholas
Opus Oliver
Opus Oliver
Pollard Franz
Pollard Franz
Quinones Maribel
Quinones Maribel
Ramos Diego
Ramos Diego
Regis Lucy
Regis Lucy
Rivera Jewels
Rivera Jewels
Sanchez Jorge
Sanchez Jorge
Saulzman Randle
Saulzman Randle
Saunders Asia
Saunders Asia
Sing Hu
Sing Hu
Sung Henry
Sung Henry
Tate Rachael
Tate Rachael
Torrez Lea
Torrez Lea
Ute Ulsula
Ute Ulsula
Williams William
Williams William
Williamson Ursula
Williamson Ursula
Xi Yu
Xi Yu
Yee Sue
Yee Sue


In [147]:
# list of active employees by tenure order desc
sorted_tenure = list(cursor.execute("select last_name,first_name from employees where status = 'NON-EXEMPT' order by hire_date desc"))    
for i in sorted_tenure:
    print(' '.join(i))

Sing Hu
Sing Hu
Yee Sue
Yee Sue
Jackson Jessica
Jackson Jessica
Martinez Margarita
Martinez Margarita
Lee Tatum
Lee Tatum
French Sara
French Sara
Opus Oliver
Opus Oliver
Rivera Jewels
Rivera Jewels
Jones Jessica
Jones Jessica
Forrest Peter
Forrest Peter
Ute Ulsula
Ute Ulsula
Williamson Ursula
Williamson Ursula


In [148]:
# get manager id's
manager_ids = set(cursor.execute("""select manager_id from employees"""))
manager_ids = [name[0] for name in manager_ids]
# create a managers table
cursor.execute('create table if not exists managers (employee_id int, last_name text, first_name text)')
# Fill the managers table with manager id, and names
for manager_id in manager_ids:
    manager = list(cursor.execute('select distinct last_name,first_name from employees where employee_id = ?',(manager_id,)))
    cursor.execute('insert into managers values (?,?,?)',(manager_id,manager[0][0],manager[0][1],))
connection.commit()


# get managers by their id
employees_managers = list(cursor.execute("""select distinct 
managers.last_name, managers.first_name, employees.last_name, employees.first_name
from employees 
left join managers on employees.manager_id = managers.employee_id
order by employees.last_name"""))

[print(f'Employee : {i[2]} {i[3]} \nManager: {i[0]} {i[1]}\n') for i in employees_managers]

# list of employees by manager name, in order alphabetically by managers last name (there is no manager name in provided data......?)
# sorted_manager = list(cursor.execute("select last_name,first_name from employees order by manager_id desc"))
managers={}
for manager_id in manager_ids:
    managers[manager_id] = (list(cursor.execute("select last_name,first_name from employees where manager_id = ? order by last_name asc",(manager_id,))) )

# Sort the dictionary and display the data
managers = dict(sorted(managers.items()))
# for i in managers:
#     print(i,managers[i],'\n')

Employee : Breen Margret 
Manager: Forrest Peter

Employee : Davidson Fred 
Manager: Saulzman Randle

Employee : Davidson Wilma 
Manager: Saulzman Randle

Employee : Davis Elizabeth 
Manager: Davidson Fred

Employee : Ellis Francis 
Manager: Saulzman Randle

Employee : Forrest Peter 
Manager: Saulzman Randle

Employee : Freach Greta 
Manager: Xi Yu

Employee : French Sara 
Manager: Ellis Francis

Employee : Gantt Jackson 
Manager: Xi Yu

Employee : Holland Henry 
Manager: Jones Pamala

Employee : Jackson Jessica 
Manager: Forrest Peter

Employee : Jarvis Melvin 
Manager: Davidson Fred

Employee : Jones Pamala 
Manager: Saulzman Randle

Employee : Jones Jessica 
Manager: Forrest Peter

Employee : Keller Bradley 
Manager: Jones Pamala

Employee : Lagos Leeland 
Manager: Jones Pamala

Employee : Lee Tatum 
Manager: Forrest Peter

Employee : Martinez Margarita 
Manager: Xi Yu

Employee : Nulland Nicholas 
Manager: Davidson Fred

Employee : Opus Oliver 
Manager: Forrest Peter

Employee : Po

In [149]:
# get regions
regions = set(cursor.execute("""select region from employees"""))
regions = [region[0] for region in regions]

# lists of employees by region, alphatbetically
by_region = {}
for region in regions:
    by_region[region] = (set(cursor.execute("select last_name,first_name from employees where region = ? order by last_name asc",(region,))) )
by_region = dict(sorted(by_region.items()))
for i in by_region:
    print(i,by_region[i],'\n')

EAST {('Breen', 'Margret'), ('Nulland', 'Nicholas'), ('Opus', 'Oliver'), ('Pollard', 'Franz'), ('Keller', 'Bradley'), ('Saulzman', 'Randle')} 

NORTH EAST {('Jones', 'Pamala'), ('Ellis', 'Francis'), ('Yee', 'Sue'), ('Rivera', 'Jewels'), ('Davidson', 'Fred'), ('Ramos', 'Diego'), ('Sing', 'Hu'), ('Sanchez', 'Jorge')} 

NORTH WEST {('Lee', 'Tatum'), ('Ute', 'Ulsula'), ('Jackson', 'Jessica'), ('Saunders', 'Asia')} 

SOUTH EAST {('Holland', 'Henry'), ('Lagos', 'Leeland'), ('Martinez', 'Margarita'), ('Torrez', 'Lea'), ('Regis', 'Lucy'), ('Forrest', 'Peter'), ('Jones', 'Jessica')} 

SOUTH WEST {('Davidson', 'Wilma'), ('Sung', 'Henry'), ('Davis', 'Elizabeth'), ('Gantt', 'Jackson'), ('Williams', 'William'), ('Jarvis', 'Melvin'), ('Tate', 'Rachael'), ('Freach', 'Greta')} 

WEST {('Williamson', 'Ursula'), ('Quinones', 'Maribel'), ('French', 'Sara'), ('Xi', 'Yu')} 



In [150]:
# lists of employees exceeding their allotment of sick days
# get names
names = set(cursor.execute("""select last_name,first_name from employees order by last_name asc"""))
names = [name for name in names]

# lists of employees sick day data
sick_days = {}
for name in names:
    sick_days[' '.join(name)] = (list(cursor.execute("select sick_days_accumulated,sick_day1,sick_day2,sick_day3 from employees where last_name = ?",(name[0],))) )
by_region = dict(sorted(by_region.items()))

employees_exceeding = [] 

for employee in sick_days:
    # create key/value of employee/sick_day_data
    sick_days[employee] = list(sick_days[employee][0])
    for i in range(len(sick_days[employee])):
        try:
            # remove unwanted data
            sick_days[employee].remove('')
        except:
            pass
        # lol
        try:
            sick_days[employee].remove('\n')
        except:
            pass

    # if they used more sick days than they have
    if int(sick_days[employee][0]) < len(sick_days[employee][1:]):
        (sick_days[employee])
        employees_exceeding.append(employee)
    
for i in employees_exceeding:
    print(i)
connection.close()

Martinez Margarita
