In [2]:
import numpy as np

In [3]:
# Load the employee.csv file into a Structured NumPy array.
# Display the first 5 records.
df=np.genfromtxt("employee.csv",dtype=None,delimiter=',',names=True,encoding=None)
print(df[:5])

[(101, 'John', 'Doe', 75000, '15-05-1990', 'New York', 101, 'Engineering')
 (102, 'Jane', 'Smith', 65000, '23-08-1988', 'San Francisco', 102, 'Marketing')
 (103, 'Michael', 'Johnson', 80000, '10-02-1985', 'Chicago', 101, 'Engineering')
 (104, 'Emily', 'Williams', 70000, '30-11-1992', 'Los Angeles', 103, 'Human Resources')
 (105, 'Robert', 'Miller', 90000, '18-07-1980', 'Seattle', 102, 'Marketing')]


In [4]:
# Retrieve employees with a salary greater than $70,000.
print(df[df['sal']>70000])

[(101, 'John', 'Doe', 75000, '15-05-1990', 'New York', 101, 'Engineering')
 (103, 'Michael', 'Johnson', 80000, '10-02-1985', 'Chicago', 101, 'Engineering')
 (105, 'Robert', 'Miller', 90000, '18-07-1980', 'Seattle', 102, 'Marketing')
 (106, 'Alice', 'Jackson', 72000, '05-04-1995', 'Miami', 104, 'Finance')
 (107, 'David', 'Anderson', 85000, '12-09-1987', 'Dallas', 101, 'Engineering')
 (108, 'Olivia', 'Thomas', 78000, '08-12-1991', 'Houston', 103, 'Human Resources')
 (109, 'William', 'Brown', 95000, '25-06-1982', 'Phoenix', 102, 'Marketing')
 (111, 'James', 'Clark', 89000, '22-03-1984', 'Denver', 101, 'Engineering')
 (112, 'Emma', 'Hall', 82000, '17-01-1994', 'Portland', 103, 'Human Resources')
 (113, 'Christopher', 'Young', 76000, '03-07-1989', 'San Diego', 102, 'Marketing')
 (114, 'Isabella', 'Hill', 91000, '20-04-1983', 'Orlando', 101, 'Engineering')
 (116, 'Abigail', 'Turner', 80000, '28-02-1986', 'Austin', 103, 'Human Resources')
 (117, 'Liam', 'Carter', 74000, '05-09-1997', 'Minneap

In [5]:
# Sort Employees based on salaries in descending order
print(np.sort(df,kind='mergesort',order='sal')[::-1][:3])

[(109, 'William', 'Brown', 95000, '25-06-1982', 'Phoenix', 102, 'Marketing')
 (118, 'Ella', 'Ross', 92000, '10-05-1981', 'Charlotte', 101, 'Engineering')
 (114, 'Isabella', 'Hill', 91000, '20-04-1983', 'Orlando', 101, 'Engineering')]


In [6]:
# Born after 1990

from datetime import datetime as dt
born_1990=np.array([dt.strptime(d,"%d-%m-%Y").year>1990 for d in df['dateofbirth']])
print(df[born_1990])

[(104, 'Emily', 'Williams', 70000, '30-11-1992', 'Los Angeles', 103, 'Human Resources')
 (106, 'Alice', 'Jackson', 72000, '05-04-1995', 'Miami', 104, 'Finance')
 (108, 'Olivia', 'Thomas', 78000, '08-12-1991', 'Houston', 103, 'Human Resources')
 (110, 'Sophia', 'White', 68000, '14-08-1993', 'Atlanta', 104, 'Finance')
 (112, 'Emma', 'Hall', 82000, '17-01-1994', 'Portland', 103, 'Human Resources')
 (115, 'Aiden', 'Ward', 69000, '15-11-1996', 'Tampa', 104, 'Finance')
 (117, 'Liam', 'Carter', 74000, '05-09-1997', 'Minneapolis', 102, 'Marketing')
 (119, 'Jackson', 'Martin', 67000, '18-03-1998', 'Raleigh', 104, 'Finance')]


In [7]:
# Calculate the age of each employee based on their date of birth

cy=dt.now().year # Current Year
ages=np.array([(cy - dt.strptime(d,"%d-%m-%Y").year) for d in df['dateofbirth']])
print(ages)

[35 37 40 33 45 30 38 34 43 32 41 31 36 42 29 39 28 44 27 37]


In [8]:
# Calculate the age of each employee based on their date of birth and also their name

cy=dt.now().year # Current Year
ages=np.array([(d['empfirstname'],cy - dt.strptime(d['dateofbirth'],"%d-%m-%Y").year) for d in df[['empfirstname','dateofbirth']]])
print(ages)

[['John' '35']
 ['Jane' '37']
 ['Michael' '40']
 ['Emily' '33']
 ['Robert' '45']
 ['Alice' '30']
 ['David' '38']
 ['Olivia' '34']
 ['William' '43']
 ['Sophia' '32']
 ['James' '41']
 ['Emma' '31']
 ['Christopher' '36']
 ['Isabella' '42']
 ['Aiden' '29']
 ['Abigail' '39']
 ['Liam' '28']
 ['Ella' '44']
 ['Jackson' '27']
 ['Sofia' '37']]


In [9]:
# Total salary of each department
departments=np.unique(df['deptname'])
l=[]
for dept in departments:
    l.append([dept,np.sum(df[df['deptname']==dept]['sal'])])
arr=np.array(l)
print(arr)
    

[['Engineering' '512000']
 ['Finance' '276000']
 ['Human Resources' '398000']
 ['Marketing' '400000']]


In [10]:
# Count the number of employees in each city
cities = np.unique(df['city'])
l=[]
for city in cities:
    l.append([city,np.count_nonzero(df["city"]==city)])
arr=np.array(l)
print(arr)

[['Atlanta' '1']
 ['Austin' '1']
 ['Charlotte' '1']
 ['Chicago' '1']
 ['Dallas' '1']
 ['Denver' '1']
 ['Houston' '1']
 ['Los Angeles' '1']
 ['Miami' '1']
 ['Minneapolis' '1']
 ['New York' '1']
 ['Orlando' '1']
 ['Phoenix' '1']
 ['Portland' '1']
 ['Raleigh' '1']
 ['Sacramento' '1']
 ['San Diego' '1']
 ['San Francisco' '1']
 ['Seattle' '1']
 ['Tampa' '1']]


In [11]:
# Increase the salary of employees in the Marketing department by 10%.
mark=df[df['deptname']=='Marketing']
mark['sal']=mark['sal'] * 1.1
#print(mark)
df[df['deptname']=='Marketing']=mark