Structured array allow us to store complex data like tables in the simple NumPy array where eacch elements can contains multiple namefield like columns.

In [1]:
import numpy as np

In [None]:
data = np.array([(1,"sasha",89.0), (2, "Sansa", 74.0), (3, "Sonia", 55.0)], dtype = [('id', 'i4'), ('name', 'U10'), ('score', 'f4')])

rec = data.view(np.recarray)
print(data,"\n") #accessing whole data
print(rec.id,"\n") #accessing id only
print(rec.name,"\n") #accessing name only
print(rec.score,"\n") #accessing score only

[(1, 'sasha', 89.) (2, 'Sansa', 74.) (3, 'Sonia', 55.)] 

[1 2 3] 

['sasha' 'Sansa' 'Sonia'] 

[89. 74. 55.] 



Student Record System

In [None]:
import numpy as np

def find_topper(student_data):
    return student_data[np.argmax(student_data['score'])]

#structured array
students = np.array([
    (101, 'Sita', 88.5),
    (102, 'Gita', 91.0),
    (103, 'Hari', 76.3)
], dtype=[('id', 'i4'), ('name', 'U15'), ('score', 'f4')])

print("All student records:\n", students, "\n")

# Access individual fields
print("Names:", students['name'], "\n")
print("Scores:", students['score'], "\n")

# Students scoring above 80
high_scorers = students[students['score'] > 80]
print("High scorers:", high_scorers['name'], "\n")

# Sort students from highest to lowest score
sorted_students = np.sort(students, order='score')[::-1]
print("Sorted students (high to low):\n", sorted_students, "\n")

# View as recarray
rec = students.view(np.recarray)
print("Access via recarray - Names:", rec.name)
print("Access via recarray - Scores:", rec.score, "\n")

# Access list of IDs
print("Student IDs:", students['id'], "\n")

# Count students scoring < 85
lessthan85 = students[students['score'] < 85]
print("Students scoring less than 85:", lessthan85['name'], "\n")

# Find topper
top_scorer = find_topper(students)
print("Topper is:", top_scorer['name'], "\n")

# Average score
avg_score = np.mean(students['score'])
print("Average score:", avg_score, "\n")

# Formatted result display
for s in students:
    print(f"{s['name']} scored {s['score']}")


All student records:
 [(101, 'Sita', 88.5) (102, 'Gita', 91. ) (103, 'Hari', 76.3)] 

Names: ['Sita' 'Gita' 'Hari'] 

Scores: [88.5 91.  76.3] 

High scorers: ['Sita' 'Gita'] 

Sorted students (high to low):
 [(102, 'Gita', 91. ) (101, 'Sita', 88.5) (103, 'Hari', 76.3)] 

Access via recarray - Names: ['Sita' 'Gita' 'Hari']
Access via recarray - Scores: [88.5 91.  76.3] 

After updating Hari's score:
 [(101, 'Sita', 88.5) (102, 'Gita', 91. ) (103, 'Hari', 76.3)] 

Student IDs: [101 102 103] 

Students scoring less than 85: ['Hari'] 

Topper is: Gita 

Average score: 85.26667 

Sita scored 88.5
Gita scored 91.0
Hari scored 76.30000305175781


In [None]:

data = np.genfromtxt('data.csv', delimiter=',', names=True, dtype=None, encoding='utf-8')

In [None]:
#print 5 rows
print(data[:5],"\n")

# field names 
print(data.dtype.names,"\n")

# individual field access
print(data['Stage_fear'],"\n")

# access specific field
print("Hours top 10 people spent going outside",data['Going_outside'][:10],"\n")

# accessing top 5 extroverts
personality = data[data['Personality']=='Extrovert']
print("Extroverts\n",personality[:5],"\n")

# find unique values in one column
unique_values = np.unique(data['Personality'])
print(f"Unique Personalities:{unique_values}\n")

#counting number of rows
count_rows = len(data)
print(f"Total rows in this dataset are {count_rows}\n")

# print one record completely
row = (data[3])

for field in data.dtype.names:
    print(f"{field} : {row[field]}")



[(4., 'No', 4., 6., 'No', 13., 5., 'Extrovert')
 (9., 'Yes', 0., 0., 'Yes',  0., 3., 'Introvert')
 (9., 'Yes', 1., 2., 'Yes',  5., 2., 'Introvert')
 (0., 'No', 6., 7., 'No', 14., 8., 'Extrovert')
 (3., 'No', 9., 4., 'No',  8., 5., 'Extrovert')] 

('Time_spent_Alone', 'Stage_fear', 'Social_event_attendance', 'Going_outside', 'Drained_after_socializing', 'Friends_circle_size', 'Post_frequency', 'Personality') 

['No' 'Yes' 'Yes' ... 'Yes' 'Yes' 'No'] 

Hours top 10 people spent going outside [ 6.  0.  2.  7.  4.  5. nan  4.  3.  6.] 

Extroverts
 [(4., 'No', 4.,  6., 'No', 13., 5., 'Extrovert')
 (0., 'No', 6.,  7., 'No', 14., 8., 'Extrovert')
 (3., 'No', 9.,  4., 'No',  8., 5., 'Extrovert')
 (1., 'No', 7.,  5., 'No',  6., 6., 'Extrovert')
 (4., 'No', 9., nan, 'No',  7., 7., 'Extrovert')] 

Unique Personalities:['Extrovert' 'Introvert']

Total rows in this dataset are 2900

Time_spent_Alone : 0.0
Stage_fear : No
Social_event_attendance : 6.0
Going_outside : 7.0
Drained_after_socializing :

In [110]:
# structured array

employee = np.array([
    (25, 'Data Cleaning', 150000 ),
    (50, 'Data Processing', 200000),
    (75, 'AI developer', 130000),
    (100, 'Web Developer', 70000),
    (125, 'Human Resource', 140000),
    (150, 'Management', 170000),
    (175, 'Coordinator', 230000),
    (200, 'Head Of Department', 500000),
    (225, 'Helper', 50000),
    (250, 'Model Training', 320000),
    
], dtype = [
    ('id', 'i4'),
    ('department', 'U15'),
    ('salary', 'f4')
            ]
)

print(employee.dtype.names)

# salary column
print(employee['salary'])

# filtering employess with salary more than 300000
SalaryMoreThan300K = employee[employee['salary'] > 300000]
print(f"Salaries greater than 50 thousand are\n {SalaryMoreThan300K}")

# updating department of all employees where id < 100
employee['department'][employee['id'] < 100] = 'Junior Staff'
print("\nUpdated departments for id < 100:\n", employee[employee['id'] < 100])

rec = employee.view(np.recarray)
print(rec.id)
print(rec.department)
print(rec.salary)

('id', 'department', 'salary')
[150000. 200000. 130000.  70000. 140000. 170000. 230000. 500000.  50000.
 320000.]
Salaries greater than 50 thousand are
 [(200, 'Head Of Departm', 500000.) (250, 'Model Training', 320000.)]

Updated departments for id < 100:
 [(25, 'Junior Staff', 150000.) (50, 'Junior Staff', 200000.)
 (75, 'Junior Staff', 130000.)]
[ 25  50  75 100 125 150 175 200 225 250]
['Junior Staff' 'Junior Staff' 'Junior Staff' 'Web Developer'
 'Human Resource' 'Management' 'Coordinator' 'Head Of Departm' 'Helper'
 'Model Training']
[150000. 200000. 130000.  70000. 140000. 170000. 230000. 500000.  50000.
 320000.]
