<div style="text-align:left;font-size:2em"><span style="font-weight:bolder;font-size:1.25em">SP2273 | Learning Portfolio</span><br><br><span style="font-weight:bold;color:darkred">Storing Data (Need)</span></div>

# What to expect in this chapter

# 1 Lists, Arrays & Dictionaries

## 1.1 Let’s compare

In [7]:
import numpy as np

In [3]:
# Python lists: hero names and real names, stored in the same order
py_super_names = [
    "Black Widow",
    "Iron Man",
    "Doctor Strange",
]
py_real_names = [
    "Natasha Romanoff",
    "Tony Stark",
    "Stephen Strange",
]

In [5]:
py_super_names

['Black Widow', 'Iron Man', 'Doctor Strange']

In [None]:
# Numpy Arrays
np_super_names = np.array(["Black Widow", "Iron Man", "Doctor Strange"])
np_real_names = np.array(["Natasha Romanoff", "Tony Stark", "Stephen Strange"])

In [None]:
np_super_names = np.array(py_super_names)   # converting a Python list to a NumPy array

In [13]:
# Dictionary: each key is a real name, each value is the matching hero name
superhero_info = {
    "Natasha Romanoff": "Black Widow",
    "Tony Stark": "Iron Man",
    "Stephen Strange": "Doctor Strange"
}

## 1.2 Accessing data from a list (or array)

In [14]:
py_super_names = ["Black Widow", "Iron Man", "Doctor Strange"]
py_real_names = ["Natasha Romanoff", "Tony Stark", "Stephen Strange"]

In [15]:
py_real_names[0]

'Natasha Romanoff'

In [16]:
py_super_names[0]

'Black Widow'

In [17]:
py_super_names[2]

'Doctor Strange'

In [18]:
py_super_names[-1]

'Doctor Strange'

## 1.3 Accessing data from a dictionary

In [4]:
superhero_info = {
    "Natasha Romanoff": "Black Widow",
    "Tony Stark": "Iron Man",
    "Stephen Strange": "Doctor Strange"
}

In [5]:
superhero_info["Natasha Romanoff"]

'Black Widow'

In [None]:
superhero_info.keys() # Access all the keys in the dictionary

dict_keys(['Natasha Romanoff', 'Tony Stark', 'Stephen Strange'])

In [9]:
superhero_info.values() # Access all the values in the dictionary

dict_values(['Black Widow', 'Iron Man', 'Doctor Strange'])

## 1.4 Higher dimensional lists

In [10]:
# A 2D list: one [real name, hero name] pair per superhero
py_superhero_info = [
    ['Natasha Romanoff', 'Black Widow'],
    ['Tony Stark', 'Iron Man'],
    ['Stephen Strange', 'Doctor Strange'],
]

In [13]:
py_superhero_info[1][1] # First [1] picks the second pair; second [1] picks the hero name inside that pair

'Iron Man'

# 2 Lists vs. Arrays

## 2.1 Size

In [19]:
# Ten [number, letter] rows: 1 is paired with "A", 2 with "B", ... and 10 with "J"
py_list_2d = [[number, letter] for number, letter in enumerate("ABCDEFGHIJ", start=1)]

In [20]:
import numpy as np
np_array_2d = np.array(py_list_2d)

In [22]:
len(py_list_2d)

10

In [None]:
len(np_array_2d) # returns the size of the first dimension (rows)

10

In [None]:
np_array_2d.shape # This returns a tuple: 10 is the number of rows and 2 is the number of columns

(10, 2)

In [21]:
# Only the last expression in a cell is displayed, so the two len() results
# below are computed but not shown; only the shape appears in the output.
len(py_list_2d)
len(np_array_2d)
np_array_2d.shape

(10, 2)

## 2.2 Arrays are fussy about type

In [25]:
py_list = [1, 1.5, 'A']
np_array = np.array(py_list)

In [None]:
py_list # A list can hold a mix of integers, floats, and strings at the same time

[1, 1.5, 'A']

In [32]:
np_array # A NumPy array holds a single type, so every element here was converted to a string

array(['1', '1.5', 'A'], dtype='<U32')

In [None]:
# Slice out the first two elements of the string array: '1' and '1.5'
numbers_only = np_array[0:2]

# Convert those strings to floats so that arithmetic can be done on them
math_ready = numbers_only.astype(float)

## 2.3 Adding a number

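Applying an operation such as `+ 10` to every element of an array at once is called vectorization. Stretching the single value 10 so that it pairs with each element of the array is called broadcasting.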

In [2]:
import numpy as np
py_list = [1, 2, 3, 4, 5]
np_array = np.array(py_list)
np_array + 10  # element-wise: 10 is added to each element and the result is displayed

array([11, 12, 13, 14, 15])

In [4]:
py_list + 10 # Fails: for lists, + joins two lists together, so an int cannot be added

TypeError: can only concatenate list (not "int") to list

## 2.4 Adding another list

In [5]:
py_list_1 = [1, 2, 3, 4, 5]
py_list_2 = [10, 20, 30, 40, 50]

np_array_1 = np.array(py_list_1)
np_array_2 = np.array(py_list_2)

In [7]:
py_list_1 + py_list_2

[1, 2, 3, 4, 5, 10, 20, 30, 40, 50]

In [8]:
np_array_1 + np_array_2

array([11, 22, 33, 44, 55])

## 2.5 Multiplying by a Number

In [9]:
py_list = [1, 2, 3, 4, 5]
np_array = np.array(py_list)

In [None]:
py_list*2 # Multiplying a list by n repeats its contents n times

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [12]:
np_array*2 # NumPy uses broadcasting to perform element-wise multiplication

array([ 2,  4,  6,  8, 10])

## 2.6 Squaring

In [13]:
py_list = [1, 2, 3, 4, 5]
np_array = np.array(py_list)

In [14]:
np_array**2

array([ 1,  4,  9, 16, 25])

In [15]:
py_list**2

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

## 2.7 Asking questions

In [16]:
py_list = [1, 2, 3, 4, 5]
np_array = np.array(py_list)         

In [None]:
py_list == 3 # Compares the whole list object with 3, not its individual elements

False

In [21]:
py_list == [1, 2, 3, 4, 5]

True

In [22]:
np_array == 3
# This performs an element-wise comparison: it looks at every number and asks "Is it 3?"

array([False, False,  True, False, False])

In [None]:
np_array > 3 # Element-wise: checks each element separately and gives one True/False per element

array([False, False, False,  True,  True])

## 2.8 Mathematics

In [23]:
py_list = [1, 2, 3, 4, 5]
np_array = np.array(py_list)

In [24]:
sum(py_list)

15

In [27]:
max(py_list)

5

In [26]:
min(py_list)

1

In [28]:
np_array.sum()

np.int64(15)

In [29]:
np_array.max()

np.int64(5)

In [30]:
np_array.min()

np.int64(1)

In [31]:
np_array.mean()

np.float64(3.0)

In [32]:
np_array.std()

np.float64(1.4142135623730951)

In [34]:
py_list.sum() # Python lists have no .sum() method; use the built-in sum() function instead

AttributeError: 'list' object has no attribute 'sum'

## Footnotes

## More Practice

In [6]:
# Indexing a NumPy array
import numpy as np
np_array = np.array([10, 20, 30, 40, 50])

first_item = np_array[0]    # index 0 is the first element
last_item = np_array[-1]    # index -1 counts back from the end
print(first_item)   # Output: 10
print(last_item)    # Output: 50

10
50


In [7]:
# Indexing a plain Python list
# The variable is called py_list rather than list so that Python's built-in
# list type is not shadowed for the rest of the kernel session.
py_list = [10, 20, 30, 40, 50]

print(py_list[0])   # first element
print(py_list[-1])  # last element

10
50


In [10]:
# Slicing a NumPy array
import numpy as np
np_array = np.array([10, 20, 30, 40, 50])

subset = np_array[:2]   # the start defaults to 0, so this is elements 0 and 1
print(subset)
every_second = np_array[::2]   # a step of 2 keeps every second element
print(every_second)

[10 20]
[10 30 50]


In [15]:
# Slicing
# np.arange builds the integers 1 to 10 directly as a NumPy array
data = np.arange(1, 11)
result = data[::2]   # every second element, starting from the first
print(result)

[1 3 5 7 9]


In [18]:
# Masking Conditional Subsetting
import numpy as np

temp_c = np.array([21, 35, 18, 40, 22, 10, 38])

# The Question: a boolean array that is True wherever the temperature is above 30
hot_mask = temp_c > 30

# Apply the mask to keep only the elements where it is True
hot_temps = temp_c[hot_mask]

print(hot_temps)


[35 40 38]


In [20]:
# Masking Conditional Subsetting (Multiple Conditions)
# Example Question: Warm temperatures between 20 and 30 degrees Celsius (inclusive)

import numpy as np

temp_c = np.array([21, 35, 18, 40, 22, 10, 38])

# Each condition sits in its own parentheses because & binds more tightly than >= and <=
warm_mask = (temp_c >= 20) & (temp_c <= 30)

# Keep only the elements where both conditions are True
warm_temps = temp_c[warm_mask]

print(warm_temps)


[21 22]


In [21]:
# Conditional subsetting with an explicit Python loop instead of a mask
# Example question: which temperatures lie between 20 and 30 degrees Celsius (inclusive)?

import numpy as np

temp_c = np.array([21, 35, 18, 40, 22, 10, 38])

for reading in temp_c:
    # A chained comparison checks both bounds at once
    if 20 <= reading <= 30:
        print(reading)

21
22


In [33]:
# Subsetting multi-dimensional arrays
# grades has one row per student (3 students) and one column per subject (4 subjects)
grades = np.array([
    [85, 90, 78, 92],
    [70, 88, 95, 81],
    [60, 72, 68, 75],
])

# All scores of the second student (row index 1)
print(grades[1])

# Scores in the last subject for every student (last column)
last_sub = grades[:, -1]
print(last_sub)

# Sub-grid of the first 2 students and first 2 subjects; indexing is [rows, columns]
print(grades[:2, :2])

[70 88 95 81]
[92 81 75]
[[85 90]
 [70 88]]
