# Numpy


In [1]:
import numpy as np

## Creating Arrays
- Single dimentional array and specif}y the type of an array `np.array([1, 2, 3], dtype="int")`
- Multi dimentional array `np.array([[1, 2, 3], [4, 5, 6]])`
- Get the dimension `multi_array.ndim`
- Get the shape `array.shape`
- Get the type `multi_array.dtype`
- Specify the type of an array `np.array([1, 2, 3, 4], dtype="int16")`

In [2]:
# Single dimentional array
array = np.array([1, 2, 3])
# Adding values
array = np.append(array, 2)
print(array)

# Multi dimentional array
multi_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], ndmin = 3) 
print(multi_array)

# Get the dimension
print("Dimension: ", multi_array.ndim)

# Shape
print("Shape:", array.shape)

# Type
print("Type:", multi_array.dtype)

# Specify the type of an array
array = np.array([1, 2, 3, 4], dtype="int16")
print(array.dtype)

[1 2 3 2]
[[[1 2 3]
  [4 5 6]
  [7 8 9]]]
Dimension:  3
Shape: (4,)
Type: int32
int16


### Generate array
- zero array `np.zeros(8)`
- one array `np.ones(8)`
- 1 to n array `np.arange(8)`
- Any other number `np.full(<dimension>, <value>, <type>)`
- The identity matrix `np.identity(3)`

In [3]:
print(f"Zeros: {np.zeros(8)}")
print(f"Ones: {np.ones(8)}")
print(f"1 to n: {np.arange(8)}")

# np.arange
print(np.arange(10, 81, 20)) # np.arange(start, stop, step)

# You can also create multi dimensional arrays
print(f"Ones multi dimensional: {np.ones((6, 2))}")

# Any other number np.full(<dimension>, <value>, <type>)
print(f"Nines: {np.full((3, 3), 9, dtype = 'float32')}")

# The identity matrix
print(np.identity(3))

Zeros: [0. 0. 0. 0. 0. 0. 0. 0.]
Ones: [1. 1. 1. 1. 1. 1. 1. 1.]
1 to n: [0 1 2 3 4 5 6 7]
[10 30 50 70]
Ones multi dimensional: [[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]
Nines: [[9. 9. 9.]
 [9. 9. 9.]
 [9. 9. 9.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


### Random arrays

In [4]:
# Random decimal numbers array
print(np.random.rand(1, 9))

# Random int numbers array. np.random.randint(<min>, <max>, <size>)
print(np.random.randint(2, 9, 9))

# Passing an array as example
np.random.random_sample(array.shape)

[[0.69767853 0.3672776  0.91311877 0.07740979 0.22570875 0.16612649
  0.18482316 0.55330808 0.21952489]]
[3 4 2 3 5 2 4 6 7]


array([0.80248509, 0.29414056, 0.60830507, 0.13203113])

### Reshape arrays
`array.reshape()`

In [5]:
array = np.arange(10)
print(f"Original array: {array}")

array = array.reshape((2, 5))
array

Original array: [0 1 2 3 4 5 6 7 8 9]


array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

## Data types
[Data types documentation](https://numpy.org/doc/stable/reference/arrays.scalars.html)

In [6]:
array = np.array([1, 2, 3, 5], dtype = "int")
bool_array = np.array([[1, 0, 0, 1], [0, 1, 1, 0]], dtype = np.bool_)
char_array = np.array(['a', 'b', 'c'], dtype = np.chararray)
nan_array = np.nan

print(f"int array: {array}")
print(f"bool array: {bool_array}")
print(f"char array: {char_array}")
print(f"nan array: {nan_array}")

np.isnan(nan_array) # Check if the array is nan

int array: [1 2 3 5]
bool array: [[ True False False  True]
 [False  True  True False]]
char array: ['a' 'b' 'c']
nan array: nan


True

### Nan values

In [7]:
nan_array = np.array([[4, 3], [np.nan, 2.], [np.nan, np.nan]])

print(nan_array[np.isnan(nan_array)])
print(nan_array[~np.isnan(nan_array)])

[nan nan nan]
[4. 3. 2.]


## Accessing / Changing specific elements, rows, column, etc
### Get a specific element

In [8]:
array = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
array[1, 0]

8

### Get a specific row

In [9]:
print(array[0, :]) # First column 
print(array[1, :]) # Second column...

[1 2 3 4 5 6 7]
[ 8  9 10 11 12 13 14]


### Get a specific column

In [10]:
print(array[:, -1]) # Get the last column
print(array[:, 0]) # First column

[ 7 14]
[1 8]


### Getting a little more fancy 
`[row, startindex : endindex : stepindex]`

In [11]:
array[0, 1: -1: 2]

array([2, 4, 6])

### Change value of an element

In [12]:
array[1, -1] = 100 # Last element
print(array, "\n")

array[:, 0] = [8, 1] # First column
array

[[  1   2   3   4   5   6   7]
 [  8   9  10  11  12  13 100]] 



array([[  8,   2,   3,   4,   5,   6,   7],
       [  1,   9,  10,  11,  12,  13, 100]])

### Boolean index

In [13]:
print(f"Print true or false: {array % 2 == 0}\n")
print(f"Print values: {array[array % 2 == 0]}")
print(f"Print values: {char_array[char_array == 'a']}")

Print true or false: [[ True  True False  True False  True False]
 [False False  True False  True False  True]]

Print values: [  8   2   4   6  10  12 100]
Print values: ['a']


## Masks
A masked array is the combination of a standard numpy.ndarray and a mask. When a mask element is False, the corresponding element in the associated array is valid and is said to be unmasked. When a mask element is True, the corresponding element in the associated array is said to be masked (invalid).

In [14]:
import numpy.ma as ma

array = np.array([1, 2, 3, -1, 4])

# We define the negative value as invalid
mask_array = ma.masked_array(array, mask = [0, 0, 0, 1, 0])
mask_array

masked_array(data=[1, 2, 3, --, 4],
             mask=[False, False, False,  True, False],
       fill_value=999999)

In [15]:
# min is 1 not -1
mask_array.min()

1

We can also define the mask directly in the array constructor of the ma module.

In [16]:
mask_array = ma.array([1, 2, 3, -1, 4], mask = [0, 0, 0, 1, 0])
mask_array

masked_array(data=[1, 2, 3, --, 4],
             mask=[False, False, False,  True, False],
       fill_value=999999)

If we want to retrieve only valid values, we use the `compressed()` method

In [17]:
mask_array.compressed()

array([1, 2, 3, 4])

I can mask or unmask all elements by assigning True or False to the entire mask

In [18]:
mask_array.mask = True
mask_array

masked_array(data=[--, --, --, --, --],
             mask=[ True,  True,  True,  True,  True],
       fill_value=999999,
            dtype=int32)

In [19]:
mask_array.mask = False
mask_array

masked_array(data=[1, 2, 3, -1, 4],
             mask=[False, False, False, False, False],
       fill_value=999999)

We can query if a value is valid with the `ma.masked` method

In [20]:
mask_array.mask = [0, 0, 0, 1, 0]

print(mask_array[0] is ma.masked)
print(mask_array[3] is ma.masked)

False
True


We can 'fill' the masked values with a specific value. `array.filled(value)`

In [21]:
mask_array.filled(0)

array([1, 2, 3, 0, 4])

### Mask methods

In [22]:
array

array([ 1,  2,  3, -1,  4])

Mask an array where equal to a given value. `ma.masked_equal(array, value)`

In [23]:
ma.masked_equal(array, 4)

masked_array(data=[1, 2, 3, -1, --],
             mask=[False, False, False, False,  True],
       fill_value=4)

Mask an array where `not` equal to a given value. `ma.masked_not_equal(array, value)`

In [24]:
ma.masked_not_equal(array, 2)

masked_array(data=[--, 2, --, --, --],
             mask=[ True, False,  True,  True,  True],
       fill_value=999999)

Mask an array where a condition is met. `ma.masked_where(condition, array)`

In [25]:
ma.masked_where(array < 2, array)

masked_array(data=[--, 2, 3, --, 4],
             mask=[ True, False, False,  True, False],
       fill_value=999999)

## Dates
`np.datetime64("year-month-day")`

In [26]:
date = np.datetime64("2020-09-01")
date

numpy.datetime64('2020-09-01')

### Dates with hour
`np.datetime64("year-month-dayThour:min")`

In [27]:
date = np.datetime64("2020-09-01T14:30")
date

numpy.datetime64('2020-09-01T14:30')

### Creating Date Arrays
In this case is mandatory to indicate `dtype = "datetime64"`

In [28]:
array = np.array(['2020-07-01', '2020-08-01', '2020-09-01'], dtype = "datetime64")
array

array(['2020-07-01', '2020-08-01', '2020-09-01'], dtype='datetime64[D]')

We can also create date arrays with the numpy iterator `np.arange(start, end, step)`

In [29]:
np.arange('2020-08', '2020-09', dtype = 'datetime64[D]')

array(['2020-08-01', '2020-08-02', '2020-08-03', '2020-08-04',
       '2020-08-05', '2020-08-06', '2020-08-07', '2020-08-08',
       '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12',
       '2020-08-13', '2020-08-14', '2020-08-15', '2020-08-16',
       '2020-08-17', '2020-08-18', '2020-08-19', '2020-08-20',
       '2020-08-21', '2020-08-22', '2020-08-23', '2020-08-24',
       '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28',
       '2020-08-29', '2020-08-30', '2020-08-31'], dtype='datetime64[D]')

In [30]:
np.arange('2020-08', '2020-09', dtype = 'datetime64[W]')

array(['2020-07-30', '2020-08-06', '2020-08-13', '2020-08-20'],
      dtype='datetime64[W]')

### Date comparisons

In [31]:
np.datetime64('2020') == np.datetime64('2020-01-01')

True

In [32]:
np.datetime64('2020-03-14T11') == np.datetime64('2020-03-14T11:00:00.00')

True

### Operations Dates
The type *timedelta64* is created, which uses the same characters of 'Y', 'M', 'D', 'h', 'm', 's' for its creation.

In [33]:
date = np.timedelta64(8, 'D') # 8 days is equal to 1 week
np.timedelta64(date, 'W') # So 1 week

numpy.timedelta64(1,'W')

In [34]:
np.datetime64('2020-08-01') - np.datetime64('2020-07-01')

numpy.timedelta64(31,'D')

In [35]:
np.datetime64('2020-08-01') + np.timedelta64(10, 'D') # Sum 10 Days
np.datetime64('2020-08-01') - np.timedelta64(1, 'W') # Subtract 1 week
np.datetime64('2020-08-01') + np.timedelta64(48, 'h') # Sum 48 hours

numpy.datetime64('2020-08-03T00','h')

### Business days

In [36]:
np.busday_offset('2024-01-16', 4) # Tuesday add 4 days so it is monday instead of saturday

numpy.datetime64('2024-01-22')

### Is a day a business day 
`np.is_busday(np.datetime64(date))`

In [37]:
# Thursday
print(np.is_busday(np.datetime64('2020-09-03')))
# Saturday
print(np.is_busday(np.datetime64('2020-09-05')))

True
False


### Counting business days

In [38]:
np.busday_count(np.datetime64('2020-09-01'), np.datetime64('2020-09-30'))

21

## Maths and statistics

In [39]:
stats = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

print("min:", np.min(stats))
print("max of each row:", np.max(stats, axis = 1))
print("Sum of eeach col:", np.sum(stats, axis = 0))
print(stats + 2)
print(stats - 2)
print(stats * 2)
print(stats / 2)
print(np.cos(stats))

min: 1
max of each row: [4 8]
Sum of eeach col: [ 6  8 10 12]
[[ 3  4  5  6]
 [ 7  8  9 10]]
[[-1  0  1  2]
 [ 3  4  5  6]]
[[ 2  4  6  8]
 [10 12 14 16]]
[[0.5 1.  1.5 2. ]
 [2.5 3.  3.5 4. ]]
[[ 0.54030231 -0.41614684 -0.9899925  -0.65364362]
 [ 0.28366219  0.96017029  0.75390225 -0.14550003]]


## Chain Operations

Returns the concatenation of element strings for two str or unicode arrays.
`np.add()`

In [40]:
a = np.array(['A', 'B', 'C'])
b = np.array(['D', 'E', 'F'])

np.char.add(a, b)

array(['AD', 'BE', 'CF'], dtype='<U2')

multiply - Returns (a * i), i.e. multiple concatenation of strings, by elements.

In [41]:
np.char.multiply(a, [2, 3, 4])

array(['AA', 'BBB', 'CCCC'], dtype='<U4')

capitalize - Returns a copy of a with only the first character of each element uppercase.

In [42]:
names = np.array(['maria', 'antonio', 'francisco'])
np.char.capitalize(names)

array(['Maria', 'Antonio', 'Francisco'], dtype='<U9')

replace - Returns an array after replacing the characters

In [43]:
np.char.replace(names, 'mar', 'sof')

array(['sofia', 'antonio', 'francisco'], dtype='<U9')

split - For each element of a, returns a list of the words in the string, using sep 

In [44]:
names = np.array(['Mi nombre es Abraham Requena'])
print(np.char.split(names, sep = ' '))

names = np.array(["   hello   ", "    GOOD BYE ", " 123   "])
names = np.char.strip(names)
names

[list(['Mi', 'nombre', 'es', 'Abraham', 'Requena'])]


array(['hello', 'GOOD BYE', '123'], dtype='<U13')

### Comparisons
- `np.char.equal(a, b)`
- `np.char.not_equal(a, b)`
- `np.char.startswith(arrat, prefix = value)`
- `np.char.islower(array)`
- `np.char.isupper(array)`
- `np.char.isdigit(array)`

In [45]:
# equal / not equal - Valores iguales
a = np.array(['A', 'B', 'C'], dtype = np.str_)
b = np.array(['D', 'B', 'F'], dtype = np.str_)

print(np.char.equal(a, b))
print(np.char.not_equal(a, b))
print(np.char.startswith(names, prefix = 'he'))
print(np.char.islower(names))
print(np.char.isupper(names))
print(np.char.isdigit(names))

[False  True False]
[ True False  True]
[ True False False]
[ True False False]
[False  True False]
[False False  True]


## Load data from file

In [46]:
data_file = np.genfromtxt("data.txt", delimiter = ",", dtype = "int32")

# Boolean masking
data_boolean = data_file > 10
print(data_boolean, "\n")

# Get the elements depending of a conditional
data_indexing = data_file[(data_file > 10) & (data_file < 15)]
print(data_indexing, "\n")

print(np.any([data_file > 10], axis = 0))

[[False False False False False False False False False False]
 [False  True  True  True  True  True  True  True  True  True]] 

[11 12 13 14] 

[[False False False False False False False False False False]
 [False  True  True  True  True  True  True  True  True  True]]


## Exercises

- An integer array containing only the odd numbers from 1 to 100.
- String array with your first and last name.
- Date array containing all days from 01 January 2020 to 04 February 2020.

In [47]:
array = np.arange(1, 100, 2)
np.array(["Rafael", "Gomez"], dtype = np.str_)
dates = np.arange("2020-01-01", "2020-02-04", dtype="datetime64[D]")

Retrieves the last 10 items from the odd-numbered array

In [48]:
array[-10:]

array([81, 83, 85, 87, 89, 91, 93, 95, 97, 99])

Cycle through the date array, and generate a new numpy array containing only business days

In [49]:
business_days = np.array([], dtype = "datetime64")

for date in dates:
  if (np.is_busday(date)):
    business_days = np.append(business_days, date)

business_days

array(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-06',
       '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
       '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
       '2020-01-17', '2020-01-20', '2020-01-21', '2020-01-22',
       '2020-01-23', '2020-01-24', '2020-01-27', '2020-01-28',
       '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-03'],
      dtype='datetime64[D]')

In [50]:
matrix = np.array([[10, 1, 8, 4], [3, 7, 2, 1], [0, 2, 20, 12]])
ma.masked_where(matrix >= 10, matrix)

masked_array(
  data=[[--, 1, 8, 4],
        [3, 7, 2, 1],
        [0, 2, --, --]],
  mask=[[ True, False, False, False],
        [False, False, False, False],
        [False, False,  True,  True]],
  fill_value=999999)

Create two dates, the first of which will be your date of birth, and the second, that of a family member or friend of yours.

- What is the difference in time between the two dates?
- What would your date of birth be if you had been born 236 hours earlier?

In [51]:
birth_date = np.datetime64("2003-06-24")
birth_date_dad = np.datetime64("1971-05-08")

print(birth_date - birth_date_dad)
print(birth_date - np.timedelta64(236, "h"))

11735 days
2003-06-14T04
