In [1]:
import sys
import numpy as np

# Basics

In [26]:
myarr = np.array([1,2,3], dtype='float64')
print(myarr)

random = np.random.randint(101, size=[5,5])
random

[1. 2. 3.]


array([[27, 79, 79, 63, 75],
       [73, 27, 20, 69, 76],
       [64, 29, 89, 90, 65],
       [44, 45, 23, 29, 96],
       [ 0, 97, 78, 49, 52]])

In [18]:
random[2:4,1:3]

array([[42, 90],
       [53, 30]])

In [23]:
# You can select multiple individual elements from the matrix in this way.
random[[0, 2, -1]]

array([[80, 35, 96, 54, 51],
       [85, 42, 90, 77, 47],
       [82,  9, 34, 24, 59]])

## Array Types

In [27]:
myarr.dtype

dtype('float64')

In [25]:
random.shape

(5, 5)

In [40]:
charArr = np.array(['a', 'c', 'e'])
charArr.dtype

dtype('<U1')

## Dimensions and Shapes

In [29]:
random.shape

(5, 5)

In [30]:
myarr.shape

(3,)

In [31]:
random.size

25

In [32]:
charArr.size

3

In [41]:
charArr = np.append(charArr, 'b')
charArr.size

4

array(['a', 'c', 'e'], dtype='<U1')

## Basic Stats

In [10]:
sample_arr = np.random.randint(101, size=[10, 10])
sample_arr

array([[  6,  65,  64,  88,  83,   3,  95,  24,  53,  92],
       [ 58,  29,  18,  48,  65,  83,  29,  69,   5,  84],
       [  8,  62,  62,  84,   6,  85,  68,  19,  28,  42],
       [ 36,  62, 100,  19,  11,   4,  97,  74,  24,  67],
       [ 48,  31,  31,  93,  26,  87,  71,  35,  82,  83],
       [ 18,  13,  78,   2,  82,  93,  69,  85,  17,  52],
       [ 12,   3,  21,  86,  84,  30,  22,  31,  45,  98],
       [ 95,  72,  34,   0,  29,  41,  83,  46,  22,  73],
       [ 35,  70,  30,  98,  20,  24,  91,   4,  14,  72],
       [  7,  18,  70,  20,  95,  66,  19, 100,  28,   9]])

In [44]:
sample_arr.sum()

4609

In [45]:
sample_arr.mean()

46.09

In [46]:
sample_arr.std()

30.58728330532151

In [47]:
# We can specify specific regions of the arr for stats
sample_arr[0].sum()

443

In [48]:
sample_arr[0].mean()

44.3

In [49]:
# We can also specify it by axis
sample_arr.sum(axis=0)

array([402, 582, 481, 376, 490, 538, 243, 515, 445, 537])

## Broadcasting and Vectorized Operations

In [57]:
newArr = np.arange(10)
newArr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [54]:
newArr + 10

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [58]:
newArr += 10
newArr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [66]:
a = np.arange(10, 101)
b = np.arange(-100, - 9)
a + b

array([-90, -88, -86, -84, -82, -80, -78, -76, -74, -72, -70, -68, -66,
       -64, -62, -60, -58, -56, -54, -52, -50, -48, -46, -44, -42, -40,
       -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14,
       -12, -10,  -8,  -6,  -4,  -2,   0,   2,   4,   6,   8,  10,  12,
        14,  16,  18,  20,  22,  24,  26,  28,  30,  32,  34,  36,  38,
        40,  42,  44,  46,  48,  50,  52,  54,  56,  58,  60,  62,  64,
        66,  68,  70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90])

## Boolean Arrays

In [2]:
a = np.arange(4)
a

array([0, 1, 2, 3])

In [3]:
a[[0, -1]]

array([0, 3])

In [4]:
# Select elements by boolean vals
a[[True, False, True, False]]

array([0, 2])

In [5]:
# Compare the whole iterator with a condition
a >= 2

array([False, False,  True,  True])

In [6]:
# Select all elements that satisfy the inner condition
a[a>=2]

array([2, 3])

In [7]:
# Select all elements that are greater than the mean
a[a > a.mean()]

array([2, 3])

In [8]:
# Select all elements that are NOT greater than the iterator mean
a[~(a > a.mean())]

array([0, 1])

## Linear Algebra

In [20]:
a = np.arange(1, 10)
a = a.reshape((3,3))
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [21]:
b = np.random.randint(1, 10, size=(3,3))
b

array([[9, 1, 1],
       [4, 5, 3],
       [1, 5, 9]])

In [22]:
a.dot(b)

array([[ 20,  26,  34],
       [ 62,  59,  73],
       [104,  92, 112]])

## Size of Objects in Memory

In [23]:
# Regular python int = 24bytes
sys.getsizeof(1)

28

In [24]:
# Regular size of long in python
sys.getsizeof(10**100)

72

In [25]:
# Numpy ints
np.dtype(int).itemsize

4

In [26]:
# Numpy floats
np.dtype(float).itemsize

8

In [29]:
# Regular python list size
sys.getsizeof([1])

64

In [30]:
# Numpy int array o n size
np.array([1]).nbytes

4

## Difference in Time Complexity / Performance

In [38]:
myList = list(range(1000000))

In [39]:
a = np.arange(1000000)

In [40]:
%time np.sum(a ** 2)

Wall time: 3 ms


584144992

In [41]:
%time sum([item ** 2 for item in myList])

Wall time: 239 ms


333332833333500000