# Agenda

1. 2D NumPy
2. Pandas
    - Series (1D)
    - Data frame (2D)
3. Reading from CSV

In [1]:
import numpy as np

In [5]:
# Seed the legacy global generator so the 20 integers drawn from [0, 100) are reproducible.
np.random.seed(0)
a = np.random.randint(low=0, high=100, size=20)
a

array([44, 47, 64, 67, 67,  9, 83, 21, 36, 87, 70, 88, 88, 12, 58, 65, 39,
       87, 46, 88])

In [3]:
a.dtype

dtype('int64')

In [7]:
# Same seed, but request 8-bit integers; the values drawn differ from the default-dtype run.
np.random.seed(0)
a = np.random.randint(low=0, high=100, size=20, dtype=np.int8)
a

array([44, 10, 12, 47, 42, 68, 23, 38, 22, 55, 64, 76, 33, 88, 67, 51, 78,
       26, 82, 34], dtype=int8)

In [8]:
b = np.array([10, 20, 30, 40])

In [9]:
# Shapes (20,) and (4,) are not broadcast-compatible, so this addition raises.
# The error is the point of the cell: catch it and show the message so that
# "Restart & Run All" can continue past this cell.
try:
    a + b
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (20,) (4,) 

In [10]:
a.shape

(20,)

In [11]:
b.shape

(4,)

In [12]:
# Turn the 20 values into 5 rows x 4 columns.
# NumPy discourages assigning to `.shape` in place; `reshape` is the preferred way.
a = a.reshape(5, 4)

In [13]:
a

array([[44, 10, 12, 47],
       [42, 68, 23, 38],
       [22, 55, 64, 76],
       [33, 88, 67, 51],
       [78, 26, 82, 34]], dtype=int8)

In [14]:
# Draw the same 20 seeded int8 values and reshape them to 5x4 in a single expression.
np.random.seed(0)
a = np.random.randint(low=0, high=100, size=20, dtype=np.int8).reshape((5, 4))

In [15]:
a

array([[44, 10, 12, 47],
       [42, 68, 23, 38],
       [22, 55, 64, 76],
       [33, 88, 67, 51],
       [78, 26, 82, 34]], dtype=int8)

In [16]:
# Ask randint for the 5x4 shape directly instead of reshaping afterwards.
np.random.seed(0)
a = np.random.randint(low=0, high=100, size=(5, 4), dtype=np.int8)
a

array([[44, 10, 12, 47],
       [42, 68, 23, 38],
       [22, 55, 64, 76],
       [33, 88, 67, 51],
       [78, 26, 82, 34]], dtype=int8)

In [19]:
# how can I get to 64?

a[2][2]    # don't do this, even if it works: a[2] first produces the whole row, which is then indexed a second time

64

In [21]:
a[2,2]    # this is how we should retrieve from NumPy

64

In [22]:
a[0, 2]

12

In [23]:
a[2, 3]  # the index "2, 3" is a tuple -- NumPy receives (2, 3)

76

In [24]:
a[(2,3)]      # this syntax works, but is ugly -- don't do it

76

In [25]:
# what if I want both 55 and 64 from row index 2?

a[2, [1,2]]      # row index 2, columns 1+2 (given as a list of column indexes)

array([55, 64], dtype=int8)

In [26]:
a[2, 1:3]        # row index 2, columns 1 up to, not including 3

array([55, 64], dtype=int8)

In [27]:
a[1:3, 1:3]

array([[68, 23],
       [55, 64]], dtype=int8)

In [28]:
a[1:4:2, 1:3]   # rows 1+3, columns 1+2

array([[68, 23],
       [88, 67]], dtype=int8)

In [29]:
a

array([[44, 10, 12, 47],
       [42, 68, 23, 38],
       [22, 55, 64, 76],
       [33, 88, 67, 51],
       [78, 26, 82, 34]], dtype=int8)

In [30]:
a.sum()

960

In [31]:
a.sum(axis=0)   # sums all of the numbers in each column, gives us a new row-sized array

array([219, 247, 248, 246])

In [32]:
a.sum(axis=1)   # sums all of the numbers in each row, gives us a new column-sized array

array([113, 171, 217, 239, 220])

In [33]:
a.min(axis=0)

array([22, 10, 12, 34], dtype=int8)

In [34]:
a.max(axis=1)

array([47, 68, 76, 88, 82], dtype=int8)

In [35]:
a.std(axis=0)

array([18.78722971, 28.16806703, 27.13374283, 14.71597771])

In [36]:
a.mean(axis=0)

array([43.8, 49.4, 49.6, 49.2])

# Exercises: NumPy

1. Create a 2-dimensional NumPy array of 45 random integers from 0 up to (but not including) 100, with 5 rows and 9 columns.
2. Retrieve the row at index 2.
3. Retrieve the column at index 3.  (Think about a slice, and how you could get the items.)
4. Retrieve the rows at indexes 1 and 4.
5. Retrieve the columns at indexes 1 and 4.
6. Get the mean of the even-valued items in the row at index 4.
7. Get the mean of the odd-valued items in the column at index 3.

In [37]:
a

array([[44, 10, 12, 47],
       [42, 68, 23, 38],
       [22, 55, 64, 76],
       [33, 88, 67, 51],
       [78, 26, 82, 34]], dtype=int8)

In [38]:
a.transpose()

array([[44, 42, 22, 33, 78],
       [10, 68, 55, 88, 26],
       [12, 23, 64, 67, 82],
       [47, 38, 76, 51, 34]], dtype=int8)

In [39]:
a.T

array([[44, 42, 22, 33, 78],
       [10, 68, 55, 88, 26],
       [12, 23, 64, 67, 82],
       [47, 38, 76, 51, 34]], dtype=int8)

In [40]:
# Exercise 1: 45 seeded int8 values laid out as 5 rows x 9 columns.
np.random.seed(0)
a = np.random.randint(low=0, high=100, size=(5, 9), dtype=np.int8)
a

array([[44, 10, 12, 47, 42, 68, 23, 38, 22],
       [55, 64, 76, 33, 88, 67, 51, 78, 26],
       [82, 34, 91, 67, 11, 88,  9, 36, 83],
       [94, 33, 31, 21, 81, 89, 37, 86, 98],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [41]:
# row index 2
a[2]

array([82, 34, 91, 67, 11, 88,  9, 36, 83], dtype=int8)

In [42]:
# column index 3
a.T[3]

array([47, 33, 67, 21, 87], dtype=int8)

In [44]:
# column index 3, using a slice
a[0:5, 3]

array([47, 33, 67, 21, 87], dtype=int8)

In [45]:
# column index 3, using a different slice
a[:, 3]

array([47, 33, 67, 21, 87], dtype=int8)

In [46]:
# rows at index 1+4

a[[1,4]]

array([[55, 64, 76, 33, 88, 67, 51, 78, 26],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [50]:
# row indexes 1+4, using a slice
a[1:5:3]

array([[55, 64, 76, 33, 88, 67, 51, 78, 26],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [51]:
a[:, [1,4]]

array([[10, 42],
       [64, 88],
       [34, 11],
       [33, 81],
       [55, 58]], dtype=int8)

In [52]:
a[:, 1:5:3]

array([[10, 42],
       [64, 88],
       [34, 11],
       [33, 81],
       [55, 58]], dtype=int8)

In [56]:
# mean of even items in row index 4
# Use one combined index (row 4, boolean mask over its columns) rather than
# chaining a[4][...], in line with the a[2, 2]-over-a[2][2] advice given earlier.
a[4, a[4] % 2 == 0].mean()

60.0

In [60]:
# mean of odd items in column index 3
# Use one combined index (boolean mask over the rows, column 3) rather than
# chaining a[:, 3][...], in line with the a[2, 2]-over-a[2][2] advice given earlier.
a[a[:, 3] % 2 == 1, 3].mean()

51.0

In [61]:
a

array([[44, 10, 12, 47, 42, 68, 23, 38, 22],
       [55, 64, 76, 33, 88, 67, 51, 78, 26],
       [82, 34, 91, 67, 11, 88,  9, 36, 83],
       [94, 33, 31, 21, 81, 89, 37, 86, 98],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [62]:
# Lay the same 45 values out as 9 rows x 5 columns.
b = np.reshape(a, (9, 5))
b

array([[44, 10, 12, 47, 42],
       [68, 23, 38, 22, 55],
       [64, 76, 33, 88, 67],
       [51, 78, 26, 82, 34],
       [91, 67, 11, 88,  9],
       [36, 83, 94, 33, 31],
       [21, 81, 89, 37, 86],
       [98, 36, 55,  5, 87],
       [58, 43, 76, 70, 60]], dtype=int8)

In [63]:
# 45 elements cannot fill a 3x4 (12-element) shape, so reshape raises.
# The error is the point of the cell: catch it and show the message so that
# "Restart & Run All" can continue past this cell.
try:
    a.reshape(3, 4)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: cannot reshape array of size 45 into shape (3,4)

In [64]:
a

array([[44, 10, 12, 47, 42, 68, 23, 38, 22],
       [55, 64, 76, 33, 88, 67, 51, 78, 26],
       [82, 34, 91, 67, 11, 88,  9, 36, 83],
       [94, 33, 31, 21, 81, 89, 37, 86, 98],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [65]:
b

array([[44, 10, 12, 47, 42],
       [68, 23, 38, 22, 55],
       [64, 76, 33, 88, 67],
       [51, 78, 26, 82, 34],
       [91, 67, 11, 88,  9],
       [36, 83, 94, 33, 31],
       [21, 81, 89, 37, 86],
       [98, 36, 55,  5, 87],
       [58, 43, 76, 70, 60]], dtype=int8)

In [66]:
id(a)

4926381136

In [67]:
id(b)

4926388720

In [68]:
a[0, 0] = 99

In [69]:
a

array([[99, 10, 12, 47, 42, 68, 23, 38, 22],
       [55, 64, 76, 33, 88, 67, 51, 78, 26],
       [82, 34, 91, 67, 11, 88,  9, 36, 83],
       [94, 33, 31, 21, 81, 89, 37, 86, 98],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)

In [70]:
b

array([[99, 10, 12, 47, 42],
       [68, 23, 38, 22, 55],
       [64, 76, 33, 88, 67],
       [51, 78, 26, 82, 34],
       [91, 67, 11, 88,  9],
       [36, 83, 94, 33, 31],
       [21, 81, 89, 37, 86],
       [98, 36, 55,  5, 87],
       [58, 43, 76, 70, 60]], dtype=int8)

In [72]:
help(np.reshape)

Help on function reshape in module numpy:

reshape(a, newshape, order='C')
    Gives a new shape to an array without changing its data.
    
    Parameters
    ----------
    a : array_like
        Array to be reshaped.
    newshape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    order : {'C', 'F', 'A'}, optional
        Read the elements of `a` using this index order, and place the
        elements into the reshaped array using this index order.  'C'
        means to read / write the elements using C-like index order,
        with the last axis index changing fastest, back to the first
        axis index changing slowest. 'F' means to read / write the
        elements using Fortran-like index order, with the first index
        c

In [73]:
b = a.reshape(9, 5).copy()

In [75]:
help(np.copy)

Help on function copy in module numpy:

copy(a, order='K', subok=False)
    Return an array copy of the given object.
    
    Parameters
    ----------
    a : array_like
        Input data.
    order : {'C', 'F', 'A', 'K'}, optional
        Controls the memory layout of the copy. 'C' means C-order,
        'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous,
        'C' otherwise. 'K' means match the layout of `a` as closely
        as possible. (Note that this function and :meth:`ndarray.copy` are very
        similar, but have different default values for their order=
        arguments.)
    subok : bool, optional
        If True, then sub-classes will be passed-through, otherwise the
        returned array will be forced to be a base-class array (defaults to False).
    
        .. versionadded:: 1.19.0
    
    Returns
    -------
    arr : ndarray
        Array interpretation of `a`.
    
    See Also
    --------
    ndarray.copy : Preferred method for creating an arr

In [76]:
b.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [77]:
# Reshape again, this time without .copy(), and inspect the result's memory flags.
b = np.reshape(a, (9, 5))
b.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [78]:
a

array([[99, 10, 12, 47, 42, 68, 23, 38, 22],
       [55, 64, 76, 33, 88, 67, 51, 78, 26],
       [82, 34, 91, 67, 11, 88,  9, 36, 83],
       [94, 33, 31, 21, 81, 89, 37, 86, 98],
       [36, 55,  5, 87, 58, 43, 76, 70, 60]], dtype=int8)