# Personal Week 2 Review of Numpy and Pandas


In [1]:
import numpy as np
import pandas as pd

## Numpy Arrays


In [2]:
# np.array takes ONE array-like argument, so both rows are wrapped
# together in a single outer list (a tuple works just as well).
integer_array = np.array([list(range(5)), list(range(5, 10))])

integer_array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [3]:
# 3 blocks, each with 2 rows and 4 columns, filled with float zeros.
np.zeros(shape=(3, 2, 4))

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [4]:
# 2 blocks, each with 4 rows and 2 columns, filled with float ones.
np.ones(shape=(2, 4, 2))

array([[[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]]])

In [5]:
# 4 rows x 3 columns with ones on the main diagonal; the extra row stays all zeros.
np.eye(N=4, M=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 0.]])

In [6]:
# With a single size argument the result is a square 8 x 8 identity matrix.
np.eye(N=8)

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [7]:
# Seed the generator so this cell produces the same values on every
# Restart & Run All; an unseeded generator gives different numbers each run.
rng = np.random.default_rng(seed=42)

# Uniform floats in [0, 1) with shape (2, 3, 4).
rng.random((2, 3, 4))

array([[[0.72293011, 0.17519132, 0.05397256, 0.7906111 ],
        [0.39175034, 0.2737619 , 0.14739412, 0.12649368],
        [0.74337358, 0.67281931, 0.93480292, 0.61658234]],

       [[0.01107956, 0.69948349, 0.627851  , 0.90229418],
        [0.14924805, 0.7580697 , 0.6253038 , 0.6908552 ],
        [0.44079922, 0.66814508, 0.82751554, 0.99324646]]])

In [8]:
# np.indices stacks one index grid per axis, so the result has shape
# (number of dimensions, *dimensions) -- here (3, 2, 3, 4).
# Grid k holds, for every cell, that cell's index along axis k.
np.indices(dimensions=(2, 3, 4))

array([[[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]],


       [[[0, 0, 0, 0],
         [1, 1, 1, 1],
         [2, 2, 2, 2]],

        [[0, 0, 0, 0],
         [1, 1, 1, 1],
         [2, 2, 2, 2]]],


       [[[0, 1, 2, 3],
         [0, 1, 2, 3],
         [0, 1, 2, 3]],

        [[0, 1, 2, 3],
         [0, 1, 2, 3],
         [0, 1, 2, 3]]]])

In [9]:
# np.shape also accepts plain nested lists: 2 rows x 2 columns -> (2, 2).
np.shape([[5, 3], [2, 3]])

(2, 2)

In [10]:
# A bare scalar has zero dimensions, so its shape is the empty tuple ().
np.shape(3)

()

In [11]:
# A one-element list is 1-D with length 1 -> (1,); the value 2 itself is irrelevant.
np.shape([2])

(1,)

In [12]:
# 3 blocks x 2 rows x 5 columns of single characters.
# Heads-up: the letter 'p' is not part of the data ("klmno" is followed by "qrstu").
letters = np.array([
    [list("abcde"), list("fghij")],
    [list("klmno"), list("qrstu")],
    [list("vwxyz"), list("12345")],
])
letters

array([[['a', 'b', 'c', 'd', 'e'],
        ['f', 'g', 'h', 'i', 'j']],

       [['k', 'l', 'm', 'n', 'o'],
        ['q', 'r', 's', 't', 'u']],

       [['v', 'w', 'x', 'y', 'z'],
        ['1', '2', '3', '4', '5']]], dtype='<U1')

In [13]:
# Two indices: first block, first row -> a 1-D array of 5 letters.
letters[0, 0]

array(['a', 'b', 'c', 'd', 'e'], dtype='<U1')

In [14]:
# One index: the whole first block (2 rows x 5 columns).
letters[0]

array([['a', 'b', 'c', 'd', 'e'],
       ['f', 'g', 'h', 'i', 'j']], dtype='<U1')

In [15]:
# Three indices (block, row, column) pick out a single element.
letters[0, 0, 0]

'a'

In [16]:
# Third block, first row, third column.
letters[2, 0, 2]

'x'

In [17]:
# Slice along the first axis only: blocks 0 and 1 (the stop index 2 is excluded).
letters[:2]

array([[['a', 'b', 'c', 'd', 'e'],
        ['f', 'g', 'h', 'i', 'j']],

       [['k', 'l', 'm', 'n', 'o'],
        ['q', 'r', 's', 't', 'u']]], dtype='<U1')

In [18]:
# One slice per axis: blocks from index 1 onward, every row, first three columns.
letters[1:, :, :3]

array([[['k', 'l', 'm'],
        ['q', 'r', 's']],

       [['v', 'w', 'x'],
        ['1', '2', '3']]], dtype='<U1')

## Vector Addition

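Adding two NumPy arrays of the same shape adds them element by element.

Every arithmetic operator works element-wise in the same way, so `*` does **not** perform matrix multiplication. Use `@` (or `np.matmul`) for the matrix product:

```
matrix_a * matrix_b   # element-wise product, NOT the matrix product
matrix_a @ matrix_b   # matrix product
```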


In [19]:
# Two 1-D vectors of equal length.
vector_a, vector_b = np.array([3, 5]), np.array([2, 7])

# `+` pairs the elements up position by position: [3 + 2, 5 + 7].
vector_a + vector_b

array([ 5, 12])

In [20]:
# reshape only regroups the existing elements, so the total element count
# must stay the same: 3 * 2 * 5 == 3 * 5 * 2 == 30.
letters_reshaped = letters.reshape(3, 5, 2)
letters_reshaped

array([[['a', 'b'],
        ['c', 'd'],
        ['e', 'f'],
        ['g', 'h'],
        ['i', 'j']],

       [['k', 'l'],
        ['m', 'n'],
        ['o', 'q'],
        ['r', 's'],
        ['t', 'u']],

       [['v', 'w'],
        ['x', 'y'],
        ['z', '1'],
        ['2', '3'],
        ['4', '5']]], dtype='<U1')

In [21]:
# -1 asks NumPy to work out that dimension from the element count
# (30 elements / 2 rows = 15 columns). At most one dimension may be -1.
letters.reshape(2, -1)

array([['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o'],
       ['q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3',
        '4', '5']], dtype='<U1')

# Pandas


## Series


In [22]:
# Splitting on whitespace yields one name per element; with no explicit
# index the Series gets the default integer labels 0, 1, 2, ...
ggst_series = pd.Series("Chipp Zato-1 Sol_Badguy Millia_Rage".split())
ggst_series

0          Chipp
1         Zato-1
2     Sol_Badguy
3    Millia_Rage
dtype: object

In [23]:
# The Series carries the default integer labels 0..3, so label 2 is the third entry.
ggst_series[2]

'Sol_Badguy'

In [24]:
# A standalone Index of labels, one per character in ggst_series (same order).
archetypes = pd.Index("Fast Puppet Bruiser Jumpy".split())
archetypes

Index(['Fast', 'Puppet', 'Bruiser', 'Jumpy'], dtype='object')

In [25]:
# Shows the Series relabelled with `archetypes`. The result is only displayed,
# not assigned to a name, so nothing is rebound by this cell.
ggst_series.set_axis(archetypes)

Fast             Chipp
Puppet          Zato-1
Bruiser     Sol_Badguy
Jumpy      Millia_Rage
dtype: object

## Data Frames


In [26]:
# Deliberately a list holding ONE inner list: DataFrame.set_index reads a flat
# list of strings as column names, while a nested list is used as index values.
foods = ["Mocha_Protein_Powder Whey_Isolate Vanilla_Almond_Milk".split()]
foods

[['Mocha_Protein_Powder', 'Whey_Isolate', 'Vanilla_Almond_Milk']]

In [27]:
# One Series per macronutrient; values are positional and line up with `foods`.
protein = pd.Series([15, 30, 2], name="protein")
fat = pd.Series([0, 1, 4], name="fat")
carbs = pd.Series([4, 0, 2], name="carbs")

# The dict keys become the column labels. The food names then replace the
# default integer index, and the index itself is given the name "Foods".
macro_frame = (
    pd.DataFrame(data={"Protein": protein, "Fat": fat, "Carbs": carbs})
    .set_index(foods)
    .rename_axis("Foods")
)

In [28]:
# Display the finished frame: one row per food, one column per macronutrient.
macro_frame

Unnamed: 0_level_0,Protein,Fat,Carbs
Foods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mocha_Protein_Powder,15,0,4
Whey_Isolate,30,1,0
Vanilla_Almond_Milk,2,4,2


### Insight From Slides about DataFrames:

DataFrames can be built from:

- 2D arrays
- List of lists (pay attention to size and data types)
- List of dictionaries
- Dictionaries of lists
- Dictionary of Series
- Dictionary of dictionaries


In [29]:
# List of lists: each inner list is one row. Row and column labels are
# supplied separately and must match the data's dimensions (2 rows, 3 columns).
pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6]],
    index=["a", "b"],
    columns=["column 1", "column 2", "column 3"],
)

Unnamed: 0,column 1,column 2,column 3
a,1,2,3
b,4,5,6


### Cool Tips & Tricks

| Function           | Description                                                                               |
| ------------------ | ----------------------------------------------------------------------------------------- |
| df.columns         | Get an Index of the column names                                                          |
| df.index           | Get the Index of row labels (shared by every column)                                      |
| df['col']          | Dictionary-style access: returns the Series stored under "col"                            |
| df['col'][row_num] | Chained lookup: value in column "col" at row label row_num; prefer df.loc[row_num, 'col'] |
| df.values          | Get the underlying NumPy array (df.to_numpy() is the preferred spelling)                  |
| df.iterrows()      | Yields (index_label, row) pairs for looping. Try: for i, row in df.iterrows()             |

### Super Cool Tricks:

| Function                           | Description                          |
| ---------------------------------- | ------------------------------------ |
| pd.read_csv('path_to_file.csv')    | Turns a CSV file into a DataFrame    |
| pd.read_excel('path_to_file.xlsx') | Turns an Excel file into a DataFrame |


In [30]:
# Show the frame again as a reference for the .loc / .iloc lookups that follow.
macro_frame

Unnamed: 0_level_0,Protein,Fat,Carbs
Foods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mocha_Protein_Powder,15,0,4
Whey_Isolate,30,1,0
Vanilla_Almond_Milk,2,4,2


In [31]:
# .loc with a single row label returns that row as a Series keyed by column name.
macro_frame.loc["Vanilla_Almond_Milk"]

Protein    2
Fat        4
Carbs      2
Name: Vanilla_Almond_Milk, dtype: int64

In [32]:
# .loc[row_label, column_label] returns the single value at that intersection.
macro_frame.loc["Vanilla_Almond_Milk", "Fat"]

4

In [33]:
# A bare `:` selects every row, so this returns the whole "Fat" column as a Series.
macro_frame.loc[:, "Fat"]

Foods
Mocha_Protein_Powder    0
Whey_Isolate            1
Vanilla_Almond_Milk     4
Name: Fat, dtype: int64

In [38]:
# .iloc is position-based rather than label-based: row 0, column 0.
macro_frame.iloc[0, 0]

# i.e. the Protein value for Mocha_Protein_Powder

15