# NumPy

In [80]:
import numpy as np

In [81]:
# An integer array keeps its dtype on assignment, so the float below is
# stored truncated (23) rather than as 23.232.
arr = np.array([1, 2, 3, 4, 5])
arr[1] = 23.232
arr

array([ 1, 23,  3,  4,  5])

In [82]:
# Request float64 storage explicitly so fractional values can be kept.
arr = np.array([1, 2, 3, 4, 5], dtype=np.float64)
arr

array([1., 2., 3., 4., 5.])

In [83]:
# With a float64 array the fractional value is stored unchanged.
arr[1] = 23.232
arr

array([ 1.   , 23.232,  3.   ,  4.   ,  5.   ])

In [84]:
# Assigning a non-numeric string to the float array raises ValueError.
# The exception is the point of this cell, so catch and display it instead of
# letting the cell abort (an uncaught error stops Restart & Run All).
# `arr` itself is left unchanged by the failed assignment.
try:
    arr[1] = 'asd'
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: could not convert string to float: 'asd'

In [None]:
# 3 rows x 4 columns filled with 1.0 (default dtype is float64).
one = np.ones((3, 4))
one

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [85]:
# 3 x 3 array filled with 0.0.
zeros = np.zeros((3, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [86]:
# np.empty only allocates memory; it does not initialize the entries.
# Whatever values are displayed are leftovers and must not be relied upon.
empty = np.empty((3, 3))
empty

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [87]:
# Every second integer starting at 1; the stop value 10 is excluded.
ranges = np.arange(start=1, stop=10, step=2)
ranges

array([1, 3, 5, 7, 9])

In [88]:
# A 1-D array of five ones stored as 16-bit integers instead of the default floats.
ones = np.ones(5, dtype=np.int16)
ones

array([1, 1, 1, 1, 1], dtype=int16)

Random values

In [89]:
# Seed the legacy global generator so the draw below is reproducible.
np.random.seed(0)

# Ten integers drawn from [-1, 5): the upper bound is exclusive.
random = np.random.randint(low=-1, high=5, size=10)
random, len(random)


(array([ 3,  4, -1,  2,  2,  2,  0,  2,  4,  1]), 10)

In [90]:
# Number of dimensions, shape tuple, and total element count of the 3x4 array.
one.ndim, one.shape, one.size

(2, (3, 4), 12)

In [91]:
# Whole array, then positions 1..3 (stop index 4 excluded), then every
# second element starting from position 0.
print(random)
print(random[1:4])
print(random[::2])

[ 3  4 -1  2  2  2  0  2  4  1]
[ 4 -1  2]
[ 3 -1  2  0  4]


In [92]:
np.random.seed(2)
# 2 x 4 grid of integers in [0, 10).
multi_random = np.random.randint(10, size=(2, 4))
print(multi_random)
# Chained indexing: take row 1 first, then its first two entries.
multi_random[1][:2]

[[8 8 6 2]
 [8 7 2 1]]


array([8, 7])

In [93]:
# Same selection with a single (row, column-slice) index: row 1, columns 0-1.
multi_random[1, 0:2]

array([8, 7])

In [94]:
# All rows, first two columns.
multi_random[:, 0:2]

array([[8, 8],
       [8, 7]])

In [95]:
# All rows, from column 3 onward; slicing keeps the result 2-D.
multi_random[:, 3:]

array([[2],
       [1]])

In [96]:
# Show the full array again as a reference point before taking a slice of it.
multi_random

array([[8, 8, 6, 2],
       [8, 7, 2, 1]])

In [97]:
# Last three columns of every row. Basic slicing returns a view that shares
# memory with multi_random, not an independent copy.
inst = multi_random[:, -3:]
inst

array([[8, 6, 2],
       [7, 2, 1]])

In [98]:
# Write through the view; the element it shares with multi_random changes too.
inst[0, 0] = 12321
inst

array([[12321,     6,     2],
       [    7,     2,     1]])

In [99]:
# The write made through the view is visible in the original array.
multi_random

array([[    8, 12321,     6,     2],
       [    8,     7,     2,     1]])

In [100]:
# .copy() allocates independent storage, so editing the copy leaves
# multi_random untouched (compare the final display with the earlier one).
copy_inst = multi_random[:, -3:].copy()
print(copy_inst)
copy_inst[0, 0] = 223
print(copy_inst)
multi_random

[[12321     6     2]
 [    7     2     1]]
[[223   6   2]
 [  7   2   1]]


array([[    8, 12321,     6,     2],
       [    8,     7,     2,     1]])

In [104]:
# Reproducible input for the doubling example: 100,000 integers in [0, 10).
np.random.seed(0)
m = np.random.randint(10, size=100_000)

def standard_double(array):
    """Double each element of a 1-D array using an explicit Python loop.

    Elements are processed one at a time rather than with a vectorized
    expression (presumably as a slow baseline for comparison).

    Parameters
    ----------
    array : numpy.ndarray
        Array indexed by position from 0 to ``array.size - 1``.

    Returns
    -------
    numpy.ndarray
        New 1-D array of length ``array.size``. It is allocated with
        ``np.empty`` and its default dtype, so the result is float64 even
        when the input holds integers.
    """
    n_items = array.size
    doubled = np.empty(n_items)
    for position in range(n_items):
        doubled[position] = array[position] * 2
    return doubled

In [107]:
import timeit

# timeit.default_timer() only returns the current clock reading, so take it
# before and after the call and report the difference as the elapsed time.
start = timeit.default_timer()
doubled = standard_double(m)
elapsed = timeit.default_timer() - start
print(f"standard_double: {elapsed:.4f} s for {m.size} elements")
doubled

array([10.,  0.,  6., ..., 16.,  8., 12.], shape=(100000,))

Adding and multiplying arrays

In [109]:
# Arrays of equal shape are added element by element.
np.array([1,2,3,4]) + np.array([3,5,6,7])

array([ 4,  7,  9, 11])

In [110]:
# Multiplying by a scalar applies the factor to every element.
np.array([1, 2, 3, 4]) * 2

array([2, 4, 6, 8])

In [112]:
# `*` between two arrays is an element-wise product, not a dot product.
np.array([1,2,3,4]) * np.array([2,2,2,2])

array([2, 4, 6, 8])

In [113]:
# Broadcasting: the single (1, 4) row is stretched across all 3 rows of o2.
o1 = np.ones((1, 4))
o2 = np.ones((3, 4))

o1 + o2

array([[2., 2., 2., 2.],
       [2., 2., 2., 2.],
       [2., 2., 2., 2.]])

In [114]:
# Broadcasting: the single (4, 1) column is stretched across all 3 columns of o2.
o1 = np.ones((4, 1))
o2 = np.ones((4, 3))

o1 + o2

array([[2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])

In [116]:
# Arithmetic mean of the integers 0..9.
mean = np.mean(np.arange(10))
mean

np.float64(4.5)

In [117]:
# Sum over all 16 elements of a 4 x 4 array of ones.
summing = np.sum(np.ones((4, 4)))
summing

np.float64(16.0)

Comparisons

In [131]:
# Integers 0..9; comparing with a scalar yields a boolean array of the
# same shape, one result per element.
arr = np.arange(10)
print(arr)
arr < 3


[0 1 2 3 4 5 6 7 8 9]


array([ True,  True,  True, False, False, False, False, False, False,
       False])

In [142]:
np.random.seed(0)

# 3 x 4 grid of integers in [0, 41).
arr = np.random.randint(low=0, high=41, size=(3, 4))
print(arr)
# The 1-D vector is broadcast against every row and compared element-wise.
arr <= np.array([0, 3, 3, 39])

[[ 0  3  3 39]
 [ 9 19 21 36]
 [23  6 24 24]]


array([[ True,  True,  True,  True],
       [False, False, False,  True],
       [False, False, False,  True]])

# Pandas

In [143]:
import pandas as pd

In [144]:
# A Series built from a list gets a default integer index 0..4.
s = pd.Series([1, 2, 3, 4, 5])
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [145]:
# The data behind a Series is a NumPy array; .to_numpy() is the accessor
# the pandas documentation recommends over .values.
type(s.to_numpy())

numpy.ndarray

In [146]:
# Underlying values as a NumPy array (.to_numpy() is preferred over .values).
s.to_numpy()

array([1, 2, 3, 4, 5])

In [148]:
# With the default integer index, 0 is looked up as an index label.
s[0]

np.int64(1)

In [149]:
# Slice 1:3 returns the elements labelled 1 and 2; the stop value is excluded.
s[1:3]

1    2
2    3
dtype: int64

In [150]:
# The index object that labels the Series' rows.
s.index

RangeIndex(start=0, stop=5, step=1)

In [151]:
# Same values, but labelled with the letters a..e instead of 0..4.
ss = pd.Series([1, 2, 3, 4, 5], index=list("abcde"))

In [152]:
# Look up a single value by its label.
ss['d']

np.int64(4)

In [153]:
# Label-based slices include BOTH endpoints ('d' is part of the result).
ss['b':'d']

b    2
c    3
d    4
dtype: int64

In [154]:
# Positional slice (stop excluded). .iloc makes it explicit that 1 and 4 are
# positions, not labels, instead of relying on how [] guesses.
ss.iloc[1:4]

b    2
c    3
d    4
dtype: int64

In [156]:
# Building a Series from a mapping: keys become the index, values the data.
s = pd.Series(dict(a=1, b=2, c=3, d=4, e=5))
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [160]:
# Two per-museum counts keyed by museum name. In the DataFrame the shared
# inner keys become the row index and the outer keys the column names.
paid = {
    "Louvre Museum": 5988065,
    "Orsay Museum": 1850092,
    "Pompidou Centre": 2620481,
    "National Natural History Museum": 404497,
}

free = {
    "Louvre Museum": 4117897,
    "Orsay Museum": 1436132,
    "Pompidou Centre": 1070337,
    "National Natural History Museum": 344572,
}

museum = pd.DataFrame({"paid": paid, "free": free})

In [161]:
# Display the assembled frame: one row per museum, one column per count.
museum

Unnamed: 0,paid,free
Louvre Museum,5988065,4117897
Orsay Museum,1850092,1436132
Pompidou Centre,2620481,1070337
National Natural History Museum,404497,344572


In [162]:
# Row labels (the museum names).
museum.index

Index(['Louvre Museum', 'Orsay Museum', 'Pompidou Centre',
       'National Natural History Museum'],
      dtype='object')

In [164]:
# Column labels.
museum.columns

Index(['paid', 'free'], dtype='object')

In [165]:
# Selecting a single column by name returns it as a Series.
museum["free"]

Louvre Museum                      4117897
Orsay Museum                       1436132
Pompidou Centre                    1070337
National Natural History Museum     344572
Name: free, dtype: int64

In [166]:
# Select a range of rows by label (both endpoints included). .loc states
# explicitly that these are row labels rather than column names.
museum.loc["Orsay Museum":"National Natural History Museum"]

Unnamed: 0,paid,free
Orsay Museum,1850092,1436132
Pompidou Centre,2620481,1070337
National Natural History Museum,404497,344572


In [167]:
# Rows and column selected in a single .loc call instead of chaining two
# [] lookups (chained indexing builds an intermediate frame and is unsafe
# for assignment).
museum.loc["Orsay Museum":"National Natural History Museum", "paid"]

Orsay Museum                       1850092
Pompidou Centre                    2620481
National Natural History Museum     404497
Name: paid, dtype: int64

In [175]:
# Boolean-mask filtering: keep rows whose "paid" value exceeds 10000.
# Every row satisfies the condition here, so the whole frame is returned.
museum[museum["paid"] > 10000]

Unnamed: 0,paid,free
Louvre Museum,5988065,4117897
Orsay Museum,1850092,1436132
Pompidou Centre,2620481,1070337
National Natural History Museum,404497,344572


In [177]:
# Add a derived column in place: the row-wise sum of "paid" and "free".
museum["total"] = museum["paid"].add(museum["free"])
museum

Unnamed: 0,paid,free,total
Louvre Museum,5988065,4117897,10105962
Orsay Museum,1850092,1436132,3286224
Pompidou Centre,2620481,1070337,3690818
National Natural History Museum,404497,344572,749069


In [179]:
# Sum of the "total" column across all rows.
museum["total"].sum()

np.int64(17832073)

In [180]:
# Mean of the "total" column across all rows.
museum['total'].mean()

np.float64(4458018.25)

In [181]:
# Load the table from data.csv (path relative to the working directory),
# using the file's first column as the row index.
df = pd.read_csv("data.csv", index_col=0)
df

Unnamed: 0_level_0,paid,free
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Louvre Museum,5988065,4117897
Orsay Museum,1850092,1436132
Pompidou Centre,2620481,1070337
National Natural History Museum,404497,344572


In [182]:
# Add a derived column in place: the row-wise sum of "paid" and "free".
df["total"] = df["paid"].add(df["free"])
df

Unnamed: 0_level_0,paid,free,total
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Louvre Museum,5988065,4117897,10105962
Orsay Museum,1850092,1436132,3286224
Pompidou Centre,2620481,1070337,3690818
National Natural History Museum,404497,344572,749069


In [183]:
# Write df to a new CSV file; the index is written as well by default.
df.to_csv("data_with_total.csv")