In [None]:
import numpy as np

# Seed the legacy global generator so every run produces the same arrays.
np.random.seed(0)

# One-dimensional array: six integers drawn from [0, 10).
x1 = np.random.randint(10, size=6)
# x1 has an integer dtype, so storing a float drops the fractional part
# (3.14159 is stored as 3) instead of raising an error.
x1[0] = 3.14159

# Two- and three-dimensional arrays drawn from the same random stream.
x2 = np.random.randint(10, size=(3, 4))
x3 = np.random.randint(10, size=(3, 4, 5))

x1


array([3, 0, 3, 3, 7, 9])

In [None]:
# The integers 0 through 9; the slicing cells below all index into this array.
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
x[:5] # first five elements: start defaults to 0, stop index 5 is excluded

array([0, 1, 2, 3, 4])

In [None]:
x[5:] # elements from index 5 through the end (index 5 itself is included)

array([5, 6, 7, 8, 9])

In [None]:
x[4:7] # middle sub-array: indices 4, 5 and 6 (stop index 7 is excluded)

array([4, 5, 6])

In [None]:
x[::2] # every other element, starting at index 0 (indices 0, 2, 4, 6, 8)

array([0, 2, 4, 6, 8])

In [None]:
x[1::2] # every other element, starting at index 1 (indices 1, 3, 5, 7, 9)

array([1, 3, 5, 7, 9])

In [None]:
x[::-1] # all elements, reversed (a negative step walks the array backwards)

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [None]:
x[5::-2] # every other element walking backwards from index 5 (indices 5, 3, 1)

array([5, 3, 1])

In [None]:
# Display the two-dimensional array that the multi-dimensional slices below use.
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [None]:
x2[:2, :3] # first two rows, first three columns (one slice per axis, comma-separated)

array([[3, 5, 2],
       [7, 6, 8]])

In [None]:
x2[:3, ::2] # all rows, every other column (columns 0 and 2)

array([[3, 2],
       [7, 8],
       [1, 7]])

In [None]:
# A negative step on each axis reverses the row order and the column order together.
x2[::-1, ::-1]

array([[7, 7, 6, 1],
       [8, 8, 6, 7],
       [4, 2, 5, 3]])

In [None]:
print(x2[:, 0]) # first column of x2 (the empty slice keeps every row)

[3 7 1]


In [None]:
print(x2[0, :]) # first row of x2 (the empty slice keeps every column)

[3 5 2 4]


In [None]:
print(x2[0]) # equivalent to x2[0, :]: a trailing empty slice may be omitted

[3 5 2 4]


In [None]:
# Show x2 before any sub-array is modified, as a baseline for the cells below.
print(x2)

[[3 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]


In [None]:
# Take the top-left 2x2 corner of x2. Basic slicing yields a view that shares
# x2's data rather than an independent copy.
x2_sub = x2[:2, :2]
print(x2_sub)


[[3 5]
 [7 6]]


In [None]:
# Write through the sliced sub-array. Because x2_sub is a slice of x2, this
# assignment also changes x2[0, 0].
x2_sub[0, 0] = 99
print(x2_sub)


[[99  5]
 [ 7  6]]


In [None]:
# x2 now holds 99 at [0, 0]: the write made through x2_sub reached the original.
print(x2)


[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [None]:
# .copy() materialises the slice as a new array that no longer shares data with x2.
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)


[[99  5]
 [ 7  6]]


In [None]:
# Modify the copy only; x2 is not affected by this assignment.
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [None]:
# x2 is unchanged by the write to x2_sub_copy (its [0, 0] entry is still 99, not 42).
print(x2)


[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [None]:
# Lay the integers 1 through 9 out row by row in a 3x3 grid.
grid = np.arange(1, 10).reshape(3, 3)
print(grid)


[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [None]:
# Start from a flat, three-element vector.
x = np.array([1, 2, 3])
# Row vector via reshape: one row, three columns.
x.reshape(1, 3)

array([[1, 2, 3]])

In [None]:
 # row vector via newaxis: a new leading axis turns shape (3,) into (1, 3)
x[np.newaxis, :]


array([[1, 2, 3]])

In [None]:
 # column vector via reshape: three rows, one column
x.reshape((3, 1))


array([[1],
       [2],
       [3]])

In [None]:
 # column vector via newaxis: a new trailing axis turns shape (3,) into (3, 1)
x[:, np.newaxis]


array([[1],
       [2],
       [3]])

In [None]:
# Two one-dimensional arrays to be joined end to end.
x, y = np.array([1, 2, 3]), np.array([3, 2, 1])
# np.concatenate takes a sequence of arrays as its first argument.
np.concatenate((x, y))

array([1, 2, 3, 3, 2, 1])

In [None]:
# np.concatenate accepts more than two inputs at once; here the plain Python
# list z is joined after x and y.
z = [99, 99, 99]
print(np.concatenate([x, y, z]))


[ 1  2  3  3  2  1 99 99 99]


In [None]:
# A 2x3 example matrix for the two-dimensional concatenation cells.
grid = np.array([
    [1, 2, 3],
    [4, 5, 6],
])


In [None]:
 # concatenate along the first axis (axis=0 is the default): rows are appended
np.concatenate([grid, grid])


array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [None]:
# concatenate along the second axis (axis=1, because axes are zero-indexed):
# columns are appended, so each row becomes twice as long
np.concatenate([grid, grid], axis=1)


array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [None]:
# A single row to place on top of a two-row grid.
x = np.array([1, 2, 3])
grid = np.array([
    [9, 8, 7],
    [6, 5, 4],
])
# vertically stack the arrays: the 1-D x becomes the first row of the result
np.vstack((x, grid))


array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [None]:
 # horizontally stack the arrays: y is a 2x1 column, so hstack appends it
 # to grid as one extra column on the right
y = np.array([[99],
 [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [None]:
x = [1, 2, 3, 99, 99, 3, 2, 1]
# Cut before index 3 and before index 5: two split points yield three pieces.
x1, x2, x3 = np.split(x, indices_or_sections=[3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [None]:
# The integers 0 through 15 arranged row by row in a 4x4 grid.
grid = np.arange(16).reshape(4, 4)
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [None]:
# Split grid along its rows before row index 2: the rows above the cut go to
# upper, the remaining rows to lower.
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [None]:
# Split grid along its columns before column index 2: the columns left of the
# cut go to left, the remaining columns to right.
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


In [None]:
import numpy as np

# The first six primes; multiplying by a scalar is applied to every element.
x = np.array([2, 3, 5, 7, 11, 13])
2 * x


array([ 4,  6, 10, 14, 22, 26])

In [None]:
# Names in inconsistent letter case.
data = [
    'peter',
    'Paul',
    'MARY',
    'gUIDO',
]
# Plain-Python approach: call str.capitalize on each element in turn.
[name.capitalize() for name in data]


['Peter', 'Paul', 'Mary', 'Guido']

In [None]:
# Same four names as the preceding cell, rebound to `data`.
data = ['peter', 'Paul', 'MARY', 'gUIDO']
# Capitalise every entry; yields a new list and leaves `data` untouched.
list(map(str.capitalize, data))


['Peter', 'Paul', 'Mary', 'Guido']

In [None]:
import pandas as pd

# Include a missing value in the input. A plain list comprehension calling
# s.capitalize() would raise AttributeError on None, whereas the vectorized
# .str methods used on this Series pass missing entries through.
# Defining the list here keeps the cell reproducible on a fresh kernel: the
# five-row Series shown as this cell's output cannot be built from a
# four-element list.
data = ['peter', 'Paul', None, 'MARY', 'gUIDO']
names = pd.Series(data)
names

0    peter
1     Paul
2     None
3     MARY
4    gUIDO
dtype: object

In [None]:
# The .str accessor applies str.capitalize element-wise across the Series;
# missing entries are left missing instead of raising an error.
names.str.capitalize()


0    Peter
1     Paul
2     None
3     Mary
4    Guido
dtype: object

In [None]:
# Six full names ("first last") used by the string-method examples below.
monte = pd.Series([
    'Graham Chapman',
    'John Cleese',
    'Terry Gilliam',
    'Eric Idle',
    'Terry Jones',
    'Michael Palin',
])
monte

0    Graham Chapman
1       John Cleese
2     Terry Gilliam
3         Eric Idle
4       Terry Jones
5     Michael Palin
dtype: object

In [None]:
# Element-wise lowercase; returns a new Series and leaves monte unchanged.
monte.str.lower()


0    graham chapman
1       john cleese
2     terry gilliam
3         eric idle
4       terry jones
5     michael palin
dtype: object

In [None]:
# Character count of each name (spaces included), returned as an integer Series.
monte.str.len()

0    14
1    11
2    13
3     9
4    11
5    13
dtype: int64

In [None]:
# Boolean mask: True where the name begins with an uppercase 'T'.
monte.str.startswith('T')

0    False
1    False
2     True
3    False
4     True
5    False
dtype: bool

In [None]:
# Split each name on whitespace; every element of the result is a list of words.
monte.str.split()


0    [Graham, Chapman]
1       [John, Cleese]
2     [Terry, Gilliam]
3         [Eric, Idle]
4       [Terry, Jones]
5     [Michael, Palin]
dtype: object

In [None]:
# Capture the first run of ASCII letters in each string (here, the first name).
# With a single capture group, expand=False returns a Series, not a DataFrame.
monte.str.extract('([A-Za-z]+)', expand=False)


0     Graham
1       John
2      Terry
3       Eric
4      Terry
5    Michael
dtype: object

In [None]:
# Match whole strings (^ ... $) whose first character is not an uppercase vowel
# and whose last character is not a lowercase vowel; non-matching names yield [].
monte.str.findall(r'^[^AEIOU].*[^aeiou]$')


0    [Graham Chapman]
1                  []
2     [Terry Gilliam]
3                  []
4       [Terry Jones]
5     [Michael Palin]
dtype: object

In [None]:
# Indexing the .str accessor slices every string: the first three characters of each name.
monte.str[0:3]

0    Gra
1    Joh
2    Ter
3    Eri
4    Ter
5    Mic
dtype: object

In [None]:
# Chain .str operations: split into words, then take the last element of each
# list (for these names, the surname).
monte.str.split().str.get(-1)


0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

In [None]:
# Pair each name with a string of '|'-separated single-letter codes.
# NOTE(review): the meaning of the codes A-D is not stated in this notebook.
full_monte = pd.DataFrame({'name': monte,
 'info': ['B|C|D', 'B|D', 'A|C',
 'B|D', 'B|C', 'B|C|D']})
full_monte


Unnamed: 0,name,info
0,Graham Chapman,B|C|D
1,John Cleese,B|D
2,Terry Gilliam,A|C
3,Eric Idle,B|D
4,Terry Jones,B|C
5,Michael Palin,B|C|D


In [None]:
# Expand the '|'-delimited codes into one 0/1 indicator column per distinct code.
full_monte['info'].str.get_dummies('|')

Unnamed: 0,A,B,C,D
0,0,1,1,1
1,0,1,0,1
2,1,0,1,0
3,0,1,0,1
4,0,1,1,0
5,0,1,1,1


In [None]:
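# Earlier download commands, left disabled; the next cell downloads
# recipeitems.json.gz from a GitHub repository instead.
# !curl -O http://openrecipes.s3.amazonaws.com/recipeitems-latest.json.gz
# !gunzip recipeitems-latest.json.gz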

In [None]:
repo = "https://raw.githubusercontent.com/jakevdp/open-recipe-data/master"
!cd data && curl -O {repo}/recipeitems.json.gz
!gunzip data/recipeitems.json.gz

/bin/bash: line 1: cd: data: No such file or directory
gzip: data/recipeitems.json.gz: No such file or directory


In [None]:
# Try to parse the unpacked recipe file as a single JSON document and report
# the parser's complaint instead of raising.
# The path is the file the download cell unpacks (data/recipeitems.json),
# not a machine-specific absolute path to an archive of the whole repository.
# NOTE(review): if the file holds one JSON object per line, read_json needs
# lines=True to load it; confirm against the data.
try:
    recipes = pd.read_json('data/recipeitems.json')
except ValueError as e:
    print("ValueError:", e)


ValueError: Multiple files found in ZIP file. Only one file per ZIP: ['openrecipes-master/', 'openrecipes-master/.gitignore', 'openrecipes-master/CONTRIBUTORS', 'openrecipes-master/LICENSE', 'openrecipes-master/README.md', 'openrecipes-master/requirements.txt', 'openrecipes-master/scrapy_proj/', 'openrecipes-master/scrapy_proj/generate.py', 'openrecipes-master/scrapy_proj/grab_html.py', 'openrecipes-master/scrapy_proj/openrecipes/', 'openrecipes-master/scrapy_proj/openrecipes/__init__.py', 'openrecipes-master/scrapy_proj/openrecipes/hrecipe_parser.py', 'openrecipes-master/scrapy_proj/openrecipes/items.py', 'openrecipes-master/scrapy_proj/openrecipes/pipelines.py', 'openrecipes-master/scrapy_proj/openrecipes/schema_org_parser.py', 'openrecipes-master/scrapy_proj/openrecipes/settings.py.default', 'openrecipes-master/scrapy_proj/openrecipes/spiders/', 'openrecipes-master/scrapy_proj/openrecipes/spiders/__init__.py', 'openrecipes-master/scrapy_proj/openrecipes/spiders/allrecipes_spider.py'