# Python Numpy

### Zip function

In [1]:
x = ['james','tony','bob']

In [2]:
y = [34,41,36]

In [3]:
for i,j in zip(x,y):
    print(f"{i} is {j} years old")

james is 34 years old
tony is 41 years old
bob is 36 years old


### Numpy arrays

In [9]:
import numpy as np

In [5]:
arr = np.array([5,2,6,8])

In [6]:
arr

array([5, 2, 6, 8])

In [7]:
type(arr)

numpy.ndarray

In [8]:
arr[2]

6

In [9]:
len(arr)

4

### Numpy operations

In [12]:
x = [4,7,2]
y = [2,5,9]

In [13]:
# Can't perform element-wise operation with python lists
x * y

TypeError: can't multiply sequence by non-int of type 'list'

In [14]:
p = np.array([4,7,2])
q = np.array([2,5,9])

In [15]:
p * q

array([ 8, 35, 18])

In [17]:
# dot product
np.dot(p,q)

61

In [18]:
p.sum()

13

In [19]:
x.sum()

AttributeError: 'list' object has no attribute 'sum'

In [22]:
%%time
np.dot(p,q)
# Measuring time of operations in jupyter notebook

CPU times: total: 0 ns
Wall time: 0 ns


61

### Multidimensional numpy arrays

In [23]:
x = np.array([4,6,8,1,3])

In [24]:
y = np.array([[6,3,2],
              [7,3,5],
              [9,6,1],
              [8,8,7]])

In [25]:
x.shape

(5,)

In [27]:
y.shape

(4, 3)

In [28]:
y.dtype

dtype('int32')

In [30]:
z = np.array([[6,3,2],
              [7,3,5],
              [9,6,1]])

In [32]:
#matrix multiplication
np.matmul(y,z)

array([[ 75,  39,  29],
       [108,  60,  34],
       [105,  51,  49],
       [167,  90,  63]])

In [33]:
#matrix multiplication shortcut
y @ z

array([[ 75,  39,  29],
       [108,  60,  34],
       [105,  51,  49],
       [167,  90,  63]])

### Working with numpy on CSV files

In [7]:
import urllib.request

urllib.request.urlretrieve('https://query.data.world/s/bpccygejx3enwbbjdd7gho335uwpmy','iplDeliveries.txt')

('iplDeliveries.txt', <http.client.HTTPMessage at 0x25099df8610>)

In [10]:
ipl_deliveries_data = np.genfromtxt('iplDeliveries.txt',delimiter=',',skip_header=1)

In [11]:
ipl_deliveries_data

array([[  1.,   1.,  nan, ...,  nan,  nan,  nan],
       [  1.,   1.,  nan, ...,  nan,  nan,  nan],
       [  1.,   1.,  nan, ...,  nan,  nan,  nan],
       ...,
       [577.,   2.,  nan, ...,  nan,  nan,  nan],
       [577.,   2.,  nan, ...,  nan,  nan,  nan],
       [577.,   2.,  nan, ...,  nan,  nan,  nan]])

In [13]:
ipl_deliveries_data.shape

136598

In [19]:
x = np.array([4]*ipl_deliveries_data.shape[0])
x

array([4, 4, 4, ..., 4, 4, 4])

In [20]:
np.concatenate(ipl_deliveries_data,x.reshape(136598,1),axis=1) #error

TypeError: concatenate() got multiple values for argument 'axis'

In [21]:
np.savetxt('iplDeliveries.txt',ipl_deliveries_data,fmt='%.2f',header='match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder',comments='')