# A Complete Guide to Numpy

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os           # for dirname, _, filenames in os.walk('/kaggle/input'):

In [4]:
# Titanic training data. The folder can be overridden with the DATA_DIR
# environment variable; it falls back to the original D:/ location so the
# notebook behaves as before when the variable is not set.
DATA_DIR=os.environ.get("DATA_DIR","D:/")
train=pd.read_csv(os.path.join(DATA_DIR,"train.csv"))
# Work on a copy so the frame as loaded stays untouched.
df=train.copy()
df.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
df.tail()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


# 1. Creating a Numpy array


In [6]:
# "Empty" array: np.empty allocates memory without initializing it, so the
# values shown are whatever was already in that memory (not random numbers).
# Always overwrite the contents before using them.
emp=np.empty((4,2))
emp


array([[6.23042070e-307, 4.67296746e-307],
       [1.69121096e-306, 6.23058707e-307],
       [2.22526399e-307, 6.23053614e-307],
       [7.56592338e-307, 1.21378159e-311]])

In [7]:
# Creating a 1-D numpy array from a Python list.
arr=np.array([1,2,3])
print(arr.shape)  # (3,): a single axis holding three elements
arr

(3,)


array([1, 2, 3])

In [8]:
#Creating 2-D array
arr2=np.array([[1,4,3],[2,4,6]])
print(arr2.shape)
arr2

(2, 3)


array([[1, 4, 3],
       [2, 4, 6]])

To check the number of dimensions of a numpy array, use the `.ndim` attribute:

In [9]:
arr2.ndim

2

# 2. The Basics


Some basic attributes and operations of a numpy array.

In [10]:
arr=np.array([[2,3,4],[7,3,1],[2,5,6],[5,1,2]])
arr

array([[2, 3, 4],
       [7, 3, 1],
       [2, 5, 6],
       [5, 1, 2]])

In [11]:
# Shape of a numpy array
arr.shape

(4, 3)

In [12]:
# Reshape a numpy array: the same 12 elements, read row by row, laid out as (3,4).
# arr itself keeps its (4,3) shape; the reshaped result is bound to arr2.
arr2=arr.reshape(3,4)
arr2

array([[2, 3, 4, 7],
       [3, 1, 2, 5],
       [6, 5, 1, 2]])

In [13]:
# Reshaping with only one dimension known: -1 asks numpy to infer that axis
# from the element count (12 elements / 2 columns = 6 rows).
arr3=arr.reshape(-1,2)
arr3.shape

(6, 2)

In [14]:
# Dimensions of an array
print(arr)
arr.ndim

[[2 3 4]
 [7 3 1]
 [2 5 6]
 [5 1 2]]


2

In [15]:
# Size of each element in bytes (bits in the dtype / 8).
# The default integer dtype depends on the platform, so print it first.
print(arr.dtype)
arr.itemsize # int32 here, so 32/8 = 4 bytes (an int64 array would give 8)

int32


4

In [16]:
# Lets try with int 16
arr=arr.astype('int16') # now this will be 2 bytes. (16/8=2bytes)
print(arr.dtype)
arr.itemsize

int16


2

# 3. Some Standard Matrices

Matrix of all zeros

In [17]:
zeros=np.zeros((2,3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

Matrix of all ones

In [18]:
#ones
ones=np.ones((3,2))
ones

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

Matrix initialised with some constant value.

In [19]:
# From some constant value
full=np.full((2,3),9)
full

array([[9, 9, 9],
       [9, 9, 9]])

Identity Matrix


In [20]:
order=3
I=np.eye(order)
I

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

# 4. Indexing and Slicing

In [21]:
arr=np.array([[1,2,3],[5,7,6],[2,4,9]])
arr

array([[1, 2, 3],
       [5, 7, 6],
       [2, 4, 9]])

In [22]:
# First row: a single index on a 2-D array selects along the first axis.
arr[0]

array([1, 2, 3])

In [23]:
# third element in second row
arr[1,2]

6

Slicing

In [24]:
# All rows but the first (0-based indexing, slicing along the first axis)
arr[1:]

array([[5, 7, 6],
       [2, 4, 9]])

In [25]:
# All but first element of every row
arr[:,1:]

array([[2, 3],
       [7, 6],
       [4, 9]])

In [26]:
# Negative indices count from the end: -1 gives the last row.
arr[-1]

array([2, 4, 9])

In [27]:
# Last two columns of every row. For this 3-column array that is the same
# result as arr[:,1:].
arr[:,-2:]

array([[2, 3],
       [7, 6],
       [4, 9]])

# 5. Iterating over a Numpy array

In [28]:
arr

array([[1, 2, 3],
       [5, 7, 6],
       [2, 4, 9]])

In [29]:
for x in np.nditer(arr):
    print(x)

1
2
3
5
7
6
2
4
9


In [30]:
# nditer's default order ('K') visits elements in memory order, and arr.T is
# only a view onto arr's data, so the output is the same as iterating arr.
# Pass order='C' to nditer to walk the transpose row by row instead.
for x in np.nditer(arr.T):
    print(x)


1
2
3
5
7
6
2
4
9


In [31]:
# Iterating in normal way
for row in arr:
    for x in row:
        print(x)

1
2
3
5
7
6
2
4
9


# 6. Linspace and arange

Linspace
`np.linspace(low, high, count)` generates `count` evenly spaced values between `low` and `high` (BOTH endpoints INCLUSIVE).

In [32]:
arr_lin=np.linspace(1,250,5)
print(arr_lin.shape)
arr_lin

(5,)


array([  1.  ,  63.25, 125.5 , 187.75, 250.  ])

In [33]:
arr_lin=np.linspace(10,90,8)
print(arr_lin.shape)
arr_lin

(8,)


array([10.        , 21.42857143, 32.85714286, 44.28571429, 55.71428571,
       67.14285714, 78.57142857, 90.        ])

arange
`np.arange(low, high, step)` generates values starting at `low` and advancing by `step`, stopping before `high` is reached (`high` is EXCLUSIVE).

In [34]:
ar1=np.arange(1,100,1) # here third arg is the step size
ar1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [37]:
ar2=np.arange(10,1,-2) #  in reverse order
ar2

array([10,  8,  6,  4,  2])

# 7. Generating Random Numbers/Arrays


Random integers


In [38]:
# A single random integer in [0, 11): with one argument, it is the exclusive upper bound.
x=np.random.randint(11)
x

6

In [40]:
# A single random integer in [11, 21): low is inclusive, high is exclusive.
x=np.random.randint(low=11,high=21)
x

20

In [41]:
# Array of given size
rand_arr=np.random.randint(11,121,(2,3))
rand_arr

array([[100,  17,  28],
       [103,  88,  15]])

Random number between 0 and 1

In [42]:
# Random float b/w 0 and 1 [0,1)
rand_float=np.random.random((3,3))
rand_float

array([[0.30496491, 0.99562218, 0.09603499],
       [0.62455418, 0.35751645, 0.60212805],
       [0.28248245, 0.77176376, 0.54064049]])

Random floats in a range


In [44]:
low=10
high=20
rand_float=np.random.random((3,2))
rand_float=low+rand_float*(high-low)
rand_float

array([[18.37396227, 19.75290093],
       [12.49927342, 10.1181736 ],
       [18.7129825 , 10.80318441]])

In [45]:
# or we can use the random.uniform function
rand_arr=np.random.uniform(low,high,(2,3))
print(rand_arr)

[[16.69586888 16.94218791 19.65933195]
 [12.84627666 18.96015118 11.42342973]]


In [46]:
# Random values from standard normal dist
rnd=np.random.randn(3,2)

mu=np.mean(rnd)
var=np.var(rnd)
print(mu,var)
rnd

0.30480881963174666 0.42514448016771555


array([[-0.61421114,  1.14874031],
       [ 0.39918453, -0.47096077],
       [ 0.90349366,  0.46260633]])

In [47]:
# A specified mean (loc) and std dev (scale).
rnd=np.random.normal(2,.9,(1000,3)) # args: mean (loc), std dev (scale), shape; the variance is therefore about .9**2 = .81

mu=np.mean(rnd)
var=np.var(rnd)
print(mu,var)
rnd

1.9988165618683986 0.8134835549971523


array([[0.69998984, 1.00287607, 0.57143577],
       [2.93964222, 2.5283687 , 1.57886618],
       [2.76574536, 2.18757121, 1.15367338],
       ...,
       [1.89046849, 2.35988345, 1.54594821],
       [2.6195926 , 1.67741138, 1.47911634],
       [2.01110896, 2.77778063, 1.26939796]])

# 8. Common Matrix Operations

In [48]:
mat=np.array([[2,3,1],[4,5,6],[9,7,3]])
mat

array([[2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])

Transpose of a matrix

In [50]:
#Transpose
mat.T

array([[2, 4, 9],
       [3, 5, 7],
       [1, 6, 3]])

Multiplicative inverse (A^(-1))

In [52]:
mat_inv=np.linalg.inv(mat)
print(mat_inv)

[[-0.49090909 -0.03636364  0.23636364]
 [ 0.76363636 -0.05454545 -0.14545455]
 [-0.30909091  0.23636364 -0.03636364]]


Matrix multiplication

In [54]:
# To check if inverse is right, just multiply it again with mat. Should return identity matrix
np.matmul(mat,mat_inv)

array([[1.00000000e+00, 0.00000000e+00, 2.08166817e-17],
       [2.22044605e-16, 1.00000000e+00, 1.11022302e-16],
       [4.44089210e-16, 0.00000000e+00, 1.00000000e+00]])

# 9. Arithmetic operations between matrices

In [55]:
mat

array([[2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])

In [56]:
mat2=mat.T
mat2

array([[2, 4, 9],
       [3, 5, 7],
       [1, 6, 3]])

In [57]:
# Addition
mat+mat2

array([[ 4,  7, 10],
       [ 7, 10, 13],
       [10, 13,  6]])

In [58]:
# Subtraction
mat-mat2

array([[ 0, -1, -8],
       [ 1,  0, -1],
       [ 8,  1,  0]])

Note that `*` multiplies element by element; this is NOT MATRIX MULTIPLICATION. For that, use np.matmul().

In [59]:
mat*mat2 # element-wise (Hadamard) product, not the matrix product

array([[ 4, 12,  9],
       [12, 25, 42],
       [ 9, 42,  9]])

This is correct matrix multiplication:

In [60]:
np.matmul(mat,mat2)

array([[ 14,  29,  42],
       [ 29,  77,  89],
       [ 42,  89, 139]])

In [61]:
# "Division" of matrices: there is no matrix division operator, so mat is
# multiplied by the inverse of mat2 (mat/mat2 would divide element-wise instead).
div=np.matmul(mat,np.linalg.inv(mat2))
div

array([[-0.85454545,  1.21818182,  0.05454545],
       [-0.72727273,  1.90909091, -0.27272727],
       [-3.96363636,  6.05454545, -1.23636364]])

# 10. Arithmetic operations of matrix with a scalar (element-wise)

In [62]:
arr=np.array([10,12,14,15,17,21])
arr

array([10, 12, 14, 15, 17, 21])

In [63]:
# Addition
arr2=10+arr 
# works the other way round also 
arr2

array([20, 22, 24, 25, 27, 31])

In [67]:
# Subtraction
arr_sub=arr-10 # order matters here: 10-arr also works but gives the negated values
arr_sub

array([ 0,  2,  4,  5,  7, 11])

In [65]:
#mul
arr_mul=arr*2
arr_mul

array([20, 24, 28, 30, 34, 42])

In [66]:
#division
arr_div=arr/2
arr_div

array([ 5. ,  6. ,  7. ,  7.5,  8.5, 10.5])

Miscellaneous

In [68]:
arr*arr # element wise square


array([100, 144, 196, 225, 289, 441])

In [69]:
arr+arr # Note: this is not appending; it is equivalent to 2*arr.


array([20, 24, 28, 30, 34, 42])

# 11. Mathematical Operation(s)

Some common mathematical operations in numpy.

In [71]:
# Log
x=2.73
ans=np.log(x)
ans

1.0043016091968684

In [72]:
# On a  list (element wise)
l=[2,3,4,5,6,1,2.73]
np.log(l)

array([0.69314718, 1.09861229, 1.38629436, 1.60943791, 1.79175947,
       0.        , 1.00430161])

In [73]:
# Exponential: exp undoes log, so exp(log(2)) prints 2.0
x=np.log(2)
ans=np.exp(x)
print(ans)

print('\n\n')

# On a list: exp(log(n)) for n = 1..9. Results such as 3.0000000000000004
# differ from n only by floating-point rounding.
l=[np.log(x) for x in range(1,10)]
ans=[np.exp(i) for i in l]
ans

2.0





[1.0,
 2.0,
 3.0000000000000004,
 4.0,
 4.999999999999999,
 6.0,
 6.999999999999999,
 7.999999999999998,
 9.000000000000002]

Sin , Cos and Tan

In [75]:
# Angles pi, pi/2, pi/3 and pi/4 in radians.
factor=[1,2,3,4]
rads=[np.pi/i for i in factor]
# np.sin/np.cos/np.tan take the whole list and return one array each; the
# surrounding [...] merely wraps that array in a one-element list, which is
# why the printed output reads "[array([...])]".
sins=[np.sin(rads)]
cos=[np.cos(rads)]
tans=[np.tan(rads)]

# Values around 1e-16 are floating-point zeros; tan(pi/2) is mathematically
# undefined and shows up as a huge number (~1.6e16) instead.
print("sins: ",sins)
print("cos: ",cos)
print("tans: ",tans)

sins:  [array([1.22464680e-16, 1.00000000e+00, 8.66025404e-01, 7.07106781e-01])]
cos:  [array([-1.00000000e+00,  6.12323400e-17,  5.00000000e-01,  7.07106781e-01])]
tans:  [array([-1.22464680e-16,  1.63312394e+16,  1.73205081e+00,  1.00000000e+00])]


# 12. Statistical Operation(s)

Let's see some common statistical operations in numpy.

In [76]:
mat

array([[2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])

In [77]:
# minimum
print(mat.min())        # over the whole array
print(mat.min(axis=0))  # per column (reduces along the rows)
print(mat.min(axis=1))  # per row (reduces along the columns)

1
[2 3 1]
[1 4 3]


In [78]:
# maximum
print(mat.max())
print(mat.max(axis=0))
print(mat.max(axis=1))

9
[9 7 6]
[3 6 9]


In [79]:
# Mean
print(mat.mean()) # of whole array.
print(mat.mean(axis=0))
print(mat.mean(axis=1))

4.444444444444445
[5.         5.         3.33333333]
[2.         5.         6.33333333]


In [80]:
# Std Dev
print(mat.std()) # of whole array.
print(mat.std(axis=0))
print(mat.std(axis=1))

2.4088314876309775
[2.94392029 1.63299316 2.05480467]
[0.81649658 0.81649658 2.49443826]


In [81]:
# Variance
print(mat.var()) # of whole array.
print(mat.var(axis=0))
print(mat.var(axis=1))

5.802469135802469
[8.66666667 2.66666667 4.22222222]
[0.66666667 0.66666667 6.22222222]


# 13. Concatenating numpy arrays

In [82]:
mat

array([[2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])

In [83]:
mat2

array([[2, 4, 9],
       [3, 5, 7],
       [1, 6, 3]])

In [84]:
# axis=1 joins the matrices side by side: mat's columns follow mat2's columns.
result=np.concatenate((mat2,mat),axis=1)
result

array([[2, 4, 9, 2, 3, 1],
       [3, 5, 7, 4, 5, 6],
       [1, 6, 3, 9, 7, 3]])

In [85]:
# axis=0 stacks the matrices vertically: mat's rows follow mat2's rows.
result=np.concatenate((mat2,mat),axis=0)
result

array([[2, 4, 9],
       [3, 5, 7],
       [1, 6, 3],
       [2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])

Note that this can be achieved using 'hstack' and 'vstack' functions as well.

In [86]:
np.hstack([mat2,mat])

array([[2, 4, 9, 2, 3, 1],
       [3, 5, 7, 4, 5, 6],
       [1, 6, 3, 9, 7, 3]])

In [87]:
np.vstack([mat2,mat])

array([[2, 4, 9],
       [3, 5, 7],
       [1, 6, 3],
       [2, 3, 1],
       [4, 5, 6],
       [9, 7, 3]])