# NumPy Data Types

In [2]:
import numpy as np

Data Types in Python

By default, Python has these data types:

1. strings - used to represent text data, the text is given under quote marks. eg. "ABCD"

2. integer - used to represent integer numbers. eg. -1, -2, -3

3. float - used to represent real numbers. eg. 1.2, 42.42

4. boolean - used to represent True or False.

5. complex - used to represent a number in the complex plane. eg. 1.0 + 2.0j, 1.5 + 2.5j



Data Types in NumPy

NumPy has some extra data types, and refers to data types with one character, like i for integers, u for unsigned integers etc.

Below is a list of all data types in NumPy and the characters used to represent them.

i - integer

b - boolean

u - unsigned integer[https://www.ibm.com/docs/en/i/7.4?topic=type-unsigned-format]

f - float

c - complex float

m - timedelta[Timedelta in Python is an object that represents the duration. It is mainly used to calculate the duration between two dates and times] 
https://www.guru99.com/date-time-and-datetime-classes-in-python.html

M - datetime

O - object

S - string

U - unicode string[https://docs.python.org/3/howto/unicode.html]


Checking the Data Type of an Array

The NumPy array object has a property called dtype that returns the data type of the array:

In [5]:
# Get the data type of an array object.
# The list mixes a float, ints and a string, so NumPy stores every element
# as a Unicode string and the reported dtype is a string type.
mixed_values = [1.2, 'A', 2, 3, 4]
arr = np.array(mixed_values)

print(arr.dtype)

<U32


In [6]:
# An array of text values: the dtype is a Unicode string type whose length
# matches the longest element (6 characters here).
fruit_names = ['@apple', 'banana', 'cherry']
arr = np.array(fruit_names)

print(arr.dtype)

<U6


# Creating Arrays With a Defined Data Type

We use the array() function to create arrays, this function can take an optional argument: dtype that allows us to define the expected data type of the array elements:

In [5]:
# Ask for the byte-string kind ('S'): the integers are stored as bytes.
digits = [1, 2, 3, 4]
arr = np.array(digits, dtype='S')

# Show the array and then its dtype, one per line.
print(arr, arr.dtype, sep='\n')

[b'1' b'2' b'3' b'4']
|S1


For i, u, f, S and U we can define size as well.

In [6]:
# Create an array whose elements are 4-byte (32-bit) signed integers.
# In a dtype string the digit after the kind character is the item size in
# bytes, so 'i4' means int32 (and e.g. 'i8' would mean int64).
# NumPy is already imported at the top of the notebook.
arr = np.array([1, 2, 3, 4], dtype='i4')

print(arr)
print(arr.dtype)

[1 2 3 4]
int32


In [9]:
# A 3 x 4 x 2 block of ones stored as Python floats (NumPy float64).
ones_shape = (3, 4, 2)
A = np.ones(ones_shape, dtype=float)
print(A, A.dtype, sep='\n')

[[[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]]
float64


In [7]:
# Ten zeros stored as integers (1-D array of length 10).
np.zeros(shape=10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [8]:
# Ones with 3 rows and 5 columns, stored as floating-point values.
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

# NumPy Sorting Arrays

Sorting Arrays
Sorting means putting elements in an ordered sequence.

Ordered sequence is any sequence that has an order corresponding to elements, like numeric or alphabetical, ascending or descending.

NumPy provides the function np.sort(), which returns a sorted copy of a specified array. (The ndarray object also has a sort() method, which sorts the array in place.)

In [14]:
# Unsorted input used by the sorting examples.
unsorted_values = [3, 2, 0, 1]
arr = np.array(unsorted_values)
print(arr)


[3 2 0 1]


In [15]:
# Note: np.sort returns a sorted copy of the array, leaving the original
# array unchanged.
ascending = np.sort(arr)
print(ascending)

[0 1 2 3]


In [21]:
# Reverse (descending) sorting: sort ascending first, then read the sorted
# result backwards with a negative-step slice.
arr = np.array([3, 2, 0, 1])
print(arr)

sortarr = np.sort(arr)
print(sortarr)

sortarr[::-1]

[3 2 0 1]
[0 1 2 3]


array([3, 2, 1, 0])

In [16]:
# Strings are sorted alphabetically.
fruits = ['banana', 'cherry', 'apple']
arr = np.array(fruits)

fruits_sorted = np.sort(arr)
print(fruits_sorted)

['apple' 'banana' 'cherry']


In [17]:
# Booleans sort with False before True.
flags = [True, False, True]
arr = np.array(flags)

flags_sorted = np.sort(arr)
print(flags_sorted)

[False  True  True]


In [23]:
# Sort a 2-D array: start from two unsorted rows.
rows = [[3, 2, 4], [5, 0, 1]]
arr = np.array(rows)

arr

array([[3, 2, 4],
       [5, 0, 1]])

In [25]:
# np.sort works along the last axis by default, so each row of the 2-D
# array is sorted independently.
newarr = np.sort(arr, axis=-1)
newarr

array([[2, 3, 4],
       [0, 1, 5]])

In [26]:
# Reversing along the first axis flips the order of the rows only; the
# values inside each row keep their ascending order.
revarr = newarr[::-1, :]
print(revarr)

[[0 1 5]
 [2 3 4]]


# NumPy - Mathematical Functions

# Trigonometric Functions

In [27]:
# Angles in degrees.
a = np.array([0, 30, 45, 60, 90])

# Convert to radians by multiplying with pi/180 before taking the sine.
angles_rad = a * np.pi / 180
print('Sine of different angles:')
print(np.sin(angles_rad))

Sine of different angles:
[0.         0.5        0.70710678 0.8660254  1.        ]


In [28]:
# Cosine of the same angles (degrees converted to radians first).
# The last value is a tiny non-zero number rather than exactly 0 because of
# floating-point rounding.
cosine_values = np.cos(a * np.pi / 180)
print('Cosine values for angles in array:')
print(cosine_values)

Cosine values for angles in array:
[1.00000000e+00 8.66025404e-01 7.07106781e-01 5.00000000e-01
 6.12323400e-17]


In [29]:
# Tangent of the same angles (degrees converted to radians first).
# The last value is very large instead of infinite because of
# floating-point rounding.
tangent_values = np.tan(a * np.pi / 180)
print('Tangent values for given angles:')
print(tangent_values)

Tangent values for given angles:
[0.00000000e+00 5.77350269e-01 1.00000000e+00 1.73205081e+00
 1.63312394e+16]


#arcsin, arccos, and arctan functions return the trigonometric inverse of sin, cos, and tan of the given value. The result of these functions can be verified with the numpy.degrees() function by converting radians to degrees.

Sin verification

In [30]:
# Angles in degrees and their sine values (input converted to radians).
a = np.array([0, 30, 45, 60, 90])
sin = np.sin(a * np.pi / 180)

print('Array containing sine values:')
print(sin)


Array containing sine values:
[0.         0.5        0.70710678 0.8660254  1.        ]


In [31]:
# Inverse sine of the sine values; np.arcsin returns angles in radians.
inv = np.arcsin(sin)
print('Compute sine inverse of angles. Returned values are in radians.', inv, sep='\n')



Compute sine inverse of angles. Returned values are in radians.
[0.         0.52359878 0.78539816 1.04719755 1.57079633]


In [32]:
# Converting the radians back to degrees should recover the original angles.
inv_degrees = np.degrees(inv)
print('Check result by converting to degrees:')
print(inv_degrees)

Check result by converting to degrees:
[ 0. 30. 45. 60. 90.]


# Lab exercise


In [None]:
#Verify for cos and tan

# Functions for Rounding

numpy.around(a,decimals)

1	a: Input data

2	decimals:The number of decimals to round to. Default is 0. 

In [2]:
# Sample values with four decimals, used by the rounding examples below.
# NumPy is already imported at the top of the notebook.
a = np.array([1.0125, 5.5462, 1.2342, 0.5679, 25.1532])
print(a)


[ 1.0125  5.5462  1.2342  0.5679 25.1532]


In [3]:
# Round to the nearest whole number (decimals defaults to 0).
rounded = np.round(a)
print('After rounding:')
print(rounded)


After rounding:
[ 1.  6.  1.  1. 25.]


In [4]:
# Keep one digit after the decimal point.
rounded_1dp = np.round(a, 1)
print(rounded_1dp)


[ 1.   5.5  1.2  0.6 25.2]


In [5]:
# Keep two digits after the decimal point.
rounded_2dp = np.round(a, 2)
print(rounded_2dp)

[ 1.01  5.55  1.23  0.57 25.15]


# Arithmetic Operations

Input arrays for performing arithmetic operations such as add(), subtract(), multiply(), and divide() must be either of the same shape or should conform to array broadcasting rules.

In [12]:
# Float array [0., 1., 2.] used as the first operand below.
# np.float64 is spelled out because the np.float_ alias was removed in
# NumPy 2.0; both name the same 64-bit float type on older versions.
a = np.arange(3, dtype=np.float64)
print(a)

[0. 1. 2.]


In [13]:
# Second operand: three tens (same shape as a).
tens = [10, 10, 10]
b = np.array(tens)
print(b)

[10 10 10]


In [14]:
# Element-wise sum a + b.
sum_ab = np.add(a, b)
print('Add the two arrays:')
print(sum_ab)

Add the two arrays:
[10. 11. 12.]


In [16]:
# Element-wise difference a - b.
diff_ab = np.subtract(a, b)
print('Subtract the two arrays:')
print(diff_ab)

Subtract the two arrays:
[-10.  -9.  -8.]


In [17]:
# Element-wise product a * b.
prod_ab = np.multiply(a, b)
print('Multiply the two arrays:')
print(prod_ab)

Multiply the two arrays:
[ 0. 10. 20.]


In [18]:
# Element-wise quotient a / b.
quot_ab = np.divide(a, b)
print('Divide the two arrays:')
print(quot_ab)

Divide the two arrays:
[0.  0.1 0.2]


numpy.reciprocal()
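This function returns the reciprocal of the argument, element-wise. For integer arrays, elements with absolute values larger than 1 always give 0 because the result is truncated to an integer, and for integer 0 a warning is issued. For floating-point arrays (as in the example below) the true reciprocal is returned, and 0 maps to inf with a divide-by-zero RuntimeWarning.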

In [19]:
# Mixed ints and floats are stored as floats; note the 0 entry.
reciprocal_inputs = [0.25, 1.33, 1, 0, 100]
a = np.array(reciprocal_inputs)
print(a)

[  0.25   1.33   1.     0.   100.  ]


In [20]:
# Element-wise 1/x; the 0 entry of this float array becomes inf.
print('After applying reciprocal function:')
reciprocal_values = np.reciprocal(a)
print(reciprocal_values)

After applying reciprocal function:
[4.        0.7518797 1.              inf 0.01     ]


  print(np.reciprocal(a))


numpy.power()


This function treats elements in the first input array as base and returns it raised to the power of the corresponding element in the second input array.

In [22]:
# Bases for the power examples.
bases = [10, 100, 1000]
a = np.array(bases)

print(a)

[  10  100 1000]


In [23]:
# A scalar exponent is applied to every element: each base is squared.
squares = np.power(a, 2)
print('Applying power function:')
print(squares)

Applying power function:
[    100   10000 1000000]


In [24]:
# One exponent per base.
exponents = [1, 2, 3]
b = np.array(exponents)
b

array([1, 2, 3])

In [25]:
# Array exponents are paired element-wise with the bases: a[i] ** b[i].
powers = np.power(a, b)
print('Applying power function again:')
print(powers)

Applying power function again:
[        10      10000 1000000000]


numpy.mod()

This function returns the remainder of division of the corresponding elements in the input array. The function numpy.remainder() also produces the same result.



In [26]:
# Dividends for the remainder examples.
dividends = [10, 20, 30]
a = np.array(dividends)
a

array([10, 20, 30])

In [27]:
# Divisors for the remainder examples.
divisors = [3, 5, 7]
b = np.array(divisors)
b

array([3, 5, 7])

In [28]:
# Element-wise remainder of a[i] / b[i].
mod_values = np.mod(a, b)
print('Applying mod() function:')
print(mod_values)

Applying mod() function:
[1 0 2]


In [29]:
# np.remainder gives the same element-wise result as np.mod.
remainder_values = np.remainder(a, b)
print('Applying remainder() function:')
print(remainder_values)

Applying remainder() function:
[1 0 2]


The following functions are used to perform operations on array with complex numbers.

numpy.real() − returns the real part of the complex data type argument.

numpy.imag() − returns the imaginary part of the complex data type argument.

numpy.conj() − returns the complex conjugate, which is obtained by changing the sign of the imaginary part.

numpy.angle() − returns the angle of the complex argument. The function has a deg parameter. If True, the angle is returned in degrees; otherwise the angle is in radians.


In [30]:
# Two purely imaginary numbers, one real number and one with both parts;
# the whole array is stored as complex.
complex_values = [-5.6j, 0.2j, 11., 1+1j]
a = np.array(complex_values)
print(a)

[-0.-5.6j  0.+0.2j 11.+0.j   1.+1.j ]


In [31]:
# Real part of every element.
real_parts = np.real(a)
print('Applying real() function:')
print(real_parts)

Applying real() function:
[-0.  0. 11.  1.]


In [32]:
# Imaginary part of every element.
imag_parts = np.imag(a)
print('Applying imag() function:')
print(imag_parts)

Applying imag() function:
[-5.6  0.2  0.   1. ]


In [33]:
# Complex conjugate: the sign of each imaginary part is flipped.
conjugates = np.conj(a)
print('Applying conj() function:')
print(conjugates)

Applying conj() function:
[-0.+5.6j  0.-0.2j 11.-0.j   1.-1.j ]


In [34]:
# Angle (argument) of every element, in radians by default.
angles_radians = np.angle(a)
print('Applying angle() function:')
print(angles_radians)

Applying angle() function:
[-1.57079633  1.57079633  0.          0.78539816]


In [35]:
# deg=True reports the same angles in degrees instead of radians.
angles_degrees = np.angle(a, deg=True)
print('Applying angle() function again (result in degrees)')
print(angles_degrees)

Applying angle() function again (result in degrees)
[-90.  90.   0.  45.]


In [37]:
# numpy.square: element-wise square of the input.
values_to_square = [11, 9]
np.square(values_to_square)

array([121,  81], dtype=int32)

In [38]:
# numpy.absolute: input with one negative and one positive value.
signed_values = [-1.2, 1.2]
x = np.array(signed_values)
x

array([-1.2,  1.2])

In [39]:
# Element-wise absolute value: the sign is dropped.
absolute_values = np.absolute(x)
absolute_values

array([1.2, 1.2])

In [40]:
# For a complex number the absolute value is its magnitude,
# sqrt(real**2 + imag**2).
complex_number = 1.2 + 1j
np.absolute(complex_number)

1.5620499351813308

In [41]:
# numpy.maximum: element-wise larger value of two equally long sequences.
first_values = [2, 3, 4]
second_values = [1, 5, 2]
np.maximum(first_values, second_values)

array([2, 5, 4])

In [42]:
# numpy.minimum: element-wise smaller value of two equally long sequences.
first_values = [2, 3, 4]
second_values = [1, 5, 2]
np.minimum(first_values, second_values)

array([1, 3, 2])

In [43]:
# numpy.cbrt: element-wise cube root (the inputs here are perfect cubes).
perfect_cubes = [1, 8, 27]
np.cbrt(perfect_cubes)

array([1., 2., 3.])

In [44]:
# numpy.sqrt: element-wise square root (the inputs here are perfect squares).
perfect_squares = [1, 4, 9]
np.sqrt(perfect_squares)



array([1., 2., 3.])

In [48]:
# Exponents for the np.exp example.
exp_inputs = [1, 2]
x = np.array(exp_inputs)
x

array([1, 2])

In [49]:
# np.exp(x) computes e**x element-wise, where e is the mathematical constant
# called Euler's number, approximately 2.718281.
# Background: https://stackoverflow.com/questions/31951980/what-exactly-does-numpy-exp-do
exp_of_x = np.exp(x)
exp_of_x

array([2.71828183, 7.3890561 ])

There are the following two ways to create linear sequences:

> **np.arange**

> **np.linspace**

# numpy.linspace() in Python

linspace is similar to the colon operator, “:”, but gives direct control over the number of points and always includes the endpoints. 

“lin” in the name “linspace” refers to generating linearly spaced values as opposed to the sibling function logspace, which generates logarithmically spaced values.


numpy.linspace(start, stop, num = 50, endpoint = True, retstep = False, dtype = None) : 

Returns evenly spaced numbers over the given interval. Similar to arange, but instead of a step size it takes the number of samples.

Parameters :

-> start  : start of interval range (required)

-> stop   : end of interval range

-> retstep : If True, return (samples, step). By default retstep = False

-> num    : [int, optional] No. of samples to generate

-> dtype  : type of output array

In [51]:
# Five evenly spaced samples from 2.0 to 3.0, end point included.
np.linspace(start=2.0, stop=3.0, num=5)

array([2.  , 2.25, 2.5 , 2.75, 3.  ])

In [52]:
# endpoint=False leaves the stop value (3.0) out of the five samples.
np.linspace(start=2.0, stop=3.0, num=5, endpoint=False)

array([2. , 2.2, 2.4, 2.6, 2.8])

In [53]:
# retstep=True returns a (samples, step) tuple instead of just the samples.
np.linspace(start=2.0, stop=3.0, num=4, retstep=True)

(array([2.        , 2.33333333, 2.66666667, 3.        ]), 0.3333333333333333)

# LogSpace

LogSpace returns evenly spaced numbers on a log scale. Logspace has the same parameters as np.linspace.

Parameters :

-> start    : [float] start(base ** start) of interval range.

-> stop     : [float] end(base ** stop) of interval range

-> endpoint : [boolean, optional]If True, stop is the last sample. By default, True

-> num      : [int, optional] No. of samples to generate

-> base     : [float, optional] Base of log scale. By default, equals 10.0

-> dtype    : type of output array

In [59]:
# Four samples from 10**1 to 10**4 (the default base is 10).
np.logspace(start=1.0, stop=4.0, num=4)

array([   10.,   100.,  1000., 10000.])

In [60]:
# endpoint=False leaves out the last value, 10**4.
np.logspace(start=1.0, stop=4.0, num=3, endpoint=False)

array([  10.,  100., 1000.])

In [55]:
# With base=11 the samples run from 11**3 to 11**4.
np.logspace(start=3.0, stop=4.0, num=4, base=11)

array([ 1331.        ,  2960.11750055,  6583.24238696, 14641.        ])

# numpy statistical functions

# numpy.random.normal

[creates an array of specified shape and fills it with random values which is actually a part of Normal(Gaussian)Distribution]

numpy.random.normal(loc=0.0, scale=1.0, size=None)

loc=Mean (“centre”) of the distribution.

scale=Standard deviation (spread or “width”) of the distribution. Must be non-negative.

size =Output shape. If size is None (default), a single value is returned 

In [61]:
# Draw 10 samples from a normal distribution with mean 5 and standard
# deviation 0.5. No seed is set, so the values differ on every run.
normal_array = np.random.normal(loc=5, scale=0.5, size=10)
print(normal_array)

[5.00396516 4.39993269 5.74171622 5.62733653 5.07615753 5.27410256
 5.10322297 4.80833951 5.45890088 4.65774774]


In [63]:
# Smallest sample.
sample_min = np.min(normal_array)
print(sample_min)

4.399932692225188


In [64]:
# Largest sample.
sample_max = np.max(normal_array)
print(sample_max)

5.741716223714627


In [65]:
# Arithmetic mean of the samples.
sample_mean = np.mean(normal_array)
print(sample_mean)

5.115142180480937


In [66]:
# Median (middle value) of the samples.
sample_median = np.median(normal_array)
print(sample_median)

5.089690251945341


In [67]:
# Standard deviation of the samples.
sample_std = np.std(normal_array)
print(sample_std)

0.4033932194331755


In [68]:
# Variance of the samples.
sample_var = np.var(normal_array)
print(sample_var)

0.16272608948466208


# percentile() in NumPy

Percentile is a measure used in statistics which indicates the value below which a given percentage of observations in a group of observations falls. This function takes 3 arguments percentile(array,q,axis).

array: the array for which we want to find the percentile

q: the percentile value(0-100)

axis: it can be 0/1

In [69]:
# 3 x 3 grid used by the percentile examples.
grid_rows = [
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90],
]
arr = np.array(grid_rows)
print(arr)

[[10 20 30]
 [40 50 60]
 [70 80 90]]


In [70]:
# 50th percentile (median) of each row: axis=1 reduces across the columns.
# The result is stored first and printed afterwards, because print() returns
# None and assigning its return value would leave new_var empty.
new_var = np.percentile(arr, 50, axis=1)
print(new_var)

[20. 50. 80.]


In [71]:
# 50th percentile (median) of each column: axis=0 reduces across the rows.
column_medians = np.percentile(arr, q=50, axis=0)
print(column_medians)

[40. 50. 60.]


In [None]:
 ###Write a NumPy program to compute the 80th percentile for all elements 
    #in a given array along the second axis

# ptp()

This function returns the range(max-min) of values in the axis.

In [72]:
# The values 0..3 arranged as a 2 x 2 grid.
first_four = np.arange(4)
x = first_four.reshape(2, 2)
print(x)

[[0 1]
 [2 3]]


In [73]:
# Range (max - min) of each column.
np.ptp(a=x, axis=0)

array([2, 2])

In [74]:
# Range (max - min) of each row.
np.ptp(a=x, axis=1)

array([1, 1])


# Correlation in Python


In [75]:
# First variable: the integers 10 through 19.
x = np.arange(start=10, stop=20)
print(x)

[10 11 12 13 14 15 16 17 18 19]


In [76]:
# Second variable: ten observations paired with x.
y_values = [2, 1, 4, 5, 8, 12, 18, 25, 96, 48]
y = np.array(y_values)
print(y)

[ 2  1  4  5  8 12 18 25 96 48]


In [78]:
# 2 x 2 correlation matrix of x and y; the off-diagonal entries hold the
# correlation coefficient between the two variables.
r = np.corrcoef(x=x, y=y)
r

array([[1.        , 0.75864029],
       [0.75864029, 1.        ]])

In [82]:
# Third variable: ten more observations.
z_values = [21, 11, 41, 51, 81, 112, 118, 125, 196, 148]
z = np.array(z_values)
z

array([ 21,  11,  41,  51,  81, 112, 118, 125, 196, 148])

In [81]:
# Passing the three variables together gives the full 3 x 3 correlation
# matrix, one row and one column per variable.
variables = [x, y, z]
r = np.corrcoef(variables)
r

array([[1.        , 0.75864029, 0.94945273],
       [0.75864029, 1.        , 0.86738492],
       [0.94945273, 0.86738492, 1.        ]])

# Covariance Matrix using Python

Covariance is a measure of how much two random variables vary together. It’s similar to variance, but where variance tells you how a single variable varies, covariance tells you how two variables vary together.

Covariance is nothing but a measure of correlation

Covariance can vary between -∞ and +∞
While correlation coefficients lie between -1 and +1, 

Covariance assumes the units from the product of the units of the two variables. 
On the other hand, correlation is dimensionless.

The value of covariance is affected by the change in scale of the variables.
However the value of correlation is not influenced by the change in scale of the values

In [84]:
# Three variables with five observations each. Stacked as rows, they form
# the 3 x 5 input for the covariance and correlation examples.
A = [45, 37, 42, 35, 39]
B = [38, 31, 26, 28, 33]
C = [10, 15, 17, 21, 12]

data = np.array((A, B, C))
data

array([[45, 37, 42, 35, 39],
       [38, 31, 26, 28, 33],
       [10, 15, 17, 21, 12]])

In [85]:
# Covariance matrix of the rows of `data`.
# bias: the default normalization (False) is by (N - 1), where N is the
# number of observations given (unbiased estimate). With bias=True, as here,
# the normalization is by N.
covMatrix = np.cov(m=data, bias=True)
print(covMatrix)

[[ 12.64   7.68  -9.6 ]
 [  7.68  17.36 -13.8 ]
 [ -9.6  -13.8   14.8 ]]


In [86]:
# Correlation matrix of the same three rows; unlike the covariance values,
# every entry lies between -1 and +1.
r = np.corrcoef(x=data)
print(r)

[[ 1.          0.5184571  -0.70188642]
 [ 0.5184571   1.         -0.86094096]
 [-0.70188642 -0.86094096  1.        ]]


# Lab Exercises

In [None]:
#Write a NumPy program to find the indices of the maximum and minimum values along the given axis of an array.

In [None]:
#Write a NumPy program to create a 1-D array of 20 elements spaced evenly on a log scale between 2. and 5.

In [None]:
#Write a NumPy program to sum and compute the product of a NumPy array elements.
#Hint:np.sum() and np.prod()

In [None]:
#Write a NumPy program to calculate 50th, 40th and 90th percentiles for a sequence or single-dimensional NumPy array

In [None]:
# Write a NumPy program to add, subtract, multiply, divide arguments element-wise.

In [None]:
# Write a NumPy program to get the powers of an array values element-wise.

In [None]:
#Write a NumPy program to get the element-wise remainder of an array of division by 5.
#Hint:np.remainder(x, 5)

In [None]:
#Write a NumPy program to calculate the absolute value element-wise

In [None]:
#Write a NumPy program to round elements of the array to the nearest integer. 


In [None]:
#Write a Python program to find the maximum and minimum value of a given flattened array. 

In [None]:
#Write a NumPy program to get the minimum and maximum value of a given array along the second axis.

In [None]:
#Write a NumPy program to compute the median of flattened given array

In [None]:
#Write a NumPy program to compute the mean, standard deviation, and variance of a given array along the second axis.

In [None]:
#Write a NumPy program to compute the covariance matrix of two given arrays

In [None]:
#Write a NumPy program to compute cross-correlation of two given arrays. 

In [None]:
#Write a NumPy program to calculate the difference between the maximum and the minimum values of a given array along the second axis.

In [None]:
# Create a vector of size 10 with values ranging from 0 to 1, both excluded 

In [None]:
# The values 0..3 as a 2 x 2 grid, followed by the maximum of each row
# (axis=1 reduces across the columns).
x = np.arange(4).reshape((2, 2))
print(x)
np.amax(a=x, axis=1)
