# Numpy exercises

In [61]:
import numpy as np

In [62]:
np.random.seed(123)

#### Print the numpy version

In [63]:
print(np.__version__)

1.10.4


#### Create the following vectors

In [64]:
# A vector of zeros of size 10 but the fifth value equal to 1
Z = np.zeros(10)
Z[4] = 1
print(Z)

# A vector with values ranging from 1 to 50
print np.arange(1, 51)

# A vector with the integer multiples of 5 less than 100
print np.arange(0, 100, 5)

# Reverse the vector reciently created (first element becomes last)
print np.arange(0, 100, 5)[::-1]

[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50]
[ 0  5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95]
[95 90 85 80 75 70 65 60 55 50 45 40 35 30 25 20 15 10  5  0]


#### Create a 3x3 matrix with values ranging from 1 to 9

In [65]:
Z = np.arange(1,10).reshape(3,3)
print(Z)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### Find indices of non-zero elements from ``[1,2,0,0,4,0]``

In [66]:
x = np.array([1,2,0,0,4,0])
nz = np.nonzero(x)
print nz[0]
print np.where(x != 0)[0]

[0 1 4]
[0 1 4]


#### Create a 10x10 array with random values and find the minimum and maximum values

In [67]:
Z = np.random.random((10,10))
Zmin, Zmax = Z.min(), Z.max()
print(Zmin, Zmax)

(0.016129206695016829, 0.99535848203401744)


#### Create a random vector of size 30 and find the mean and the standard deviation

In [68]:
Z = np.random.random(30)
print Z.mean()
print Z.std()

0.517560667551
0.275640205023


#### Create a 2d array of shape (10,10) with 1 on the border and 0 inside

In [69]:
Z = np.ones((10,10))
Z[1:-1,1:-1] = 0
print(Z)

[[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]


#### Create random vector of size 10 and replace the maximum value by 0

In [70]:
Z = np.random.random(10)
Z[Z.argmax()] = 0
print(Z)

[ 0.73107304  0.16106901  0.60069857  0.86586446  0.          0.07936579
  0.42834727  0.20454286  0.45063649  0.54776357]


#### Create two arrays of size 10 with random integers between 1 and 20. Find common numbers between two arrays.

In [71]:
a1 = np.random.randint(0,20,10)
print a1
a2 = np.random.randint(0,20,10)
print a2
print np.intersect1d(a1,a2)

[ 4  4  5 16  9  2  5  1  5  9]
[15  2 10  4  3 16  3  2  5 17]
[ 2  4  5 16]


#### Create a 3 x 5 matrix of integers from 0 to 9, find the sums of the rows, the columns and the whole matrix

In [72]:
M = np.array(np.random.randint(0,9,15), dtype=np.int).reshape(3,5)
print M
print '\n===='
print M.sum(axis = 0)  #column sums
print M.sum(axis = 1) #row sums
print M.sum() # entire matrix

[[7 5 1 2 8]
 [5 0 3 3 3]
 [1 7 6 3 1]]

====
[13 12 10  8 12]
[23 14 18]
55


#### Calculate the values of the function $f(x) = x^2 + 3x - 6$ for 100 numbers in from -2 to 2

In [73]:
x = np.linspace(-2, 2, num=100)
x**2 + 3*x - 6

array([-8.        , -8.03877155, -8.07427813, -8.10651974, -8.13549638,
       -8.16120804, -8.18365473, -8.20283645, -8.21875319, -8.23140496,
       -8.24079176, -8.24691358, -8.24977043, -8.24936231, -8.24568922,
       -8.23875115, -8.22854811, -8.21508009, -8.19834711, -8.17834915,
       -8.15508622, -8.12855831, -8.09876543, -8.06570758, -8.02938476,
       -7.98979696, -7.94694419, -7.90082645, -7.85144373, -7.79879604,
       -7.74288338, -7.68370574, -7.62126314, -7.55555556, -7.486583  ,
       -7.41434547, -7.33884298, -7.2600755 , -7.17804306, -7.09274564,
       -7.00418325, -6.91235588, -6.81726354, -6.71890623, -6.61728395,
       -6.51239669, -6.40424446, -6.29282726, -6.17814509, -6.06019794,
       -5.93898582, -5.81450872, -5.68676666, -5.55575962, -5.4214876 ,
       -5.28395062, -5.14314866, -4.99908173, -4.85174982, -4.70115294,
       -4.54729109, -4.39016427, -4.22977247, -4.0661157 , -3.89919396,
       -3.72900724, -3.55555556, -3.37883889, -3.19885726, -3.01

#### Implement the following algorithm in a function to detect outliers in an vector of numbers:
1. Calculate the first and third quartiles $Q_1, Q_3$ of a vector.
1. Calculate the inter-quartile range (IQR) of a vector.
2. Categorize as outliers any value outside the intervar $[Q_1 - 1.5\times IQR, Q_3 + 1.5 \times IQR]$

In [74]:
def detect_outliers(vector):
    Q1 = np.percentile(vector, q=25)
    Q3 = np.percentile(vector, q=75)
    IQR = Q3 - Q1
    mask = (vector < (Q1 - 1.5*IQR)) | (vector > (Q3 + 1.5*IQR))
    return vector[mask]

print detect_outliers(np.random.normal(size=100)) # 100 random normal values
print detect_outliers(np.random.normal(size=500)) # 500 random normal values
print detect_outliers(np.random.normal(size=2000)) # 2000 random normal values

[-2.5570546  -3.23105501  2.2007021 ]
[ 2.95862545 -2.78811288  3.57157922]
[-2.7944723   2.76660307 -3.16705533 -2.78083727 -2.93022284 -3.8013782
  2.85070774 -2.83365746 -3.58749383 -2.63792249 -2.84084736 -3.06698763
 -2.92002932 -2.87205861  2.79197117 -2.67449029]


#### Using the same algorithm as before, implement a function that replaces all the outliers with the appropiate endpoint of the given interval (for small outliers use the left endpoint and for large outliers the right one)

In [75]:
def replace_outliers(vector):
    Q1 = np.percentile(vector, q=25)
    Q3 = np.percentile(vector, q=75)
    IQR = Q3 - Q1
    vector[vector < (Q1 - 1.5*IQR)] = Q1 - 1.5*IQR
    vector[vector > (Q3 + 1.5*IQR)] = Q3 + 1.5*IQR
    return vector

x = np.random.normal(size=10)
x[0] = 20 # this is an outlier we introduce in our vector
print x
print replace_outliers(x)

[ 20.           0.31902867  -1.57333838   0.82567769  -0.31525042
   0.24229681  -0.21341039  -0.08020001  -1.11384279  -0.16879759]
[ 1.18429988  0.31902867 -1.17424459  0.82567769 -0.31525042  0.24229681
 -0.21341039 -0.08020001 -1.11384279 -0.16879759]


#### Using the standard normal distribution create a random vector of size 100. Find the closest value to 1

In [76]:
z = np.random.normal(size = 100)
index_closest = (np.abs(z-1)).argmin()
print(z[index_closest])

0.98581491881


#### Repeate the same excersise as above but this time create a 100 x 10 matrix. Find the closest value to 1 in every column.

In [77]:
z = np.random.normal(size = 1000).reshape(100,10)
index_closest = np.abs(z - 1).argmin(axis=0)
[z[:,i][index_closest[i]] for i in range(10)]

[1.013471037295286,
 1.0201166859834945,
 1.0423080349227836,
 1.0725108548069282,
 1.0624078153219716,
 1.0141309534268013,
 1.0114098227508985,
 1.0073538727733313,
 0.99337636049268041,
 1.0711998720624445]

#### Create a matrix 20 x 5 with random normal values with mean=100. Subtract the mean of each column of the matrix

In [78]:
X = np.random.normal(loc=100, size=(20,5))

Y = X - X.mean(axis=0)
Y.mean(axis=0) # Try to explain this result

array([ -1.91846539e-14,   1.35003120e-14,  -1.84741111e-14,
         4.26325641e-15,  -2.84217094e-15])

#### Create an array of 100 integers from 0 to 9. Find the most and the least frequent values in the array.

In [79]:
Z = np.random.randint(0,10,100)
print "Least Frecuent: ", np.bincount(Z).argmin()
print "Most Frecuent: ",np.bincount(Z).argmax()

Least Frecuent:  8
Most Frecuent:  3


#### Simulate tossing a fair coin 20 times, count the number of "heads".

In [80]:
coin = np.random.randint(0,2,size=20)
print coin # we assume that 1 is "heads"
print "We got {} heads".format(coin.sum())

[0 1 1 1 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0]
We got 9 heads


#### As in the last exercise, simulate tossing a fair coin 20 times, do the same simulation 1,000 times. Count the number of "heads" for each simulation (save them in an array ``counts``). Report and give an interpretation of the descriptive statistics of ``counts``. Print the distribution of ``counts``

In [81]:
#Each column of this matrix will be one 20-tosses simulation
coin_matrix = np.random.randint(0,2,size=(20,1000)) 
#Counting the number of heads in each simulation (1 = "heads")
counts = coin_matrix.sum(axis=0)

print counts.mean()
print np.median(counts)
print counts.min(), counts.max()
print counts.std()

unique_numbers = np.unique(counts) # The numbers that where observed in the counts vector
observed_times = np.bincount(counts) # Check out the documentation for the np.bincount function
observed_times = observed_times[observed_times>0] # deleting the zero-counts
print "===============\n"
for n, count in zip(unique_numbers, observed_times):
    print "{} heads observed {} times".format(n, count)

9.945
10.0
2 16
2.26008296308

2 heads observed 1 times
3 heads observed 1 times
4 heads observed 8 times
5 heads observed 21 times
6 heads observed 35 times
7 heads observed 68 times
8 heads observed 117 times
9 heads observed 167 times
10 heads observed 170 times
11 heads observed 168 times
12 heads observed 123 times
13 heads observed 67 times
14 heads observed 31 times
15 heads observed 19 times
16 heads observed 4 times
