# 3. Create Numpy Arrays

### Task 1: Create 1-D numpy array

In [None]:
# Import library
import numpy as np
import pandas as pd

# 1D Integer numpy
a1 = np.array([0, 1, 2, 3, 4, 5, 6, 7])

# Float numpy
a2 = np.array([1.2,1.4,1.6,1.8,2.0])

# String numpy
a3 = np.array(['Mary', 'John', 'Peter', 'Christine'])

# Convert date range to 1D Numpy array
date_range = pd.date_range(start='2017-10-01', end='2017-10-31') # Create Date Range
a4 = np.array(date_range.strftime('%Y-%m-%d'))

# 10 random float between 0 and 1
a5 = np.random.rand(10)

# 20 random integer between 100 and 200
np.random.seed(100) # set the seed for reproducibility
a6 = np.random.randint(low=100, high=200, size=20)

# 50 evenly spaced number between 5 and 10
a7 = np.linspace(5, 10, 50)

### Task 2: Create 2-D numpy array

In [None]:
# Import library
import numpy as np
import pandas as pd

# Integer 2x2
b1 = np.array([(1,2,3,4), (5,6,7,8)])

# Float 3x3
b2 = np.array([(1.5,2.5,3.5),(4.5,5.5,6.5),(7.5,8.5,9.5)])

# 3x3
b3 = np.array([(1.6,1.7,1.8),(2.5,5.0,7.5),(20,40,60)])

# 3x4 2D array, with initial value zero
b4 = np.zeros((3,4))

# 5x5 2D array, with initial value one
b5 = np.ones((5,5))

# 4x3 2D array, with initial value 10.5
b6 = np.full((4,3), 10.5)

# 4x4 identify matrix
b7 = np.eye(4)

# 4x8 array with random values between 1 and 6
b8 = np.random.randint(1, 6, size=(4,8))

# 4. Inspect Numpy arrays

### Task 1: Use the _shape_, _ndim_, _size_ properties and _len_ function

In [None]:
# Create 4 arrays with the values as shown in (a) to (d), then use the following properties and function on each array to inspect each one.

# Import library
import numpy as np

# Given data
c1 = [1.55, 1.73, 1.9, 1.72, 1.66, 1.8]
c2 = [('red', 'green', 'yellow', 'blue'),(True,False, False, True),
 ('2017-10-01', '2017-11-11', '2017-12-23', '2018-03-31')]
c3 = [[(10,9,8,7),(6,5,4,3)], [(1,2,3,4),(5,6,7,8)]]

# Convert into numpy
c1_a = np.array(c1)
c2_a = np.array(c2)
c3_a = np.array(c3)

# Shape
print(f"shape \n{c1_a.shape}, \n{c2_a.shape}, \n{c3_a.shape}\n")

# Dimension
print(f"ndim \n{c1_a.ndim}, \n{c2_a.ndim}, \n{c3_a.ndim}\n")

# Size
print(f"size \n{c1_a.size}, \n{c2_a.size}, \n{c3_a.size}\n")

# Length
print(f"len \n{len(c1_a)}, \n{len(c2_a)}, \n{len(c3_a)}\n")


### Task 2: Use the _type_ function, _dtype_ property

In [None]:
# Apply the type function and dtype property on each of the arrays in Task 1 to inspect them
print(f"Array 1\n{type(c1_a)}\n{c1_a.dtype}\n")
print(f"Array 2\n{type(c2_a)}\n{c2_a.dtype}\n")
print(f"Array 3\n{type(c3_a)}\n{c3_a.dtype}")

# 5. Manipulating Array Shapes

### Task 1: Flatten an array

In [None]:
# Import library
import numpy as np

# Initialize the multi-dimensional array
d1 = np.array([ ['Mary', 'John','Bob'],
               ['Zoe', 'Chris','Ann'],
               ['Leon', 'Kathy','Sam']])

# Convert the multi-dimensional array into a 1-D array
d1_flattened = d1.flatten()

print(d_flattened)

### Task 2: Reshape arrays

In [None]:
# Import library
import numpy as np

# Inititate 1D array
d2 = np.arange(15,30)

# Reshape the 1D array into 3x5 multi-dimensional array
d2_reshaped = b2.reshape((3,5))

# Inititate 2x6 array
d3 = np.array([[1,2,3,4,5,6], [7,8,9,10,11,12]])

# Reshape 2x6 array into 3x4 array
d2_reshaped = d2.reshape((3,4))

print(d2_reshaped)
print(d3_reshaped)

### Task 3: Transpose array

In [None]:
# Initiate 2x6 array
d4 = np.array([[1,2,3,4,5,6], [7,8,9,10,11,12]])

# Transpose the 2x6 array into 6x2
d4_transposed = d4.T

print(d4_transposed)

# 6. Manipulating Array Content

### Task 1: Concatenate arrays (SUBMISSION required)

In [2]:
# Import library
import numpy as np

# Initialize the arrays
e1 = np.array([[ 1,2,3],[ 4,5,6],[7,8,9]])
e2 = np.full((3,3),1.5)
e3 = np.arange(0,15).reshape(5,3)

# Part 1a: Concatenate the arrays on axis=0 (0=vertical stack)
e123_concated = np.concatenate((e1, e2, e3), axis=0)

print("Part 1a concatenate the arrays on axis=0.")
print(f"{e123_concated}\n")

# Initialize the array
e4 = np.array([[ 1,2,3,4],[ 4,5,6,7],[7,8,9,10],[11,12,13,14]])
e5 = np.random.randint(100,200,(4,6))
e6 = np.arange(0,40).reshape(4,10)

# Concatenate the arrays on axis=1 (1=horizontal stack)
e456_concated = np.concatenate((e4, e5, e6), axis=1)

print("Part 1b concatenate the arrays on axis=1.")
print(e456_concated)

Part 1a concatenate the arrays on axis=0.
[[ 1.   2.   3. ]
 [ 4.   5.   6. ]
 [ 7.   8.   9. ]
 [ 1.5  1.5  1.5]
 [ 1.5  1.5  1.5]
 [ 1.5  1.5  1.5]
 [ 0.   1.   2. ]
 [ 3.   4.   5. ]
 [ 6.   7.   8. ]
 [ 9.  10.  11. ]
 [12.  13.  14. ]]

Part 1b concatenate the arrays on axis=1.
[[  1   2   3   4 175 197 182 166 109 154   0   1   2   3   4   5   6   7
    8   9]
 [  4   5   6   7 107 181 164 142 133 117  10  11  12  13  14  15  16  17
   18  19]
 [  7   8   9  10 142 174 196 105 174 127  20  21  22  23  24  25  26  27
   28  29]
 [ 11  12  13  14 198 130 119 110 105 103  30  31  32  33  34  35  36  37
   38  39]]


### Task 2: Split arrays

In [None]:
# Import library
import numpy as np

# Initialize the array
e7 = np.arange(100)

# Split the array into 20 sub-arrays
e7_splitted = np.split(e7, 20)

# Initialize the array
e8 = np.arange(100)

# Define intervals
intervals = [10, 25, 45, 75,95]

# Split the arrays by intervals
e8_splitted = np.array_split(e8, intervals)

# 8. Sorting arrays


### Task 1: Basic Sorting (SUBMISSION required)

In [6]:
# Import library
import numpy as np

# Initialize 1D array
arr_1 = np.random.randint(100, 200, 10)

# Sort array
arr_1_sorted = np.sort(arr_1)

print(f"** Part 8a: Before sorting **\n {arr_1}")
print(f"** Part 8a: After sorting **\n {arr_1_sorted}\n")

# Initialize 2D array
arr_2 = np.random.randint(1, 20, (3,5))

# Sort array
arr_2_sorted = np.sort(arr_2, axis=0)

print(f"** Part 8b: Before sorting **\n {arr_2}")
print(f"** Part 8b: After sorting **\n {arr_2_sorted}\n")

# Initialize 2D array
arr_3 = np.random.randint(100, 200, (2,5))
arr_3_copied = arr_3.copy()

# Sort array
arr_3c_sorted = np.sort(arr_3_copied, axis=0)

print(f"** Part 8c: Before sorting - original array **\n {arr_3}")
print(f"** Part 8c: After sorting - original array **\n {arr_3}")
print(f"** Part 8c: After sorting - copy of sorted array **\n {arr_3c_sorted}")



** Part 8a: Before sorting **
 [111 123 156 144 161 101 171 126 128 124]
** Part 8a: After sorting **
 [101 111 123 124 126 128 144 156 161 171]

** Part 8b: Before sorting **
 [[11 14 17 12 12]
 [ 1 18  8 10  3]
 [ 2 12 17  6  7]]
** Part 8b: After sorting **
 [[ 1 12  8  6  3]
 [ 2 14 17 10  7]
 [11 18 17 12 12]]

** Part 8c: Before sorting - original array **
 [[119 102 158 191 161]
 [137 116 171 139 121]]
** Part 8c: After sorting - original array **
 [[119 102 158 191 161]
 [137 116 171 139 121]]
** Part 8c: After sorting - copy of sorted array **
 [[119 102 158 139 121]
 [137 116 171 191 161]]


# 9. Subsetting and Indexing

### Task 1: Basic subsetting

In [None]:
# Import library
import numpy as np

# Initialize array
a = np.arange(1,100,1)
b = np.array([[1,2,3,4], [100,200,300,400],  [5,6,7,8],  [9,10,11,12] ])

# Output of a[3] and b[3] and its datatype
print(f"Out of a[3] is {a[3]} and datatype is {a.dtype}")
print(f"Out of b[3] is {b[3]} and datatype is {b.dtype}\n")

# print out 2nd and 3rd row of array b
print(f"{b[1:3]}\n")

# print out the all the rows of last column of array b
print(f"{b[:, -1]}\n")

# print out row 3 to end, column 2 to end of array b
print(f"{b[2:, 1:]}\n")

# print elements from index 10 to 20 with step 2 from array a
print(f"{a[10:20:2]}\n")

# Select and reverse elements in array a from index 0 to the end, print
print(f"{a[0:: -1]}")

Out of a[3] is 4 and datatype is int64
Out of b[3] is [ 9 10 11 12] and datatype is int64

[[100 200 300 400]
 [  5   6   7   8]]

[  4 400   8  12]

[[ 6  7  8]
 [10 11 12]]

[11 13 15 17 19]

[1]


### Task 2: Boolean Indexing (SUBMISSION required)

In [7]:
# Import library
import numpy as np

# Initialize array
a = np.array((np.arange(0,10),
             np.arange(10,20),
             np.arange(20,30),
             np.arange(30,40)))
b = np.random.randint(100,200,(3,3))

# Part 9-2a: Return all even numbers in array a
a_even = a[a%2 == 0]
print(f"Original array a: \n{a}.")
print(f"All even numbers in array a are {a_even}.\n")

# Part 9-2b: Return all numbers that are greater than 150 in array b
b_greater_150 = b[b > 150]
print(f"Original array b:\n {b}.")
print(f"Elements that are greater than 150 in array b are {b_greater_150}.")

Original array a: 
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]].
All even numbers in array a are [ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38].

Original array b:
 [[172 107 163]
 [186 160 188]
 [197 151 119]].
Elements that are greater than 150 in array b are [172 163 186 160 188 197 151].


### Task 3: Indexing using WHERE

In [None]:
# Import library
import numpy as np

# Initialize array
b = np.arange(9.).reshape(3,3)

# print out the values and the rows and indexes where the elements are greater than 5
b5 = np.where(b > 5,b,0)
print("*** Values of where the elements are, and their rows/columns")
for i in range(0,3):
  print(f"Value: {b5[2][i]:.0f} in row {np.where(b>5)[1][2]}, column {np.where(b>5)[1][i]}")


*** Values of where the elements are, and their rows/columns
Value: 6 in row 2, column 0
Value: 7 in row 2, column 1
Value: 8 in row 2, column 2


# 10. Array Math

### Task 1: Add, substract, multiply and divide numpy arrays

In [None]:
# Import library
import numpy as np

# Initialize arrays
a = np.arange(100,200,1).reshape(20,5)
b = np.arange(1,2000,20).reshape(10,10)
c = np.array([[1,2,3], [4,5,6]])

# Part A: Try to perform print(a+b)
# print(a+b) # return error

# Part B: Create an array a2 that has the same shape as array a, containing random number between 0 and 100.
# Print the content a+a2
a2 = np.random.randint(0,100,(20,5))
# print(f"Original array a: {a}")
# print(f"Original array a2: {a2}")
print(f"Array a+a2: {a+a2}\n")

# Part C: Extract a subset of array b into variable b1,
# subtract array c from b1
b1 = np.random.choice(b[0], size=(2,3))
# print(f"Original array c: {c}")
# print(f"Original array b1: {b1}")
print(f"Array b-c: {b1-c}\n")

# Part D: Extract a subset of array a into a2, where a2 are even numbers and same shape as array c
# multiple a2 and c.
a2 = np.random.choice(a[a%2==0][0], size=(2, 3))
# print(f"Original array a2: {a2}")
# print(f"Original array c: {c}")
print(f"Array a2*c: {a2*c}\n")

# Part E: Create an array c2 which contain array c multiplied by 10
# Use a2 in part D to perfrom a2/c2
c2 = c*10
print(f"Array a2/c2: {a2/c2}\n")

# 11. Statistical methods

### Task 1: sum, mean (SUBMISSION required)

In [8]:
# Import library
import numpy as np

# Initialize array
a = np.array((np.arange(0,10),
             np.arange(10,20),
             np.arange(20,30),
             np.arange(30,40)))

# Print contents of array a
print(f"Content of array a: \n{a}\n")


# Sum of all numbers in array a
print(f"Sum of all numbers in array a: {a.sum()}\n")

# Mean of all numbers in array a
print(f"Mean of all numbers in array a: {a.mean()}\n")

# Sum of all numbers each row
print("Sum of all numbers in each row")
for i in range(len(a)):
  print(f"Row {i+1} sum = {sum(a[i])}")


Content of array a: 
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]]

Sum of all numbers in array a: 780

Mean of all numbers in array a: 19.5

Sum of all numbers in each row
Row 1 sum = 45
Row 2 sum = 145
Row 3 sum = 245
Row 4 sum = 345


### Task 2: std, var

In [None]:
# Import library
import numpy as np

# Initialize array
a = np.arange(10,50,0.5)
b = np.arange(10,20,0.1)

# Print content of array a
print(f"Content of array a\n{a}\n")

# Print content of array b
print(f"Content of array b\n{b}\n")

# Standard deviation of arrays a & b
print(f"Standard Deviation for array a: {a.std():.2f}")
print(f"Standard Deviation for array b: {b.std():.2f}\n")

# Variance of arrays a & b
print(f"Variance for array a: {a.var():.2f}")
print(f"Variance for array b: {b.var():.2f}")

Content of array a
[10.  10.5 11.  11.5 12.  12.5 13.  13.5 14.  14.5 15.  15.5 16.  16.5
 17.  17.5 18.  18.5 19.  19.5 20.  20.5 21.  21.5 22.  22.5 23.  23.5
 24.  24.5 25.  25.5 26.  26.5 27.  27.5 28.  28.5 29.  29.5 30.  30.5
 31.  31.5 32.  32.5 33.  33.5 34.  34.5 35.  35.5 36.  36.5 37.  37.5
 38.  38.5 39.  39.5 40.  40.5 41.  41.5 42.  42.5 43.  43.5 44.  44.5
 45.  45.5 46.  46.5 47.  47.5 48.  48.5 49.  49.5]

Content of array b
[10.  10.1 10.2 10.3 10.4 10.5 10.6 10.7 10.8 10.9 11.  11.1 11.2 11.3
 11.4 11.5 11.6 11.7 11.8 11.9 12.  12.1 12.2 12.3 12.4 12.5 12.6 12.7
 12.8 12.9 13.  13.1 13.2 13.3 13.4 13.5 13.6 13.7 13.8 13.9 14.  14.1
 14.2 14.3 14.4 14.5 14.6 14.7 14.8 14.9 15.  15.1 15.2 15.3 15.4 15.5
 15.6 15.7 15.8 15.9 16.  16.1 16.2 16.3 16.4 16.5 16.6 16.7 16.8 16.9
 17.  17.1 17.2 17.3 17.4 17.5 17.6 17.7 17.8 17.9 18.  18.1 18.2 18.3
 18.4 18.5 18.6 18.7 18.8 18.9 19.  19.1 19.2 19.3 19.4 19.5 19.6 19.7
 19.8 19.9]

Standard Deviation for Array a: 11.55
Standa

### Task 3: min/max, argmin/argmax

In [None]:
# Import library
import numpy as np

# Initialize array
a = np.random.randint(1,1000,(3,4))

# Print content of row
print(f"Contents of array a\n {a}\n")

# Print the max value of each row in array a
print(f"Max value for each row: {a.max(axis=1)}")

# print max value and its row & column in array a
print(f"Max value in array a: {a.max()}")
max_index = np.unravel_index(np.argmax(a, axis=None), a.shape)
print(f"Row and Column of {a.max()} is ([{max_index[0]}], [{max_index[1]}])\n")

# Print the min value of each row in array a
print(f"Min value for each row: {a.min(axis=1)}")

# print min value in array
print(f"Min value in array a: {a.min()}")
min_index = np.unravel_index(np.argmin(a, axis=None), a.shape)
print(f"Row and Column of {a.min()} is ([{min_index[0]}], [{min_index[1]}])")

Contents of array a
 [[739  14 589 251]
 [290 109 891 614]
 [655 663 835 948]]

Max value for each row: [739 891 948]
Max value in array a: 948
Row and Column of 948 is ([2], [3])

Min value for each row: [ 14 109 655]
Min value in array a: 14
Row and Column of 14 is ([0], [1])


### Task 4: cumsum, cumprod

In [None]:
# import library
import numpy as np

# initialize array
a = np.arange(1,21).reshape(4,5)

# print content of a
print(f"Contents of array a\n{a}\n")

# print cumulative sum along a-xis (0=vertical stack)
print(f"Cumulative sum of along x-axis: \n{a.cumsum(axis=0)}\n")

# print cumulative sum along y-xis (1=horizontal stack)
print(f"Cumulative sum of along y-axis: \n{a.cumsum(axis=1)}\n")

# print cumulative product along x-xis (0=vertical stack)
print(f"Cumulative product of along x-axis: \n{a.cumprod(axis=0)}\n")

# print cumulative product along y-xis (1=horizontal stack)
print(f"Cumulative product of along y-axis: \n{a.cumprod(axis=1)}\n")

Contents of array a
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]

Cumulative sum of along x-axis: 
[[ 1  2  3  4  5]
 [ 7  9 11 13 15]
 [18 21 24 27 30]
 [34 38 42 46 50]]

Cumulative sum of along y-axis: 
[[ 1  3  6 10 15]
 [ 6 13 21 30 40]
 [11 23 36 50 65]
 [16 33 51 70 90]]

Cumulative product of along x-axis: 
[[    1     2     3     4     5]
 [    6    14    24    36    50]
 [   66   168   312   504   750]
 [ 1056  2856  5616  9576 15000]]

Cumulative product of along y-axis: 
[[      1       2       6      24     120]
 [      6      42     336    3024   30240]
 [     11     132    1716   24024  360360]
 [     16     272    4896   93024 1860480]]



# 12. Filo I/O on Numpy arrays

### Task 1: Load and save a numpy array (SUBMISSION required)

In [9]:
# Import library
import numpy as np

# Initialize variable
fname = "singapore-residents-by-ethnic-group-and-sex-end-june-annual.csv"

# Read csv file with loadtxt()
data = np.loadtxt(fname, skiprows=1,
                  dtype=[('year','i8'),('level_1','U50'),('value','i8')], delimiter=',')

# print the total rows of data in the file
print(f"There are altogether {len(data)} rows of data in the file '{fname}'.\n")

# print out the number of years of data captured
data_years = data['year'] # extract the year column
years = np.unique(data_years) # get the unique values in this column
print(f"There are {len(years)} years of data captured from {years.min()} to {years.max()}.\n")

# Extract only the rows with "Total Residens" in the "level_1" column
keyword = "Total Residents"
column_to_search = data['level_1']
out = [i for i,v in enumerate(column_to_search) if keyword in v] # Filter index if meet keyword
data_total_residents = data[out]

# Print out the year which has the highest total number of residents
max = data_total_residents['value'].max()
argmax = data_total_residents['value'].argmax()
print(f"The year with the highest total number of residents is {data_total_residents[argmax]['year']}.")
print(f"Population Count: {max}.\n")

# print out year which has the lowest total number of residents
min = data_total_residents['value'].min()
argmin = data_total_residents['value'].argmin()
print(f"The year with the lowest total number of residents is {data_total_residents[argmin]['year']}.")
print(f"Population Count: {min}.")

There are altogether 855 rows of data in the file 'singapore-residents-by-ethnic-group-and-sex-end-june-annual.csv'.

There are 57 years of data captured from 1960 to 2016.

The year with the highest total number of residents is 2016.
Population Count: 3933559.

The year with the lowest total number of residents is 1960.
Population Count: 1646400.


In [None]:
max

3933559

In [None]:
argmax

56