In [1]:
# # numpy
import numpy as np

In [2]:
array = np.array([1, 2, 3, 4])
print(array)

[1 2 3 4]


In [3]:
zeroes = np.zeros((3, 3))

print(zeroes)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [4]:
ones = np.ones((2, 4))

print(ones)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [5]:
range_array = np.arange(1, 100, 3)

print(range_array)


[ 1  4  7 10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55 58 61 64 67 70
 73 76 79 82 85 88 91 94 97]


In [6]:
linspace_array = np.linspace(0, 100, 30)

print(linspace_array)

[  0.           3.44827586   6.89655172  10.34482759  13.79310345
  17.24137931  20.68965517  24.13793103  27.5862069   31.03448276
  34.48275862  37.93103448  41.37931034  44.82758621  48.27586207
  51.72413793  55.17241379  58.62068966  62.06896552  65.51724138
  68.96551724  72.4137931   75.86206897  79.31034483  82.75862069
  86.20689655  89.65517241  93.10344828  96.55172414 100.        ]


In [7]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
reshaped = arr.reshape((4, 3))
print(reshaped)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [9]:
arr = np.array([1, 2, 3])
expanded = arr[:, np.newaxis]
print(expanded)

[[1]
 [2]
 [3]]


In [12]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(a+b)
print(a*b)
print(a/b)

[5 7 9]
[ 4 10 18]
[0.25 0.4  0.5 ]


In [13]:
arr = np.array([4, 16, 25])
print(np.sqrt(arr))
print(np.sum(arr))
print(np.mean(arr))
print(np.max(arr))

[2. 4. 5.]
45
15.0
25


In [16]:
arr = np.array([10, 20, 30, 40, 50, 60])

print(arr[2])
print(arr[:3])
print(arr[-1])
print(arr[1:4])
reshaped = arr.reshape(2, 3)
print(reshaped)

30
[10 20 30]
60
[20 30 40]
[[10 20 30]
 [40 50 60]]


In [20]:
arr1 = np.arange(1, 6)
arr2 = np.arange(6, 11)
print(f"Sum of arrays {arr1} and {arr2} is {arr1+arr2}")
print(f"Difference of arrays {arr1} and {arr2} is {arr1-arr2}")
print(f"Product of arrays {arr1} and {arr2} is {arr1*arr2}")
print(f"Division of arrays {arr1} and {arr2} is {arr1/arr2}")

Sum of arrays [1 2 3 4 5] and [ 6  7  8  9 10] is [ 7  9 11 13 15]
Difference of arrays [1 2 3 4 5] and [ 6  7  8  9 10] is [-5 -5 -5 -5 -5]
Product of arrays [1 2 3 4 5] and [ 6  7  8  9 10] is [ 6 14 24 36 50]
Division of arrays [1 2 3 4 5] and [ 6  7  8  9 10] is [0.16666667 0.28571429 0.375      0.44444444 0.5       ]


In [26]:
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Original matrix:\n", matrix)

# transpose

transpose = matrix.T
print("Transpose of original matrix:\n", transpose)

another_matrix = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
print("Addtion = \n", matrix + another_matrix)
print("Product = \n", matrix * another_matrix)

Original matrix:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Transpose of original matrix:
 [[1 4 7]
 [2 5 8]
 [3 6 9]]
Addtion = 
 [[10 10 10]
 [10 10 10]
 [10 10 10]]
Product = 
 [[ 9 16 21]
 [24 25 24]
 [21 16  9]]


In [27]:
# Array and scalar broadcasting

arr = np.array([1, 2, 3])
print(arr+10)

[11 12 13]


In [28]:
matrix = np.array([[1, 2, 3], [4, 5, 6]])
vector = np.array([1, 0, 1])
print(matrix + vector)

[[2 2 4]
 [5 5 7]]


In [29]:
# aggregation functions

matrix = np.array([[1, 2, 3], [4, 5, 6]])
print("Sum: ", np.sum(matrix))
print("Mean: ", np.mean(matrix))
print("Max: ", np.max(matrix))
print("Min: ", np.min(matrix))
print("Standard Deviation: ", np.std(matrix))
print("Sum along rows: ", np.sum(matrix, axis=1))
print("Sum along columns: ", np.sum(matrix, axis=0))

Sum:  21
Mean:  3.5
Max:  6
Min:  1
Standard Deviation:  1.707825127659933
Sum along rows:  [ 6 15]
Sum along columns:  [5 7 9]


In [33]:
# Boolean indexing and filtering

arr = np.array([1, 2, 3, 4, 5, 6])
evens = arr[arr % 2 == 0]
print("Evens: ", evens)
arr1 = np.where(arr > 3, 0, arr)
print("Modified array: ", arr1)


Evens:  [2 4 6]
Modified array:  [1 2 3 0 0 0]


In [41]:
# random number generation
rng = np.random.default_rng(2)
random_array = rng.random((3, 3))
print("Random Array with elements between 0 and 1: \n", random_array)
random_integers_array = rng.integers(0, 10, size=(2, 3))
print("Random integers matrix: \n", random_integers_array)

Random Array with elements between 0 and 1: 
 [[0.26161213 0.29849114 0.81422574]
 [0.09191594 0.60010053 0.72856053]
 [0.18790107 0.05514663 0.27496937]]
Random integers matrix: 
 [[2 6 3]
 [5 2 1]]


In [44]:
# broadcasting exercises 

rng = np.random.default_rng(3)
array = rng.integers(low = 1, high = 10, size = (3, 3))
vector = np.array([1, 0, -1])
print("Array: \n", array)
print("Vector: \n", vector)

result_add = array + vector

print("Sum of array and vector is = \n", result_add)

result_mul = array * 2

print("Product of array and 2 = \n", result_mul)

Array: 
 [[8 1 2]
 [3 2 8]
 [8 6 1]]
Vector: 
 [ 1  0 -1]
Sum of array and vector is = 
 [[9 1 1]
 [4 2 7]
 [9 6 0]]
Product of array and 2 = 
 [[16  2  4]
 [ 6  4 16]
 [16 12  2]]


In [47]:
# Generate a random dataset and filter a random dataset

# create a 5x5 of random integers between 1 and 50. filter values greater than 25 replace them with 0. calculate and print the sum, mean and standard
# deviation of the modified matrix

rng = np.random.default_rng(42)
matrix = rng.integers(low = 1, high = 51, size = (5, 5))

print("The random generated matrix =\n", matrix)

matrix_filtered = np.where(matrix > 25, 0, matrix)

print("The filtered matrix =\n", matrix_filtered)

print("Sum of the elements of the filtered matrix =\n", np.sum(matrix_filtered))
print("Mean of the elements of the filtered matrix =\n", np.mean(matrix_filtered))
print("Standard deviation of the elements of the filtered matrix =\n", np.std(matrix_filtered))

The random generated matrix =
 [[ 5 39 33 22 22]
 [43  5 35 11  5]
 [27 49 37 39 36]
 [40 26  7 42 23]
 [26 19 10 47 40]]
The filtered matrix =
 [[ 5  0  0 22 22]
 [ 0  5  0 11  5]
 [ 0  0  0  0  0]
 [ 0  0  7  0 23]
 [ 0 19 10  0  0]]
Sum of the elements of the filtered matrix =
 129
Mean of the elements of the filtered matrix =
 5.16
Standard deviation of the elements of the filtered matrix =
 7.841836519591569


In [53]:
# create a 3D random array and compute the statistic along specific axes

rng = np.random.default_rng(9)

dataset = rng.integers(low = 1, high = 101, size=(3, 3, 3))

print("The 3D dataset is as following:\n",dataset)
print("The sum of elements along the x axis =\n", np.sum(dataset, axis=1))
print("The sum of elements along the y axis =\n", np.sum(dataset, axis=0))
print("The sum of elements along the z axis =\n", np.sum(dataset, axis=-1))

The 3D dataset is as following:
 [[[43 88 97]
  [29 12 61]
  [67 78 65]]

 [[72 92 92]
  [93 87 73]
  [92  2  3]]

 [[80 44 75]
  [49 91  7]
  [68  1 15]]]
The sum of elements along the x axis =
 [[139 178 223]
 [257 181 168]
 [197 136  97]]
The sum of elements along the y axis =
 [[195 224 264]
 [171 190 141]
 [227  81  83]]
The sum of elements along the z axis =
 [[228 102 210]
 [256 253  97]
 [199 147  84]]


In [61]:
# write a program to generate a dataset of random floats and normalize the values between 0 and 1

rng = np.random.default_rng(10)

dataset = rng.random(size=(3, 3))

print("Original dataset =\n",dataset)

data_min = dataset.min()
data_max = dataset.max()
normalized_rows = (dataset - dataset.min(axis=1, keepdims=True)) / (dataset.max(axis=1, keepdims=True) - dataset.min(axis=1, keepdims=True))
print("Normalized row dataset =\n", normalized_rows)
normalized_col = (dataset - dataset.min(axis=0, keepdims=True)) / (dataset.max(axis=0, keepdims=True) - dataset.min(axis=0, keepdims=True))
print("Normalized column dataset =\n", normalized_col)
normalized_dataset = (dataset - data_min) / (data_max - data_min)

print("Normalized dataset =\n", normalized_dataset)

Original dataset =
 [[0.95600171 0.20768181 0.82844489]
 [0.14928212 0.51280462 0.1359196 ]
 [0.68903648 0.84174772 0.425509  ]]
Normalized row dataset =
 [[1.         0.         0.82954239]
 [0.03545516 1.         0.        ]
 [0.6331162  1.         0.        ]]
Normalized column dataset =
 [[1.         0.         1.        ]
 [0.         0.48121623 0.        ]
 [0.66907308 1.         0.41816436]]
Normalized dataset =
 [[1.         0.08750612 0.84445847]
 [0.01629412 0.45956985 0.        ]
 [0.67446524 0.86067982 0.35312244]]


In [68]:
# implement a conditional replacement to create a binary mask for values above a threshold

rng = np.random.default_rng(5)

dataset = rng.integers(low = 1, high = 51, size = (3, 3, 3, 3))

print(dataset, end="\n")

dataset_filtered = (dataset > 25).astype(int)

print("Filtered dataset:\n", dataset_filtered)

[[[[34 41  2]
   [41 24 26]
   [32 15 49]]

  [[ 3 14 20]
   [29 21  7]
   [ 3  1  3]]

  [[ 8 50 10]
   [33 38 12]
   [15 22 14]]]


 [[[49  9 45]
   [40 43  6]
   [20 32 25]]

  [[34 34 34]
   [ 4 48 28]
   [46 14 19]]

  [[44 10  4]
   [19 34  7]
   [44 18 12]]]


 [[[28 45 45]
   [44 16  1]
   [39 36 39]]

  [[ 1  2 26]
   [17 22 47]
   [11 27 17]]

  [[15 41  8]
   [16  6  8]
   [15 35 29]]]]
Filtered dataset:
 [[[[1 1 0]
   [1 0 1]
   [1 0 1]]

  [[0 0 0]
   [1 0 0]
   [0 0 0]]

  [[0 1 0]
   [1 1 0]
   [0 0 0]]]


 [[[1 0 1]
   [1 1 0]
   [0 1 0]]

  [[1 1 1]
   [0 1 1]
   [1 0 0]]

  [[1 0 0]
   [0 1 0]
   [1 0 0]]]


 [[[1 1 1]
   [1 0 0]
   [1 1 1]]

  [[0 0 1]
   [0 0 1]
   [0 1 0]]

  [[0 1 0]
   [0 0 0]
   [0 1 1]]]]


In [73]:
# pandas - it is a powerful library used for data manipulation

# pandas data structures - 
# 1. Series - holds 1D data arrays
# 2. DataFrame - holds 2D data arrays

import pandas as pd

s = pd.Series([10, 20, 30], index=["a", "b", "c"])
print(s, "\n")

data = {"Name": ["Bruce", "Bella"], "Age": [25, 30]}
df = pd.DataFrame(data)
print(df)

a    10
b    20
c    30
dtype: int64 

    Name  Age
0  Bruce   25
1  Bella   30


In [81]:
import pandas as pd

#df = pd.read_csv("employee.csv", sep=",")

print(df.head(), "\n")
print(df.info(), "\n")
print(df.describe(), "\n")
print(df[["Name", "Age"]])

  EmployeeID,Name,Department,Age,Salary,JoinDate
0               101,Alice,HR,28,50000,2021-06-15
1        102,Bob,Engineering,32,75000,2020-09-01
2          103,Charlie,Sales,26,45000,2022-01-10
3      104,David,Engineering,35,90000,2019-03-22
4                 105,Eva,HR,29,52000,2021-11-05 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 1 columns):
 #   Column                                          Non-Null Count  Dtype 
---  ------                                          --------------  ----- 
 0   EmployeeID,Name,Department,Age,Salary,JoinDate  6 non-null      object
dtypes: object(1)
memory usage: 180.0+ bytes
None 

       EmployeeID,Name,Department,Age,Salary,JoinDate
count                                               6
unique                                              6
top                  101,Alice,HR,28,50000,2021-06-15
freq                                                1 



KeyError: "None of [Index(['Name', 'Age'], dtype='object')] are in the [columns]"

In [78]:
# select columns
print(df[["Name", "Age"]])

KeyError: "None of [Index(['Name', 'Age'], dtype='object')] are in the [columns]"