# 25 NUMPY BUILT-IN METHODS WITH EXAMPLES

In [1]:
import numpy as np

In [2]:
#1 .arange() gives you evenly spaced integers in the half-open range [start, stop), i.e. the stop value itself is excluded
print(np.arange(10))

[0 1 2 3 4 5 6 7 8 9]


In [3]:
#2 .array() is used for creating NumPy arrays (here from a Python list)
a = [1, 2, 6, 5]
arr = np.array(a)
print(arr)

[1 2 6 5]


In [4]:
#3 .append() returns a new array with the given values added after the last element
#(the array passed in is left untouched, which is why the result is assigned to a name)
odd_numbers = np.arange(1, 20, 2)
print(odd_numbers)

arr = np.append(odd_numbers, [24, 26])
print(arr)

[ 1  3  5  7  9 11 13 15 17 19]
[ 1  3  5  7  9 11 13 15 17 19 24 26]


In [5]:
#4 .linspace() gives an array of evenly spaced numbers over a given interval (50 points by default, both endpoints included)

arr = np.linspace(1, 20)
print(arr)

[ 1.          1.3877551   1.7755102   2.16326531  2.55102041  2.93877551
  3.32653061  3.71428571  4.10204082  4.48979592  4.87755102  5.26530612
  5.65306122  6.04081633  6.42857143  6.81632653  7.20408163  7.59183673
  7.97959184  8.36734694  8.75510204  9.14285714  9.53061224  9.91836735
 10.30612245 10.69387755 11.08163265 11.46938776 11.85714286 12.24489796
 12.63265306 13.02040816 13.40816327 13.79591837 14.18367347 14.57142857
 14.95918367 15.34693878 15.73469388 16.12244898 16.51020408 16.89795918
 17.28571429 17.67346939 18.06122449 18.44897959 18.83673469 19.2244898
 19.6122449  20.        ]


In [6]:
#5 .log() gives the natural logarithm of an element
arr = np.log(5)
print(arr)

1.6094379124341003


In [7]:
#6 .reshape() gives a new shape to an array without changing its elements;
#here 16 values are laid out as 4 rows x 4 columns while the original stays one-dimensional
arr = np.arange(1, 17)
new_arr = np.reshape(arr, (4, 4))
for current in (arr, new_arr):
    print(current)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]


In [8]:
#7 .sum() adds up the elements of an array over a given axis
#(with no axis given, every element is summed into a single number)
arr_1 = np.arange(4, 9)
arr_2 = arr_1.sum()
print(arr_1, arr_2, sep="\n")

[4 5 6 7 8]
30


In [9]:
#8 .transpose() returns the array transposed (rows become columns and vice versa)
arr = np.arange(1, 17).reshape(4, 4)
transposed = arr.transpose()
print(arr)
print("\n")
print(transposed)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]


[[ 1  5  9 13]
 [ 2  6 10 14]
 [ 3  7 11 15]
 [ 4  8 12 16]]


In [10]:
#9 concatenate() joins arrays along an axis that already exists
#(axis=0 places the rows of the second array underneath the rows of the first)
arr_1 = np.array([[1, 2, 3], [5, 6, 9]])
arr_2 = np.array([[3, 4, 6], [5, 6, 8]])
arr = np.concatenate([arr_1, arr_2], axis=0)
print(arr)

[[1 2 3]
 [5 6 9]
 [3 4 6]
 [5 6 8]]


In [11]:
#10 .stack() joins arrays along a NEW axis, so the result has one more dimension than the inputs
#(two 2x3 arrays stacked with axis=1 give a 2x2x3 array)
arr_1 = np.array([[1, 2, 3], [5, 6, 9]])
arr_2 = np.array([[3, 4, 6], [5, 6, 8]])
arr = np.stack([arr_1, arr_2], axis=1)
print(arr)

[[[1 2 3]
  [3 4 6]]

 [[5 6 9]
  [5 6 8]]]


In [12]:
#11 .hstack() stacks arrays horizontally (column wise): the arrays are placed side by side,
#so every row gets longer
arr_1 = np.array([[1, 2, 3], [5, 6, 9]])
arr_2 = np.array([[3, 4, 6], [5, 6, 8]])
arr = np.hstack([arr_1, arr_2])
print(arr)

[[1 2 3 3 4 6]
 [5 6 9 5 6 8]]


In [13]:
#12 .vstack() stacks arrays vertically (row wise): the rows of the second array
#are placed under the rows of the first
arr_1 = np.array([[1, 2, 3], [5, 6, 9]])
arr_2 = np.array([[3, 4, 6], [5, 6, 8]])
arr = np.vstack([arr_1, arr_2])
print(arr)

[[1 2 3]
 [5 6 9]
 [3 4 6]
 [5 6 8]]


In [14]:
#13 .dstack() stacks arrays depth wise (along a third axis): elements found at the same
#position in each input end up next to each other in the innermost dimension
arr_1 = np.array([[1, 2, 3], [5, 6, 9]])
arr_2 = np.array([[3, 4, 6], [5, 6, 8]])
arr = np.dstack([arr_1, arr_2])
print(arr)

[[[1 3]
  [2 4]
  [3 6]]

 [[5 5]
  [6 6]
  [9 8]]]


In [15]:
#14 split() breaks one array into multiple sub-arrays; the array length must be exactly
#divisible by the number of parts (otherwise a ValueError is raised)
arr = np.array([1, 2, 3, 4, 5, 6])
arr_split = np.split(arr, indices_or_sections=2)
print(arr_split)

[array([1, 2, 3]), array([4, 5, 6])]


In [16]:
#15 array_split() breaks one array into multiple sub-arrays and also accepts a number of parts that
#does not divide the length evenly: the leading sub-arrays are then one element longer than the rest
arr = np.array([1, 2, 3, 4, 5, 6])
arr_split = np.array_split(arr, indices_or_sections=4)
print(arr_split)

[array([1, 2]), array([3, 4]), array([5]), array([6])]


In [17]:
#16 copy() makes an independent copy of the original array: changes made to the original
#afterwards do not affect the copy
arr = np.array([1, 2, 3, 4, 5, 6])
arr_copy = np.copy(arr)
arr[2] = 9
print(arr, arr_copy, sep="\n")

[1 2 9 4 5 6]
[1 2 3 4 5 6]


In [18]:
#17 view() creates a new array object that shares the original's data, so changes made
#to the original are also visible through the view (unlike copy(), no data is duplicated)
arr = np.array([1, 2, 3, 4, 5, 6])
arr_view = arr.view()  # deliberately not called "copy": it looks at the same data as arr
arr[2] = 9
print(arr)
print(arr_view)

[1 2 9 4 5 6]
[1 2 9 4 5 6]


In [19]:
#18 .base is an attribute (not a method) used to check whether an array owns its data: it is None if the array owns
#its data, otherwise it is the original array whose data is being shared (e.g. for a view)
arr = np.array([1, 2, 3, 4, 5, 6,])
print(arr.base)

None


In [20]:
#19 where() with a condition returns a tuple of arrays holding the indices at which the condition is True
arr = np.array([1, 2, 3, 4, 5, 6, 2, 2])
print(np.where(arr == 2))

(array([1, 6, 7], dtype=int64),)


In [21]:
#20 searchsorted() finds the index at which a value should be inserted to keep the array sorted; the array must already be sorted, otherwise the result is not meaningful
arr = np.array([1, 2, 3, 4, 5, 6,])
print(np.searchsorted(arr, 5))

4


In [22]:
#21 sort() returns a sorted copy of the array in ascending order
arr = np.array([1, 4, 3, 4, 3, 6, 2])
print(np.sort(arr))

[1 2 3 3 4 4 6]


In [23]:
#22 repeat() it repeats the elements in an array
arr = np.array([1, 2, 3, 4, 5, 6,])
print(np.repeat(arr, 3))

[1 1 1 2 2 2 3 3 3 4 4 4 5 5 5 6 6 6]


In [24]:
#23 random.randint() returns a random integer from the half-open interval [low, high). NB: if only one number is given,
#the interval is [0, that number), i.e. the given number itself is never returned
print(np.random.randint(5))

2


In [25]:
#24 random.choice() returns a random element from a given sequence
arr = np.random.choice([1, 2, 3, 4, 5, 6,])
print(arr)

1


In [26]:
#25 .abs() returns the element-wise absolute value of an array
arr = np.array([1, -2, 3, -4, 5, 6,])
print(np.abs(arr))

[1 2 3 4 5 6]


# 20 PANDAS BUILT-IN METHODS WITH EXAMPLES

In [27]:
import pandas as pd

In [28]:
#1 pd.Series() creates a Series: a one-dimensional labelled array that can hold any type of data
my_list = [1, 2, 3, 4]
series = pd.Series(my_list)
series

0    1
1    2
2    3
3    4
dtype: int64

In [29]:
#2 pd.DataFrame() is used for creating a two-dimensional data structure - data with rows and columns
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,0


In [30]:
#3 pd.read_<format>() functions (e.g. pd.read_csv, pd.read_excel, pd.read_json) are used to load a dataset from a file into a DataFrame
df = pd.read_excel(r"C:\Users\EliteBook\Desktop\data.xlsx")
df

Unnamed: 0,1,1.1,1.2,1.3,1.4
0,1,1,1,1,1
1,1,1,1,1,1
2,1,1,1,1,1
3,1,1,1,1,1


In [31]:
#4 df.head(n) returns the first n rows of a dataframe (5 rows if n is not given)
df = pd.read_excel(r"C:\Users\EliteBook\Desktop\data.xlsx")
df.head(3)

Unnamed: 0,1,1.1,1.2,1.3,1.4
0,1,1,1,1,1
1,1,1,1,1,1
2,1,1,1,1,1


In [32]:
#5 df.tail(n) returns the last n rows of a dataframe (5 rows if n is not given)
df = pd.read_excel(r"C:\Users\EliteBook\Desktop\data.xlsx")
df.tail(2)

Unnamed: 0,1,1.1,1.2,1.3,1.4
2,1,1,1,1,1
3,1,1,1,1,1


In [33]:
#6 df.to_string() renders the entire dataframe as a string (nothing is truncated), which can then be printed
df = pd.read_excel(r"C:\Users\EliteBook\Desktop\data.xlsx")
print(df.to_string())

   1  1.1  1.2  1.3  1.4
0  1    1    1    1    1
1  1    1    1    1    1
2  1    1    1    1    1
3  1    1    1    1    1


In [34]:
#7 .info() prints a concise summary of the dataframe: index range, column names, non-null counts, dtypes and memory usage
df = pd.read_excel(r"C:\Users\EliteBook\Desktop\data.xlsx")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   1       4 non-null      int64
 1   1.1     4 non-null      int64
 2   1.2     4 non-null      int64
 3   1.3     4 non-null      int64
 4   1.4     4 non-null      int64
dtypes: int64(5)
memory usage: 288.0 bytes


In [35]:
#8 .abs() returns a dataframe with the absolute value of each element
my_list = [[1, -2, 3, 4], [5, -6, 7, 0]]
df = pd.DataFrame(my_list)
df.abs()

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,0


In [36]:
#9 .add() adds a specified value to the values of a dataframe; a list is matched against the columns, so each column gets its own addend
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.add([5, 2, 1, 0])

Unnamed: 0,0,1,2,3
0,6,4,4,4
1,10,8,8,0


In [37]:
#10 gt() returns true for values greater than the specified value else it returns false
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.gt(2)

Unnamed: 0,0,1,2,3
0,False,False,True,True
1,True,True,True,False


In [38]:
#11 ge() returns true for values greater than or equal to the specified values else it returns false
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.ge(3)

Unnamed: 0,0,1,2,3
0,False,False,True,True
1,True,True,True,False


In [39]:
#12 .max() gives the maximum value along a specified axis; if no axis is specified, pandas uses axis = 0 and returns the maximum value of each column
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.max()

0    5
1    6
2    7
3    4
dtype: int64

In [40]:
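#13 .min() gives the minimum value along a specified axis; with the default axis = 0 it returns the minimum value of each column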
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list) 
df.min()

0    1
1    2
2    3
3    0
dtype: int64

In [41]:
#14 .mean() returns the mean along the specified axis
#(axis=1 averages across the columns, giving one value per row)
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(data=my_list)
df.mean(axis=1)

0    2.5
1    4.5
dtype: float64

In [42]:
#15 .replace() replaces a value with another
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.replace(2, 3)

Unnamed: 0,0,1,2,3
0,1,3,3,4
1,5,6,7,0


In [43]:
#16 sum() adds all the values along a specified axis
#(axis=1 adds across the columns, giving one total per row)
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(data=my_list)
df.sum(axis=1)

0    10
1    18
dtype: int64

In [44]:
#17 drop() returns a dataframe without the specified row or column
#(here the row labelled 0 is removed; the original dataframe is not modified)
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(data=my_list)
df.drop(index=0)

Unnamed: 0,0,1,2,3
1,5,6,7,0


In [45]:
#18 describe() returns a descriptive statistics of all columns in the dataframe
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
df.describe()

Unnamed: 0,0,1,2,3
count,2.0,2.0,2.0,2.0
mean,3.0,4.0,5.0,2.0
std,2.828427,2.828427,2.828427,2.828427
min,1.0,2.0,3.0,0.0
25%,2.0,3.0,4.0,1.0
50%,3.0,4.0,5.0,2.0
75%,4.0,5.0,6.0,3.0
max,5.0,6.0,7.0,4.0


In [46]:
#19 copy() returns a copy of the dataframe
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
print(df)
df.copy()

   0  1  2  3
0  1  2  3  4
1  5  6  7  0


Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,0


In [47]:
#20 astype() returns a new dataframe whose values are converted to the specified type
#(the original dataframe keeps its own dtypes)
my_list = [[1, 2, 3, 4], [5, 6, 7, 0]]
df = pd.DataFrame(my_list)
new_df = df.astype("str")

# show each dataframe followed by its column dtypes, before and after the conversion
print(df, df.dtypes, sep="\n")
print("\n")
print(new_df, new_df.dtypes, sep="\n")

   0  1  2  3
0  1  2  3  4
1  5  6  7  0
0    int64
1    int64
2    int64
3    int64
dtype: object


   0  1  2  3
0  1  2  3  4
1  5  6  7  0
0    object
1    object
2    object
3    object
dtype: object
