# Write down 20 built-in methods each from pandas and NumPy, with an example for each.

In [31]:
import numpy as np
import pandas as pd

## Numpy Methods

### 1) np.array
#### Creates an array from a list

In [32]:
# Build the ndarray straight from a Python list literal
a = np.array([23, 16, 89, 43, 80, 22, 13, 67, 109, 99, 74, 65])
a

array([ 23,  16,  89,  43,  80,  22,  13,  67, 109,  99,  74,  65])

### 2) np.arange
#### Creates an array from a range

In [33]:
b = np.arange(1, 20, 5)
b

array([ 1,  6, 11, 16])

### 3) np.reshape
#### Reshapes an existing array to a specified new shape

In [34]:
# Two rows of five values each
e = np.array([
    [2, 4, 6, 8, 10],
    [3, 5, 7, 9, 11],
])
# The same ten values, read row by row, laid out as five rows of two
np.reshape(e, (5, 2))

array([[ 2,  4],
       [ 6,  8],
       [10,  3],
       [ 5,  7],
       [ 9, 11]])

### 4) np.transpose
#### Swaps the rows and columns of an array, so each row becomes a column and each column becomes a row

In [35]:
# The integers 1..24 arranged as 8 rows by 3 columns
f = np.reshape(np.arange(1, 25), (8, 3))
print(f)
# Swap the axes: the result has 3 rows and 8 columns
np.transpose(f)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]
 [13 14 15]
 [16 17 18]
 [19 20 21]
 [22 23 24]]


array([[ 1,  4,  7, 10, 13, 16, 19, 22],
       [ 2,  5,  8, 11, 14, 17, 20, 23],
       [ 3,  6,  9, 12, 15, 18, 21, 24]])

### 5) np.concatenate
#### Combines two or more arrays together

In [36]:
g = np.array([[1,3],
              [2,4],
              [5,7],
              [6,8]])

h = np.array([[9, 11],
              [10, 12],
              [13, 15],
              [14, 16]])

# Concatenate row-wise: axis=0 stacks the rows of h below the rows of g
np.concatenate((g, h), axis = 0)

array([[ 1,  3],
       [ 2,  4],
       [ 5,  7],
       [ 6,  8],
       [ 9, 11],
       [10, 12],
       [13, 15],
       [14, 16]])

In [37]:
g = np.array([[1,3],
              [2,4],
              [5,7],
              [6,8]])

h = np.array([[9, 11],
              [10, 12],
              [13, 15],
              [14, 16]])

# Concatenate column-wise: axis=1 places the columns of h to the right of the columns of g
np.concatenate((g, h), axis = 1)

array([[ 1,  3,  9, 11],
       [ 2,  4, 10, 12],
       [ 5,  7, 13, 15],
       [ 6,  8, 14, 16]])

### 6) ndarray.flatten
#### Collapses a multi-dimensional array into a single dimension (an array method, called as `arr.flatten()`; there is no `np.flatten` function)

In [38]:
i = np.array([[1, 2, 3], [2, 4, 6],
              [3, 6, 9], [4, 8, 12],
              [5, 10, 15], [6, 12, 18],
              [7, 14, 21], [8, 16, 24]])
i.flatten()

array([ 1,  2,  3,  2,  4,  6,  3,  6,  9,  4,  8, 12,  5, 10, 15,  6, 12,
       18,  7, 14, 21,  8, 16, 24])

### 7. np.resize
#### Changes the number of elements in an array, either by adding to or deleting values from the end

In [39]:
k = np.arange(1,30,3)
print(k)
print(k.size)

# Resizes the array by removing the 2 values at the end of the array to reduce the size from 10 to 8
k.resize(8)
k

[ 1  4  7 10 13 16 19 22 25 28]
10


array([ 1,  4,  7, 10, 13, 16, 19, 22])

In [40]:
k = np.arange(1,30,3)
print(k)
print(k.size)

#Resizes the array by adding 2 zeros at the end of the array to increase the size from 10 to 12
k.resize(12)
k

[ 1  4  7 10 13 16 19 22 25 28]
10


array([ 1,  4,  7, 10, 13, 16, 19, 22, 25, 28,  0,  0])

In [41]:
# Original array
k = np.arange(1,30,3)
print(k)
print(k.size)

print(" ") # for space between outputs

# Returns a new array with the specified shape, repeats elements in the old array to compensate for the addition
k1= np.resize(k, (5,3))
print(k1)
print(k1.size)

print(" ") # for space between outputs

# Resizes the original array in place to the specified shape, adding zeros to fill the extra positions
k.resize(5,3)
print(k)
print(k.size)

[ 1  4  7 10 13 16 19 22 25 28]
10
 
[[ 1  4  7]
 [10 13 16]
 [19 22 25]
 [28  1  4]
 [ 7 10 13]]
15
 
[[ 1  4  7]
 [10 13 16]
 [19 22 25]
 [28  0  0]
 [ 0  0  0]]
15


### 8) np.random
#### A submodule whose functions (such as `randint`) create arrays of randomly generated values

In [42]:
l = np.random.randint(25, 70, 10)
l

array([57, 43, 60, 68, 61, 56, 53, 28, 37, 41])

### 9. np.sort
#### Returns a sorted copy of an array in ascending order (along the last axis by default, i.e. within each row of a 2-D array)

In [43]:
n = np.random.randint(20, 200, 30).reshape(5,6)
print(n)

np.sort(n)

[[ 24 148  29  71 190  34]
 [140  61  53 184  26  29]
 [121  28  50 125 100 103]
 [131  88 142 158  81 102]
 [ 98  79  97 108  89  66]]


array([[ 24,  29,  34,  71, 148, 190],
       [ 26,  29,  53,  61, 140, 184],
       [ 28,  50, 100, 103, 121, 125],
       [ 81,  88, 102, 131, 142, 158],
       [ 66,  79,  89,  97,  98, 108]])

### 10) np.sum
#### Sums up all the values in an array 

In [44]:
o = np.random.randint(1, 100, 25)
print(o)
o.sum()

[31 31 59 97 86 94 67 66 28 15 26 96 50 32 44 52 81 71 92 91 98 35 61 43
 14]


1460

### 11) np.mean
#### Returns the average of all the elements in an array

In [45]:
# Eight evenly spaced values; their arithmetic mean is computed below
d = np.array([10, 20, 30, 40, 50, 60, 70, 80])
np.mean(d)

45.0

### 12. np.argmax
#### Returns the index of the largest value

In [46]:
# The largest value (13) sits in the last position, index 7
p = np.array([2, 3, 5, 6, 8, 9, 12, 13])
np.argmax(p)

7

### 13. np.round
#### Rounds the float values in an array to a specified number of decimal places

In [47]:
# Floats with four decimal places, rounded to two below
q = np.array([23.5523, 89.7811, 66.4233, 101.0972, 12.3876])
np.round(q, decimals=2)

array([ 23.55,  89.78,  66.42, 101.1 ,  12.39])

### 14) np.hsplit
#### Splits an array along its horizontal axis

In [48]:
r = np.arange(1, 25).reshape(2, 12)
print(r)
np.hsplit(r, 3)

[[ 1  2  3  4  5  6  7  8  9 10 11 12]
 [13 14 15 16 17 18 19 20 21 22 23 24]]


[array([[ 1,  2,  3,  4],
        [13, 14, 15, 16]]),
 array([[ 5,  6,  7,  8],
        [17, 18, 19, 20]]),
 array([[ 9, 10, 11, 12],
        [21, 22, 23, 24]])]

### 15) np.vsplit
#### Splits an array along its vertical axis

In [49]:
r = np.arange(1, 25).reshape(6,4)
print(r)
np.vsplit(r, 3)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]]


[array([[1, 2, 3, 4],
        [5, 6, 7, 8]]),
 array([[ 9, 10, 11, 12],
        [13, 14, 15, 16]]),
 array([[17, 18, 19, 20],
        [21, 22, 23, 24]])]

### 16. np.copy
#### Makes a copy of an array and its data

In [50]:
# Five rows of two values each
s = np.array([[1, 10], [2, 9], [3, 8], [4, 7], [5, 6]])
# s1 is an independent array holding the same data as s
s1 = np.copy(s)
print(s1)

# Element-wise comparison: True wherever s1 and s hold the same value
s1 == s

[[ 1 10]
 [ 2  9]
 [ 3  8]
 [ 4  7]
 [ 5  6]]


array([[ True,  True],
       [ True,  True],
       [ True,  True],
       [ True,  True],
       [ True,  True]])

### 17. np.append
#### Adds values to the end of an array

In [51]:
# Creating an array 
t = np.arange(5,28, 3).reshape(4,2)
print(t)

print(" ")  # for space between outputs

# Adding the values 3,4 to the array
t = np.append(t, [3,4])
print(t)

# Reshaping the array to accommodate the new values
t = t.reshape(5,2)
t

[[ 5  8]
 [11 14]
 [17 20]
 [23 26]]
 
[ 5  8 11 14 17 20 23 26  3  4]


array([[ 5,  8],
       [11, 14],
       [17, 20],
       [23, 26],
       [ 3,  4]])

### 18) np.unique
#### Returns the unique values in an array, excluding the duplicates (with axis=0, the unique rows)

In [52]:
# Creating an array
u = np.array([[1, 0], [2, 18], [3, 27], [2, 18], [4, 32], [5, 45], [0, 0], [5, 45], [7, 90], [5,35], [2,18], [0,2]])

# displaying the shape (no of rows and columns) of the original array
print(u.shape)

print(" ")  # for space between outputs

# selecting the unique values
print(np.unique(u, axis=0))

# displaying the shape (no of rows and columns) of the array containing only the unique values
np.unique(u, axis=0).shape

(12, 2)
 
[[ 0  0]
 [ 0  2]
 [ 1  0]
 [ 2 18]
 [ 3 27]
 [ 4 32]
 [ 5 35]
 [ 5 45]
 [ 7 90]]


(9, 2)

### 19) np.sqrt
#### Returns the square roots of elements in an array

In [53]:
v = np.array([4, 16, 36, 64, 100, 144, 225, 256])
np.sqrt(v)

array([ 2.,  4.,  6.,  8., 10., 12., 15., 16.])

### 20) ndarray.tolist
#### Converts a numpy array to a Python list (an array method, called as `arr.tolist()`)

In [54]:
# Build the range as an array and convert it to a plain Python list in one step
t = np.arange(8, 64, 5).tolist()
print(t)
print(type(t))

[8, 13, 18, 23, 28, 33, 38, 43, 48, 53, 58, 63]
<class 'list'>


## Pandas Methods

In [68]:
# Creating a data frame from a dictionary of lists (one list per column)

data = {"Names": ["Uche", "Abiola", "Nelly", "Queen", "Ovie", "Belinda", "Ann", "King","Richard", "Adamu"],
        "Age" : [16, 17, 16, 15, 26, 18, 22, 23, 38, 25],
        "Occupation" :["Student", "Student", "Student", "Student", "Lawyer", "Student", "Quality Analyst", "Lab Technician",
                       "Civil Servant", "Personal Assistant"],
        "Height": ["5'7", "6'", "5'8", "5'", "6'2", "5'11", "5'2", "6'2", "6'3", "5'6"],
        "Weight(Kg)": [62, 54, 56, 57, 80, 72, 52, 69, 78, 70], 
        "Gender": ["Male", "Female", "Female", "Female", "Male", "Female", "Female", "Male", "Male", "Male"]}

Serial_No = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

People = pd.DataFrame(data = data, index = Serial_No)
People

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
1,Uche,16,Student,5'7,62,Male
2,Abiola,17,Student,6',54,Female
3,Nelly,16,Student,5'8,56,Female
4,Queen,15,Student,5',57,Female
5,Ovie,26,Lawyer,6'2,80,Male
6,Belinda,18,Student,5'11,72,Female
7,Ann,22,Quality Analyst,5'2,52,Female
8,King,23,Lab Technician,6'2,69,Male
9,Richard,38,Civil Servant,6'3,78,Male
10,Adamu,25,Personal Assistant,5'6,70,Male


### 1. Head - Returns the first n entries in a dataframe (5 by default)

In [69]:
People.head()

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
1,Uche,16,Student,5'7,62,Male
2,Abiola,17,Student,6',54,Female
3,Nelly,16,Student,5'8,56,Female
4,Queen,15,Student,5',57,Female
5,Ovie,26,Lawyer,6'2,80,Male


### 2. Tail - Returns the last n entries in a dataframe (5 by default)

In [70]:
People.tail()

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
6,Belinda,18,Student,5'11,72,Female
7,Ann,22,Quality Analyst,5'2,52,Female
8,King,23,Lab Technician,6'2,69,Male
9,Richard,38,Civil Servant,6'3,78,Male
10,Adamu,25,Personal Assistant,5'6,70,Male


### 3. Info - Returns general summary of a data frame

In [71]:
People.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 1 to 10
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Names       10 non-null     object
 1   Age         10 non-null     int64 
 2   Occupation  10 non-null     object
 3   Height      10 non-null     object
 4   Weight(Kg)  10 non-null     int64 
 5   Gender      10 non-null     object
dtypes: int64(2), object(4)
memory usage: 560.0+ bytes


### 4. Dtypes - Returns the data type of each column

In [72]:
People.dtypes

Names         object
Age            int64
Occupation    object
Height        object
Weight(Kg)     int64
Gender        object
dtype: object

### 5. Shape - Returns the number of rows and columns

In [73]:
People.shape

(10, 6)

### 6. Size - Returns the size of a dataframe ie, the rows multiplied by the columns (Total no of elements in the data frame)

In [74]:
People.size

60

### 7. Describe - Returns a statistical summary of the numeric columns in a dataframe

In [75]:
People.describe()

Unnamed: 0,Age,Weight(Kg)
count,10.0,10.0
mean,21.6,65.0
std,7.011102,10.154364
min,15.0,52.0
25%,16.25,56.25
50%,20.0,65.5
75%,24.5,71.5
max,38.0,80.0


### 8. Count - Returns the number of non-null entries in each column in a data frame

In [76]:
People.count()

Names         10
Age           10
Occupation    10
Height        10
Weight(Kg)    10
Gender        10
dtype: int64

### 9. Sample - Randomly selects any number of samples in the dataframe

In [80]:
People.sample(n = 3)

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
7,Ann,22,Quality Analyst,5'2,52,Female
9,Richard,38,Civil Servant,6'3,78,Male
2,Abiola,17,Student,6',54,Female


### 10. Isnull - Returns true where there are empty values

In [81]:
People.isnull()

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False
10,False,False,False,False,False,False


### 11. Isna - Returns true where there are missing values

In [82]:
People.isna()

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False
10,False,False,False,False,False,False


### 12. Memory_usage - Returns how much memory each column uses in bytes

In [83]:
People.memory_usage()

Index         80
Names         80
Age           80
Occupation    80
Height        80
Weight(Kg)    80
Gender        80
dtype: int64

### 13. nsmallest - Returns the n rows with the smallest values in a specified column

In [84]:
People.nsmallest(3, "Weight(Kg)")

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
7,Ann,22,Quality Analyst,5'2,52,Female
2,Abiola,17,Student,6',54,Female
3,Nelly,16,Student,5'8,56,Female


### 14. nlargest - Returns the n rows with the largest values in a specified column

In [85]:
People.nlargest(3, "Age")

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
9,Richard,38,Civil Servant,6'3,78,Male
5,Ovie,26,Lawyer,6'2,80,Male
10,Adamu,25,Personal Assistant,5'6,70,Male


### 15. Columns - Returns the names of columns in a dataframe

In [99]:
People.columns

Index(['Names', 'Age', 'Occupation', 'Height', 'Weight(Kg)', 'Gender'], dtype='object')

### 16. Loc - Returns the rows (and optionally columns) selected by label; here, the values of every column in the row labelled 5

In [100]:
People.loc[5]

Names           Ovie
Age               26
Occupation    Lawyer
Height           6'2
Weight(Kg)        80
Gender          Male
Name: 5, dtype: object

### 17. iloc - Returns values from specified rows and columns, selected by integer position

In [101]:
People.iloc[3:11, 0:2]
# Selects the rows at positions 3 to 9 (labels 4 to 10) and the columns at positions 0 and 1. The slice end is exclusive, and an end past the last row (11 here) is simply clipped.

Unnamed: 0,Names,Age
4,Queen,15
5,Ovie,26
6,Belinda,18
7,Ann,22
8,King,23
9,Richard,38
10,Adamu,25


### 18. GroupBy - Groups the rows of a data frame by the values in a specified column so that a statistic (mean, median, count, sum, etc.) can be computed for each group

In [102]:
People.groupby(["Gender"]).count()

Unnamed: 0_level_0,Names,Age,Occupation,Height,Weight(Kg)
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,5,5,5,5,5
Male,5,5,5,5,5


### 19. Sort_values - Returns the data frame sorted in ascending or descending order by one or more specified columns

In [103]:
People.sort_values(by= ["Age", "Weight(Kg)"], ascending= False)

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
9,Richard,38,Civil Servant,6'3,78,Male
5,Ovie,26,Lawyer,6'2,80,Male
10,Adamu,25,Personal Assistant,5'6,70,Male
8,King,23,Lab Technician,6'2,69,Male
7,Ann,22,Quality Analyst,5'2,52,Female
6,Belinda,18,Student,5'11,72,Female
2,Abiola,17,Student,6',54,Female
1,Uche,16,Student,5'7,62,Male
3,Nelly,16,Student,5'8,56,Female
4,Queen,15,Student,5',57,Female


### 20. Query - Filters the data frame and returns the rows that meet a specified criterion.

In [105]:
People.query("Age < 20")

Unnamed: 0,Names,Age,Occupation,Height,Weight(Kg),Gender
1,Uche,16,Student,5'7,62,Male
2,Abiola,17,Student,6',54,Female
3,Nelly,16,Student,5'8,56,Female
4,Queen,15,Student,5',57,Female
6,Belinda,18,Student,5'11,72,Female
