In [2]:
# importing libraries

import pandas as pd
import numpy as np

# 20 In-built Functions in Pandas

In [7]:
# creating a DataFrame to show how the functions can be applied

df = pd.DataFrame({'name': ['Amaka','Chimee','Bolu','Deborah','Celia','Blessing','Bolanle'], 'age':[12, 14, 16, 18, 20, 22, 24], 'gender':['F','M','F','F','M','F','M']})

df

Unnamed: 0,name,age,gender
0,Amaka,12,F
1,Chimee,14,M
2,Bolu,16,F
3,Deborah,18,F
4,Celia,20,M
5,Blessing,22,F
6,Bolanle,24,M


### 1. df.head()
Used to display rows from the top of a dataframe. 
Returns the first 5 rows by default but the number of rows can be changed by passing in the number of rows you want to display as an argument

In [8]:
df.head()

Unnamed: 0,name,age,gender
0,Amaka,12,F
1,Chimee,14,M
2,Bolu,16,F
3,Deborah,18,F
4,Celia,20,M


### 2. df.tail()
Used to display rows from the bottom of a dataframe. Returns the last 5 rows by default but the number of rows can be changed by passing in the number of rows you want to display as an argument

In [9]:
df.tail()

Unnamed: 0,name,age,gender
2,Bolu,16,F
3,Deborah,18,F
4,Celia,20,M
5,Blessing,22,F
6,Bolanle,24,M


### 3. df.info()
Used to generate descriptive summary statistics of the data in a Pandas DataFrame or Series

In [10]:
df.describe()

Unnamed: 0,age
count,7.0
mean,18.0
std,4.320494
min,12.0
25%,15.0
50%,18.0
75%,21.0
max,24.0


### 4. df.info()
Used to get a concise summary of the dataframe

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    7 non-null      object
 1   age     7 non-null      int64 
 2   gender  7 non-null      object
dtypes: int64(1), object(2)
memory usage: 296.0+ bytes


### 4. df.dtypes()
Shows the datatype of each column in the dataframe

In [15]:
df.dtypes

name      object
age        int64
gender    object
dtype: object

### 5. df.shape
It shows the number of dimensions as well as the size in each dimension. Returns the number of rows and columns in a dataframe

In [16]:
df.shape

(7, 3)

### 6. df.size
Returns the size of a dataframe which is the number of rows multiplied by the number of columns

In [17]:
df.size

21

### 7. df.sample
Selects values randomly from a Series or DataFrame. The number of rows/values to select is passed in as an argument

In [19]:
df.sample(3)

Unnamed: 0,name,age,gender
4,Celia,20,M
3,Deborah,18,F
2,Bolu,16,F


### 8. df.isnull()
Detects missing values. Returns a boolean same-sized object indicating if the values are NA or not

In [20]:
df.isnull()

Unnamed: 0,name,age,gender
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,False


### 9. df.rename()
Used to rename the columns

In [24]:
df.rename(columns = {'name':'Name', 'age':'Age', 'gender':'Gender'})

Unnamed: 0,Name,Age,Gender
0,Amaka,12,F
1,Chimee,14,M
2,Bolu,16,F
3,Deborah,18,F
4,Celia,20,M
5,Blessing,22,F
6,Bolanle,24,M


### 10. drop_duplicates() 
Returns a Pandas DataFrame with duplicate rows removed

In [28]:
df.drop_duplicates()

Unnamed: 0,name,age,gender
0,Amaka,12,F
1,Chimee,14,M
2,Bolu,16,F
3,Deborah,18,F
4,Celia,20,M
5,Blessing,22,F
6,Bolanle,24,M


### 11. df.value_counts
Return a Series containing counts of unique rows in the DataFrame

In [32]:
df['gender'].value_counts()

F    4
M    3
Name: gender, dtype: int64

### 12. df.astype()
Used to cast a pandas object to a specified datatype

In [33]:
df['gender'].astype('category')

0    F
1    M
2    F
3    F
4    M
5    F
6    M
Name: gender, dtype: category
Categories (2, object): ['F', 'M']

### 13. df.groupby()
Used to group a Pandas DataFrame by 1 or more columns, and perform some mathematical operation on it. groupby() can be used to summarize data in a simple manner

In [34]:
df.groupby('gender')['age'].mean()

gender
F    17.000000
M    19.333333
Name: age, dtype: float64

### 14. df.nunique
Counts the number of unique entries over columns or rows

In [35]:
df.nunique()

name      7
age       7
gender    2
dtype: int64

### 15. df.memory_usage() 
Returns how much memory each column uses in bytes.

In [38]:
df.memory_usage()

Index     128
name       56
age        56
gender     56
dtype: int64

### 16. df.nsmallest()
Returns n observations with the smallest values by specified column

In [39]:
df.nsmallest(3, 'age')

Unnamed: 0,name,age,gender
0,Amaka,12,F
1,Chimee,14,M
2,Bolu,16,F


### 17. df.nlargest()
Returns n observations with the largest values by specified column

In [40]:
df.nlargest(4, 'age')

Unnamed: 0,name,age,gender
6,Bolanle,24,M
5,Blessing,22,F
4,Celia,20,M
3,Deborah,18,F


### 18. df.sort_index()
Sorts objects by index labels along the given axis

In [42]:
df.sort_index(axis=0, ascending=False)

Unnamed: 0,name,age,gender
6,Bolanle,24,M
5,Blessing,22,F
4,Celia,20,M
3,Deborah,18,F
2,Bolu,16,F
1,Chimee,14,M
0,Amaka,12,F


### 19. df.merge()
Used to merge two Pandas DataFrame objects or a DataFrame and a Series object on a common column.

In [48]:
df2 = pd.DataFrame({'name': ['Amaka','Chimee','Bolu','Deborah','Celia','Blessing','Bolanle'], 'status':['student','employed','employed','student','student','student','employed']})
df2

Unnamed: 0,name,status
0,Amaka,student
1,Chimee,employed
2,Bolu,employed
3,Deborah,student
4,Celia,student
5,Blessing,student
6,Bolanle,employed


In [49]:
df.merge(df2, on='name')

Unnamed: 0,name,age,gender,status
0,Amaka,12,F,student
1,Chimee,14,M,employed
2,Bolu,16,F,employed
3,Deborah,18,F,student
4,Celia,20,M,student
5,Blessing,22,F,student
6,Bolanle,24,M,employed


### 20. df.columns
Displays the variable names (column names) in a dataframe

In [50]:
df.columns

Index(['name', 'age', 'gender'], dtype='object')

# 20 In-built Methods in Numpy

### 1. np.array()
Used to create an array from scratch or convert a list or pandas series object to an array

In [54]:
print(np.array([4,2,6,3,9,8]))

[4 2 6 3 9 8]


### 2. np.arange()
Return evenly spaced values within a given interval.

In [56]:
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [55]:
np.arange(20)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

### 3. np.linspace()
Creates an array in a specified range with equidistant elements

In [59]:
np.linspace(0,10,5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

### 4. np.unique()
Returns the unique elements of an array

In [61]:
np.unique([5,3,1,5,8,5,1,1,9,5,2,4])

array([1, 2, 3, 4, 5, 8, 9])

### 5. np.random.random()
Creates an array with random floats between 0 and 1

In [62]:
np.random.random(6)

array([0.37727026, 0.81249144, 0.1714051 , 0.0612174 , 0.47739888,
       0.54860305])

### 6. np.random.randint()
Creates an array of random integers in any shape

In [64]:
np.random.randint(0,10, size=5)

array([3, 2, 3, 3, 7])

In [65]:
np.random.randint(0,10, size=(2,5))

array([[7, 5, 8, 3, 4],
       [7, 0, 8, 2, 6]])

### 7. np.random.randn()
Returns a sample (or samples) from the "standard normal" distribution.

In [66]:
np.random.randn(10)

array([ 0.59112492,  0.56438387,  0.00566361, -0.42866754, -0.84839747,
       -0.51794763, -0.59175983, -1.49146078,  0.30391807,  0.08619305])

### 8. np.random.shuffle
It modifies the sequence of an array inplace by shuffling its elements

In [69]:
arr = np.array([1,2,3,4,5,6])
np.random.shuffle(arr)
arr

array([4, 3, 1, 5, 2, 6])

### 9. np.reshape()
Changes the shape of an array

In [70]:
arr.reshape(2,3)

array([[4, 3, 1],
       [5, 2, 6]])

### 10. np.expand_dim()
Used to expand the shape of an array

In [82]:
np.expand_dims(arr, axis=1)

array([[4],
       [3],
       [1],
       [5],
       [2],
       [6]])

### 11. np.count_nonzero()
It returns the count of non-zero elements in an array

In [85]:
arr2 = np.random.randint(0,10,100)
np.count_nonzero(arr)

6

### 12. np.argwhere()
It returns the indices of nonzero elements in an array.

In [91]:
arr3 = np.random.randint(5, size=(3,3))
arr3

array([[3, 2, 3],
       [3, 0, 1],
       [0, 1, 2]])

In [92]:
np.argwhere(arr3)

array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [1, 2],
       [2, 1],
       [2, 2]], dtype=int64)

### 13. np.ravel()
Returns a flattened array.

In [93]:
np.ravel(arr3)

array([3, 2, 3, 3, 0, 1, 0, 1, 2])

### 14. np.hsplit()
Split an array into multiple sub-arrays horizontally (column-wise)

In [98]:
arr4 = np.random.randint(5, size=(6,6))
arr4

array([[2, 1, 4, 1, 3, 3],
       [4, 2, 1, 2, 3, 3],
       [4, 4, 4, 1, 0, 4],
       [0, 2, 4, 4, 2, 4],
       [0, 3, 1, 0, 0, 1],
       [2, 2, 4, 1, 1, 3]])

In [101]:
np.hsplit(arr4, 3)

[array([[2, 1],
        [4, 2],
        [4, 4],
        [0, 2],
        [0, 3],
        [2, 2]]),
 array([[4, 1],
        [1, 2],
        [4, 1],
        [4, 4],
        [1, 0],
        [4, 1]]),
 array([[3, 3],
        [3, 3],
        [0, 4],
        [2, 4],
        [0, 1],
        [1, 3]])]

### 15. np.vsplit()
Split an array into multiple sub-arrays vertically (row-wise)

In [100]:
np.vsplit(arr4, 2)

[array([[2, 1, 4, 1, 3, 3],
        [4, 2, 1, 2, 3, 3],
        [4, 4, 4, 1, 0, 4]]),
 array([[0, 2, 4, 4, 2, 4],
        [0, 3, 1, 0, 0, 1],
        [2, 2, 4, 1, 1, 3]])]

### 16. np.hstack()
Stack arrays horizontally

In [102]:
a = [1,1,1,1,1]
b = [2,2,2,2,2]

In [104]:
np.hstack((a,b))

array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

### 17. np.vstack()
Stack arrays in sequence vertically (row wise)

In [105]:
np.vstack((a,b))

array([[1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2]])

### 18. np.transpose()
It transposes an array. In case of 2-dimensional arrays (i.e. matrix), transposing means switching rows and columns.

In [107]:
arr5 = np.random.randint(5, size=(2,4))
arr5

array([[3, 2, 4, 0],
       [1, 4, 4, 4]])

In [108]:
arr5.transpose()

array([[3, 1],
       [2, 4],
       [4, 4],
       [0, 4]])

### 19. np.round()
Rounds up floats to a specified number of decimal points.

In [113]:
arrx = np.random.random(5)
arrx

array([0.90446779, 0.20520656, 0.20472779, 0.5446907 , 0.66406299])

In [114]:
arrx.round(2)

array([0.9 , 0.21, 0.2 , 0.54, 0.66])

### 20. np.eye()
Returns a 2-D array with ones on the diagonal and zeros elsewhere

In [115]:
np.eye(6)

array([[1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.]])