# Built-In Methods in Numpy

In [83]:
import numpy as np

### 1. `arange()`

* arange() is very much similar to Python function range() <br>
* Syntax: arange([start,] stop[, step,], dtype=None) <br>
* Return evenly spaced values within a given interval. <br>

In [84]:
#Example on 'arange' method
np.arange(0,10) # similar to range() in Python, not including 10

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### 2. `linspace(start, end, num_of_points)`
Return evenly spaced numbers over a specified interval.<br>

In [85]:
#Example on 'linspace' method
np.linspace(1, 15, 50) # start from 1 & end at 15 with 50 evenly spaced points b/w 1 to 15.

array([ 1.        ,  1.28571429,  1.57142857,  1.85714286,  2.14285714,
        2.42857143,  2.71428571,  3.        ,  3.28571429,  3.57142857,
        3.85714286,  4.14285714,  4.42857143,  4.71428571,  5.        ,
        5.28571429,  5.57142857,  5.85714286,  6.14285714,  6.42857143,
        6.71428571,  7.        ,  7.28571429,  7.57142857,  7.85714286,
        8.14285714,  8.42857143,  8.71428571,  9.        ,  9.28571429,
        9.57142857,  9.85714286, 10.14285714, 10.42857143, 10.71428571,
       11.        , 11.28571429, 11.57142857, 11.85714286, 12.14285714,
       12.42857143, 12.71428571, 13.        , 13.28571429, 13.57142857,
       13.85714286, 14.14285714, 14.42857143, 14.71428571, 15.        ])

### 3. `zeros()`

* This method creates an array with **all zeros**<br>

In [86]:
np.zeros((4,6)) #(no_row, no_col) passing a tuple

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

### 4. `ones()`

* This method creates an array with **all ones**<br>

In [87]:
np.ones((4,6)) #(no_row, no_col) passing a tuple

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

### 5.  `eye()` 
This method creates an identity matrix must be a square matrix, which is useful in several linear algebra problems.
* Returns a 2-D array with **ones on the diagonal and zeros elsewhere.**

In [88]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### 6.  `rand()`
Create an array of the given shape and populate it with
random samples from a uniform distribution
over ``[0, 1)``.

In [89]:
np.random.rand(3)# 1-D array with three elements

array([0.24177062, 0.2614653 , 0.04219702])

### 7. `randn()`

Returns a sample (or samples) from the "standard normal" or a "Gaussian" distribution. Unlike rand which is uniform.<br>


In [90]:
np.random.randn(2) #1-D array with 2 samples

array([0.86516378, 0.70372851])

### 8. `randint()`
Return random integers from `low` (inclusive) to `high` (exclusive).

In [91]:
np.random.randint(1,100) #returns one random int, 1 inclusive, 100 exclusive

9

### 9. `shape()`

Returns the total number of elements in an array

In [92]:
#Below is an an array of numbers that would be used to explain the next couple of methods
array_arange = np.arange(20)

array_arange

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [93]:
array_arange.shape

(20,)

### 10. `Reshape()`
Returns an array containing the same data with a new shape.

In [94]:
array_arange.reshape(4, 5) # any other num will give error

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

### 11. `max()`
This method is useful for finding maximum values in an array.

In [95]:
array_arange.max()

19

### 12. `min()`
This method is useful for finding minimum values in an array.

In [96]:
array_arange.min()

0

### 13. `argmax()`
This method is used to find the index locations of maximum values in array

In [97]:
array_arange.argmax()

19

### 14. `argmin()`
This method is used to find the index locations of minimum values in array

In [98]:
#Example
array_arange.argmin()

0

### 15. `dtype()`
This methods tells what the data type of the object in the array is

In [99]:
# Example on dtype
array_arange.dtype

dtype('int32')

### 16. `append()`
This method is used to append values to the end of an array

In [100]:
nd1 = np.array([[1,2,3],[4,5,6]])
nd2 = np.array([[7,8,9]])

print(nd1)
print(nd2)
print(np.append(nd1,nd2,0))

[[1 2 3]
 [4 5 6]]
[[7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


### 17. `log()`
This method returns an ndarray with each element as the natural logarithm of the corresponding element in an array

**Note:** An ndarray is a multi-dimensional array of items of the same type and size

In [101]:
nd = np.array([[1,2.71828],[2.71828,1]])

print(nd)
print(np.log(nd))

[[1.      2.71828]
 [2.71828 1.     ]]
[[0.         0.99999933]
 [0.99999933 0.        ]]


### 18. `transpose`
This method reverses or permutes the axes of an ndarray

In [102]:
nd1 = np.array([[1,2,3],[4,5,6]])

print(nd1)
print(np.transpose(nd1))

[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]


### 19. `sum()`
This method sums the elements of an array over a given axis

In [103]:
nd = np.array([[1,2,3],[4,5,6]])

print(np.sum(nd))
print(np.sum(nd, axis=0))
print(np.sum(nd, axis=1))

21
[5 7 9]
[ 6 15]


### 20. `average()`
This method is used for calculating the weighted average along the specified axis

In [105]:
data = np.arange(0,10)
avg = np.average(data)

avg

4.5

# Built-In Methods in Pandas

In [56]:
import pandas as pd
import numpy as np

In [57]:
my_index = 'r1 r2 r3 r4 r5 r6 r7 r8 r9 r10'.split()
my_columns = 'c1 c2 c3 c4 c5 c6 c7 c8 c9 c10'.split()
array_2d = np.arange(0,100).reshape(10,10)

In [58]:
# How the index, columns and array_2d look like!
my_index

['r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10']

In [59]:
my_columns

['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10']

In [60]:
array_2d

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [61]:
# Creating a DataFrame using index, columns and array_2d
df = pd.DataFrame(data = array_2d, index = my_index, columns = my_columns)

In [62]:
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
r1,0,1,2,3,4,5,6,7,8,9
r2,10,11,12,13,14,15,16,17,18,19
r3,20,21,22,23,24,25,26,27,28,29
r4,30,31,32,33,34,35,36,37,38,39
r5,40,41,42,43,44,45,46,47,48,49
r6,50,51,52,53,54,55,56,57,58,59
r7,60,61,62,63,64,65,66,67,68,69
r8,70,71,72,73,74,75,76,77,78,79
r9,80,81,82,83,84,85,86,87,88,89
r10,90,91,92,93,94,95,96,97,98,99


### 1. **`reset_index()`** and **`set_index()`**<br>
We can reset the index of our dataframe to numerical index (which is default index), `inplace = True` to make the permanent change. *The existing index will be a new column.*

In [34]:
df.reset_index(inplace = True)
df

Unnamed: 0,index,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
0,r1,0,1,2,3,4,5,6,7,8,9
1,r2,10,11,12,13,14,15,16,17,18,19
2,r3,20,21,22,23,24,25,26,27,28,29
3,r4,30,31,32,33,34,35,36,37,38,39
4,r5,40,41,42,43,44,45,46,47,48,49
5,r6,50,51,52,53,54,55,56,57,58,59
6,r7,60,61,62,63,64,65,66,67,68,69
7,r8,70,71,72,73,74,75,76,77,78,79
8,r9,80,81,82,83,84,85,86,87,88,89
9,r10,90,91,92,93,94,95,96,97,98,99


### 2. `head()`
This method returns the first n rows in a data set

**Note:** n = 5 by default

In [35]:
df.head()

Unnamed: 0,index,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
0,r1,0,1,2,3,4,5,6,7,8,9
1,r2,10,11,12,13,14,15,16,17,18,19
2,r3,20,21,22,23,24,25,26,27,28,29
3,r4,30,31,32,33,34,35,36,37,38,39
4,r5,40,41,42,43,44,45,46,47,48,49


### 3. `tail()`
This method returns the first n rows in a data set

**Note:** n = 5 by default i.e. if no input is given, it will always show 5 rows


In [36]:
df.tail()

Unnamed: 0,index,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
5,r6,50,51,52,53,54,55,56,57,58,59
6,r7,60,61,62,63,64,65,66,67,68,69
7,r8,70,71,72,73,74,75,76,77,78,79
8,r9,80,81,82,83,84,85,86,87,88,89
9,r10,90,91,92,93,94,95,96,97,98,99


### 4. `shape()`
This method gives a total number of rows and them columns

In [37]:
df.shape

(10, 11)

### 5. `size()`
This methode returns the number of rows times the number of columns in a dataframe

In [38]:
df.size

110

### 6. `info()`
This method helps to give an idea of different information about the dataframe such as rows from RangeIndex, data columns and the data type of each column.

In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   index   10 non-null     object
 1   c1      10 non-null     int32 
 2   c2      10 non-null     int32 
 3   c3      10 non-null     int32 
 4   c4      10 non-null     int32 
 5   c5      10 non-null     int32 
 6   c6      10 non-null     int32 
 7   c7      10 non-null     int32 
 8   c8      10 non-null     int32 
 9   c9      10 non-null     int32 
 10  c10     10 non-null     int32 
dtypes: int32(10), object(1)
memory usage: 608.0+ bytes


### 7. `describe()`
This method generates descriptive statistics that summarize the central tendency, dispersion and shape of a dataset's distribution, excluding `NaN` values.

In [40]:
df.describe()

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0
std,30.276504,30.276504,30.276504,30.276504,30.276504,30.276504,30.276504,30.276504,30.276504,30.276504
min,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0
25%,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5
50%,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0
75%,67.5,68.5,69.5,70.5,71.5,72.5,73.5,74.5,75.5,76.5
max,90.0,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0


### 8. `isna()`
This method give the total number of null values in a dataframe

In [41]:
df.isna()

Unnamed: 0,index,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False


### 9. `isna().sum()`
This method gives the total null values

In [42]:
df.isna().sum()

index    0
c1       0
c2       0
c3       0
c4       0
c5       0
c6       0
c7       0
c8       0
c9       0
c10      0
dtype: int64

### 10. `nunique()`
This method gives all the unique values a variable contains

In [43]:
df.nunique()

index    10
c1       10
c2       10
c3       10
c4       10
c5       10
c6       10
c7       10
c8       10
c9       10
c10      10
dtype: int64

### 11. `Columns`
This method helps us to know the names of all the variables in a dataframe

In [44]:

df.columns

Index(['index', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10'], dtype='object')

### 12. `value_counts()`
This method returns counts of unique values. For example, we can get the unique values of a single variable such as the value_counts for column "c2" in the dataframe

In [45]:
print(df.c2.value_counts())

1     1
11    1
21    1
31    1
41    1
51    1
61    1
71    1
81    1
91    1
Name: c2, dtype: int64


### 13. `read_csv()`
This function helps to read a comma seperated value (csv) file into a pandas dataframe 

In [79]:
df1 = pd.read_csv('Pandas.csv')

df1

Unnamed: 0.1,Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10
0,R1,0,1,2,3.0,4,5,6,7,8,9
1,R2,10,11,12,13.0,14,15,16,17,18,19
2,R3,20,21,22,,24,25,26,27,28,29
3,R4,30,31,32,33.0,34,35,36,37,38,39
4,R5,40,41,42,43.0,44,45,46,47,48,49
5,R6,50,51,52,53.0,54,55,56,57,58,59
6,R7,60,61,62,63.0,64,65,66,67,68,69
7,R8,70,71,72,73.0,74,75,76,77,78,79
8,R9,80,81,82,83.0,84,85,86,87,88,89
9,R10,90,91,92,93.0,94,95,96,97,98,99


### 14. `memory_usage()`
This method returns a Pandas series having the memory of each usage of each column in bytes in a Pandas DataFrame.

**By specifying the deep attribute, we can get to know the actual space taken by each column**

In [48]:
df.memory_usage(deep=True)

Index    128
index    591
c1        40
c2        40
c3        40
c4        40
c5        40
c6        40
c7        40
c8        40
c9        40
c10       40
dtype: int64

### 15. `astype()`

This method is used to cast a python object to a particular data type.

Can be helpful incase data is not stored in the correct format



In [66]:
df_c3 =  df.c3.astype('category')
df_c3

r1      2
r2     12
r3     22
r4     32
r5     42
r6     52
r7     62
r8     72
r9     82
r10    92
Name: c3, dtype: category
Categories (10, int64): [2, 12, 22, 32, ..., 62, 72, 82, 92]

### 16. `loc[:]`
This helps to access a group of rows and columns in a dataset, a slice of the dataset as per requirement

In [64]:
df.loc[['r1','r2'],['c1','c2']]

Unnamed: 0,c1,c2
r1,0,1
r2,10,11


### 17. `drop_duplicates()`

This returns a Pandas DataFrame with duplicate rows removed

In [82]:
df.drop_duplicates(inplace=True)
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
r1,0,1,2,3,4,5,6,7,8,9
r2,10,11,12,13,14,15,16,17,18,19
r3,20,21,22,23,24,25,26,27,28,29
r4,30,31,32,33,34,35,36,37,38,39
r5,40,41,42,43,44,45,46,47,48,49
r6,50,51,52,53,54,55,56,57,58,59
r7,60,61,62,63,64,65,66,67,68,69
r8,70,71,72,73,74,75,76,77,78,79
r9,80,81,82,83,84,85,86,87,88,89
r10,90,91,92,93,94,95,96,97,98,99


### 18. `sort_values()`
This is used to sort columns in a  Pandas DataFrame by ascending or descending order

In [69]:
df.sort_values(by='c1', inplace=True)
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
r1,0,1,2,3,4,5,6,7,8,9
r2,10,11,12,13,14,15,16,17,18,19
r3,20,21,22,23,24,25,26,27,28,29
r4,30,31,32,33,34,35,36,37,38,39
r5,40,41,42,43,44,45,46,47,48,49
r6,50,51,52,53,54,55,56,57,58,59
r7,60,61,62,63,64,65,66,67,68,69
r8,70,71,72,73,74,75,76,77,78,79
r9,80,81,82,83,84,85,86,87,88,89
r10,90,91,92,93,94,95,96,97,98,99


### 19. `groupby()`
This is used to group a Pandas DataFrame by 1 or more columns and perform some mathematical operations on them

In [74]:
df.groupby(by='c3')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001ED8E22EF10>

### 20. `fillna()`
This helps to replace all NaN values in a DataFrame or Series by imputimg these values with more appropraite values

In [81]:
df1['C3'].fillna(23, inplace=True)
df

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10
r1,0,1,2,3,4,5,6,7,8,9
r2,10,11,12,13,14,15,16,17,18,19
r3,20,21,22,23,24,25,26,27,28,29
r4,30,31,32,33,34,35,36,37,38,39
r5,40,41,42,43,44,45,46,47,48,49
r6,50,51,52,53,54,55,56,57,58,59
r7,60,61,62,63,64,65,66,67,68,69
r8,70,71,72,73,74,75,76,77,78,79
r9,80,81,82,83,84,85,86,87,88,89
r10,90,91,92,93,94,95,96,97,98,99
