## DATAFRAME

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame(np.arange(20,0,-1).reshape(5,-1),
                  index= ['a','b','c','d','e'], columns = ['A','B','C','D'])

print(df)

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12  11  10   9
d   8   7   6   5
e   4   3   2   1


#### Replacing values in DataFrame

Syntax-

#### df.replace( toreplace = None, value = None, inplace = False, method = 'pad')

toreplace - str, list, dict, Series, int, float, None, regex

value - scalar, dict, list, str, regex, default = None

method - {'pad','ffill','bfill',None}

In [3]:
df.replace(5,21) # Replaces 5 with 21
# We haven't done inplace = True, so it will be temporary change till this particular command
df 

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,11,10,9
d,8,7,6,5
e,4,3,2,1


In [4]:
df.replace(5,21,inplace=True) # Replaces 5 with 21
df


Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,11,10,9
d,8,7,6,21
e,4,3,2,1


In [7]:
df.replace(21,11,inplace=True) # Replaces 5 with 21
df

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,11,10,9
d,8,7,6,11
e,4,3,2,1


In [5]:
df.replace(11,4,inplace=True) 
df

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,4,10,9
d,8,7,6,21
e,4,3,2,1


In [6]:
# List 'toreplace' & Scalar 'value'
df.replace([1,2,3],4)

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,4,10,9
d,8,7,6,21
e,4,4,4,4


In [7]:
# List 'to replace' & 'value'
df.replace([1,2,3,4],[11,12,13,14])

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,14,10,9
d,8,7,6,21
e,14,13,12,11


In [8]:
# Method - Backward fill (method = 'bfill')
print(df)
df.replace([17,2,7,14], method = 'bfill') # Value will be replaced with value present just below the element 

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12   4  10   9
d   8   7   6  21
e   4   3   2   1


Unnamed: 0,A,B,C,D
a,20,19,18,13
b,16,15,10,13
c,12,4,10,9
d,8,3,6,21
e,4,3,2,1


In [7]:
# Method - Foward fill (method = 'ffill')
print(df)
df.replace([5,2,7,14,19], method = 'ffill')

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12  11  10   9
d   8   7   6   5
e   4   3   2   1


Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,18,13
c,12,11,10,9
d,8,11,6,9
e,4,3,6,1


In [9]:
# Dict type 'toreplace'

print(df)

df.replace( {1:'One',2:'Two',11:'Eleven',20:'Twenty',17 : 170})

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12   4  10   9
d   8   7   6  21
e   4   3   2   1


Unnamed: 0,A,B,C,D
a,Twenty,19,18,170
b,16,15,14,13
c,12,4,10,9
d,8,7,6,21
e,4,3,Two,One


In [10]:
df

Unnamed: 0,A,B,C,D
a,20,19,18,17
b,16,15,14,13
c,12,4,10,9
d,8,7,6,21
e,4,3,2,1


In [11]:
# Dict type 'toreplace', Scalar 'values'
# Replace values specific column wise
# Replacing in only columns A & B

print(df)

df.replace( {'A':[1,8,16],'B':15},100 )

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12   4  10   9
d   8   7   6  21
e   4   3   2   1


Unnamed: 0,A,B,C,D
a,20,19,18,17
b,100,100,14,13
c,12,4,10,9
d,100,7,6,21
e,4,3,2,1


In [9]:
# Dict type 'toreplace', 'values'
# Replace values specific column wise

print(df)

df.replace({'A': {0:100,4:400,8:800},
           'B':{7:'Seven',3:'Three'}})

df.A.replace(20,21)

    A   B   C   D
a  20  19  18  17
b  16  15  14  13
c  12  11  10   9
d   8   7   6  11
e   4   3   2   1


a    21
b    16
c    12
d     8
e     4
Name: A, dtype: int32

In [16]:
df[["A","C"]].replace([28,20,14],[20,13,15])

Unnamed: 0,A,C
a,13,18
b,16,15
c,12,10
d,8,6
e,4,2


#### NaN values are represented/specified with numpy as - np.nan

#### Check NaN values (Gives True/False)  -    .isnull() /   .isna()

In [21]:
# Replacing values with Nan

# print(df,'\n')

# print(df.replace([20,15,8,1],'any'),'\n')

df5 = df.replace([20,15,8,1],np.nan)

print(df5.isna().sum())

df5[['A','B']].isna().sum()

A    2
B    1
C    0
D    1
dtype: int64


A    2
B    1
dtype: int64

In [22]:
df5

Unnamed: 0,A,B,C,D
a,,19.0,18,17.0
b,16.0,,14,13.0
c,12.0,4.0,10,9.0
d,,7.0,6,4.0
e,4.0,3.0,2,


#### View the dataframe in form of array

In form of arrays , we can easily perform faster operations

In [23]:
# View the dataframe in form of array
df.values

array([[20, 19, 18, 17],
       [16, 15, 14, 13],
       [12,  4, 10,  9],
       [ 8,  7,  6,  4],
       [ 4,  3,  2,  1]])

#### Checking Dimension of DataFrame

In [24]:
# Getting Dimensions
df.ndim

2

##### DataFrame columns are in form of series

In [25]:
df['B']
df[['C','A','D']]

Unnamed: 0,C,A,D
a,18,20,17
b,14,16,13
c,10,12,9
d,6,8,4
e,2,4,1


In [26]:
df.B

a    19
b    15
c     4
d     7
e     3
Name: B, dtype: int32

In [27]:
# Checking datatype (of column or anything)
type(df['A'])

pandas.core.series.Series


#### Creating a new column

In [28]:
# Creating new column by performing operations on columns
df['E'] = df['A'] / df['B']
df['F'] = df['E'].round(2)
df['G'] = list(np.arange(0.4, 5.4))
df

Unnamed: 0,A,B,C,D,E,F,G
a,20,19,18,17,1.052632,1.05,0.4
b,16,15,14,13,1.066667,1.07,1.4
c,12,4,10,9,3.0,3.0,2.4
d,8,7,6,4,1.142857,1.14,3.4
e,4,3,2,1,1.333333,1.33,4.4


In [29]:
df['F'] = df['D'].astype(float)
df

Unnamed: 0,A,B,C,D,E,F,G
a,20,19,18,17,1.052632,17.0,0.4
b,16,15,14,13,1.066667,13.0,1.4
c,12,4,10,9,3.0,9.0,2.4
d,8,7,6,4,1.142857,4.0,3.4
e,4,3,2,1,1.333333,1.0,4.4


In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, a to e
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       5 non-null      int32  
 1   B       5 non-null      int32  
 2   C       5 non-null      int32  
 3   D       5 non-null      int32  
 4   E       5 non-null      float64
 5   F       5 non-null      float64
 6   G       5 non-null      float64
dtypes: float64(3), int32(4)
memory usage: 400.0+ bytes


#### Removing a column

In [34]:
# print(df)
# Here 1 represents column axis = (0-Row,1-Column)
# inplace = True : For permanent change

df.drop(['G','A','D'],axis = 1)
df.drop(['a',"c"],axis = 0)        #we can remove riws without using axis as it by default removes row.

Unnamed: 0,A,B,C,D,E,F,G
b,16,15,14,13,1.066667,13.0,1.4
d,8,7,6,4,1.142857,4.0,3.4
e,4,3,2,1,1.333333,1.0,4.4


#### Removing row

In [35]:
print(df)

# By default axis = 0, i.e A row with index 'e' will be removed

df.drop('e')  # Change not permanent as inplace = False by default
df.drop(['b','a','d'],0, inplace = True)

    A   B   C   D         E     F    G
a  20  19  18  17  1.052632  17.0  0.4
b  16  15  14  13  1.066667  13.0  1.4
c  12   4  10   9  3.000000   9.0  2.4
d   8   7   6   4  1.142857   4.0  3.4
e   4   3   2   1  1.333333   1.0  4.4


In [1]:
import cowsay

ModuleNotFoundError: No module named 'cowsay'