# Data Frames

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a 5x5 integer grid holding 0..24, filled row by row.
new_data = np.arange(25).reshape(5, 5)
print(new_data)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


## Creating the data frame

In [4]:
# Wrap the NumPy array in a DataFrame, labelling the rows R1-R5 and the
# columns C1-C5. pandas is already imported as `pd` in the first cell, so it
# is not imported again here.
data = pd.DataFrame(
    data=new_data,
    index=["R1", "R2", "R3", "R4", "R5"],
    columns=["C1", "C2", "C3", "C4", "C5"],
)
print(data)

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


In [6]:
# The constructor also accepts its arguments positionally: (data, index, columns).
row_labels = ["R1", "R2", "R3", "R4", "R5"]
column_labels = ["C1", "C2", "C3", "C4", "C5"]
frame = pd.DataFrame(new_data, row_labels, column_labels)
print(frame)

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


# Indexing

## Column indexing

In [7]:
# Indexing with one column label returns that column together with the row labels.
c3_column = frame["C3"]
print(c3_column)

R1     2
R2     7
R3    12
R4    17
R5    22
Name: C3, dtype: int32


In [8]:
# Inspect what kind of object a single selected column is.
column_type = type(frame["C3"])
print(column_type)

<class 'pandas.core.series.Series'>


In [9]:
# A list of labels selects several columns, returned in the order listed.
wanted_columns = ["C3", "C1"]
print(frame[wanted_columns])

    C3  C1
R1   2   0
R2   7   5
R3  12  10
R4  17  15
R5  22  20


In [10]:
# Position-based selection: every row, columns at positions 0 and 1.
first_two_columns = frame.iloc[:, [0, 1]]
print(first_two_columns)

    C1  C2
R1   0   1
R2   5   6
R3  10  11
R4  15  16
R5  20  21


In [11]:
# A single integer passed to iloc picks the row at that position (0 = first row).
first_row = frame.iloc[0]
print(first_row)

C1    0
C2    1
C3    2
C4    3
C5    4
Name: R1, dtype: int32


In [13]:
# (row position, column position) addresses one cell and yields a scalar.
row_pos, col_pos = 0, 4
print(frame.iloc[row_pos, col_pos])

4


## Row indexing

In [14]:
# loc selects by label: fetch the row named "R1".
row_r1 = frame.loc["R1"]
print(row_r1)

C1    0
C2    1
C3    2
C4    3
C5    4
Name: R1, dtype: int32


In [15]:
# A list of row labels returns those rows as a smaller DataFrame.
wanted_rows = ["R1", "R3"]
print(frame.loc[wanted_rows])

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R3  10  11  12  13  14


In [16]:
# Rows can also be selected by integer position: position 0 is the row labelled R1.
print(frame.iloc[0])

C1    0
C2    1
C3    2
C4    3
C5    4
Name: R1, dtype: int32


## Row and Column indexing

In [17]:
# (row label, column label) addresses the single value at that intersection.
value_r1_c3 = frame.loc["R1", "C3"]
print(value_r1_c3)

2


In [21]:
# Lists of labels on both axes select a sub-frame; the result follows the list order.
rows_wanted = ["R2", "R4"]
cols_wanted = ["C2", "C1"]
print(frame.loc[rows_wanted, cols_wanted])

    C2  C1
R2   6   5
R4  16  15


### Creating a new column

In [22]:
# Assigning a list to a new column label adds that column, one value per row.
# The values are stored as integers (not numeric-looking strings) so that C6
# is a numeric column like C1-C5.
frame["C6"] = [100, 200, 300, 400, 500]
print(frame)

    C1  C2  C3  C4  C5   C6
R1   0   1   2   3   4  100
R2   5   6   7   8   9  200
R3  10  11  12  13  14  300
R4  15  16  17  18  19  400
R5  20  21  22  23  24  500


### Deleting columns

In [26]:
# drop() hands back a new frame without C6; axis="columns" is the same as axis=1.
without_c6 = frame.drop(labels=["C6"], axis="columns")
print(without_c6)

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


In [27]:
print(frame)     # C6 is still here: drop() without inplace=True returned a new frame and left `frame` unchanged

    C1  C2  C3  C4  C5   C6
R1   0   1   2   3   4  100
R2   5   6   7   8   9  200
R3  10  11  12  13  14  300
R4  15  16  17  18  19  400
R5  20  21  22  23  24  500


In [29]:
# With inplace=True the column is removed from `frame` itself.
frame.drop(labels=["C6"], axis="columns", inplace=True)
print(frame)

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


In [30]:
print(frame)           # The in-place drop persists: C6 is no longer part of `frame`

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


## Creating new rows

In [52]:
# Assigning to a row label that is not in the index yet adds it as a new row.
r6_values = list(range(99, 104))  # 99, 100, 101, 102, 103
frame.loc["R6"] = r6_values

In [53]:
# Show the frame with the newly added R6 row.
print(frame)

    C1   C2   C3   C4   C5
R1   0    1    2    3    4
R2   5    6    7    8    9
R3  10   11   12   13   14
R4  15   16   17   18   19
R5  20   21   22   23   24
R6  99  100  101  102  103


## Deleting rows

In [54]:
# Temporary deletion: drop() returns a new frame without R6 and leaves `frame`
# untouched. Rows (axis=0) are the default axis, so `axis` can be omitted.
# The result is captured and checked instead of being silently discarded.
frame_without_r6 = frame.drop(["R6"])
assert "R6" not in frame_without_r6.index   # removed from the returned frame
assert "R6" in frame.index                  # still present in the original
print(frame)

    C1   C2   C3   C4   C5
R1   0    1    2    3    4
R2   5    6    7    8    9
R3  10   11   12   13   14
R4  15   16   17   18   19
R5  20   21   22   23   24
R6  99  100  101  102  103


In [34]:
# `frame` still contains R6, because the drop() call was not made in place.
print(frame)

    C1   C2   C3   C4   C5
R1   0    1    2    3    4
R2   5    6    7    8    9
R3  10   11   12   13   14
R4  15   16   17   18   19
R5  20   21   22   23   24
R6  99  100  101  102  103


In [55]:
# Permanent deletion: inplace=True removes the row from `frame` itself.
frame.drop(labels=["R6"], axis="index", inplace=True)
print(frame)

    C1  C2  C3  C4  C5
R1   0   1   2   3   4
R2   5   6   7   8   9
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


## Conditional Indexing

In [56]:
# Element-wise comparison yields a frame of booleans. It is printed inside a
# one-element list, which is why the output is wrapped in [ ].
greater_than_10 = frame > 10
print([greater_than_10])

[       C1     C2     C3     C4     C5
R1  False  False  False  False  False
R2  False  False  False  False  False
R3  False   True   True   True   True
R4   True   True   True   True   True
R5   True   True   True   True   True]


In [57]:
# Indexing with the boolean frame keeps the values > 10 and shows NaN elsewhere.
mask_gt_10 = frame > 10
print(frame[mask_gt_10])

      C1    C2    C3    C4    C5
R1   NaN   NaN   NaN   NaN   NaN
R2   NaN   NaN   NaN   NaN   NaN
R3   NaN  11.0  12.0  13.0  14.0
R4  15.0  16.0  17.0  18.0  19.0
R5  20.0  21.0  22.0  23.0  24.0


In [59]:
# Boolean Series: True for each row whose C2 value is greater than 6.
c2_above_6 = frame["C2"] > 6
print(c2_above_6)

R1    False
R2    False
R3     True
R4     True
R5     True
Name: C2, dtype: bool


In [61]:
# Keep only the rows for which the condition C2 > 6 is True.
c2_filter = frame["C2"] > 6
print(frame[c2_filter])

    C1  C2  C3  C4  C5
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


In [63]:
# C2 values of the rows where C2 > 6. A single .loc[row_mask, column] lookup
# is used instead of chained indexing (frame[mask]["C2"]).
print(frame.loc[frame["C2"] > 6, "C2"])

R3    11
R4    16
R5    21
Name: C2, dtype: int64


In [64]:
# Rows where both conditions hold; & combines the two boolean Series element-wise.
c2_cond = frame["C2"] > 6
c3_cond = frame["C3"] > 10
print(frame[c2_cond & c3_cond])

    C1  C2  C3  C4  C5
R3  10  11  12  13  14
R4  15  16  17  18  19
R5  20  21  22  23  24


# Index Manipulation

In [66]:
# reset_index() returns a new frame with a 0-based integer index; the old row
# labels move into a column named "index".
with_default_index = frame.reset_index()
print(with_default_index)

  index  C1  C2  C3  C4  C5
0    R1   0   1   2   3   4
1    R2   5   6   7   8   9
2    R3  10  11  12  13  14
3    R4  15  16  17  18  19
4    R5  20  21  22  23  24


In [67]:
# Add a column of alternative row labels.
frame["new"] = ["R11", "R22", "R33", "R44", "R55"]

# set_index() returns a new frame indexed by "new". Without reassignment or
# inplace=True, `frame` keeps its R1-R5 index and "new" stays a regular column.
# The result is captured and checked instead of being silently discarded.
frame_indexed_by_new = frame.set_index("new")
assert list(frame_indexed_by_new.index) == ["R11", "R22", "R33", "R44", "R55"]
assert "new" in frame.columns
print(frame)

    C1  C2  C3  C4  C5  new
R1   0   1   2   3   4  R11
R2   5   6   7   8   9  R22
R3  10  11  12  13  14  R33
R4  15  16  17  18  19  R44
R5  20  21  22  23  24  R55


In [69]:
# axes holds the row index followed by the column index.
row_axis, column_axis = frame.axes
print([row_axis, column_axis])

[Index(['R1', 'R2', 'R3', 'R4', 'R5'], dtype='object'), Index(['C1', 'C2', 'C3', 'C4', 'C5', 'new'], dtype='object')]


In [73]:
# Name both axes: the column axis becomes "columns", the row axis "rows".
# The calls are chained and the result reassigned rather than mutating the
# frame twice with inplace=True.
frame = frame.rename_axis("columns", axis=1).rename_axis("rows", axis=0)

In [74]:
# The axis names ("columns" and "rows") now appear in the header of the printed frame.
print(frame)

columns  C1  C2  C3  C4  C5  new
rows                            
R1        0   1   2   3   4  R11
R2        5   6   7   8   9  R22
R3       10  11  12  13  14  R33
R4       15  16  17  18  19  R44
R5       20  21  22  23  24  R55


In [76]:
# Replace all six column labels at once with the letters a-f.
frame.columns = list("abcdef")

In [77]:
# The columns are now labelled a-f; the row axis keeps its "rows" name.
print(frame)

       a   b   c   d   e    f
rows                         
R1     0   1   2   3   4  R11
R2     5   6   7   8   9  R22
R3    10  11  12  13  14  R33
R4    15  16  17  18  19  R44
R5    20  21  22  23  24  R55
