In [1]:
import numpy as np

# np.arange(start, stop) creates a NumPy array of consecutive integers.
# The stop value (7) is not included, as the output below shows.
np.arange(2, 7)

array([2, 3, 4, 5, 6])

In [17]:
import pandas as pd

# Columns: every key of the data dictionary is a (player, coordinate) tuple,
# so pandas builds a two-level (MultiIndex) column header from the keys.
# Rows: the labels are passed as a plain list of tuples. As the df.index output
# further down shows, this gives an ordinary object Index whose labels happen
# to be tuples (all five are identical here), not a MultiIndex.

df = pd.DataFrame(
    {
        (player, coordinate): np.arange(2, 7)
        for player in ("player1", "player2")
        for coordinate in ("x", "y")
    },
    index=[("player1", "x")] * 5,
)
df

Unnamed: 0_level_0,player1,player1,player2,player2
Unnamed: 0_level_1,x,y,x,y
"(player1, x)",2,2,2,2
"(player1, x)",3,3,3,3
"(player1, x)",4,4,4,4
"(player1, x)",5,5,5,5
"(player1, x)",6,6,6,6


In [18]:
# The columns were built from tuple keys, so pandas reports them as a MultiIndex.

df.columns

MultiIndex([('player1', 'x'),
            ('player1', 'y'),
            ('player2', 'x'),
            ('player2', 'y')],
           )

In [22]:
df.index   # The row labels are a plain Index of tuples (dtype object), not a MultiIndex!

Index([('player1', 'x'), ('player1', 'x'), ('player1', 'x'), ('player1', 'x'),
       ('player1', 'x')],
      dtype='object')

In [23]:
# Selecting the top-level column label "player1" returns its sub-columns x and y.
df["player1"]

Unnamed: 0,x,y
"(player1, x)",2,2
"(player1, x)",3,3
"(player1, x)",4,4
"(player1, x)",5,5
"(player1, x)",6,6


In [24]:
# Two-step selection: take the "player1" group first, then column "x" from the resulting frame.
df["player1"]["x"]

(player1, x)    2
(player1, x)    3
(player1, x)    4
(player1, x)    5
(player1, x)    6
Name: x, dtype: int32

In [25]:
# The same column selected in one step with the loc indexer,
# addressing it by its full (level 0, level 1) tuple.

df.loc[:, ("player1", "x")]

(player1, x)    2
(player1, x)    3
(player1, x)    4
(player1, x)    5
(player1, x)    6
Name: (player1, x), dtype: int32

In [27]:
# Overwrite every value in column ("player1", "x") with 1
# (the same technique as in the previous lesson, Pandas_fundamentals_IV).

df.loc[:, ("player1", "x")] = 1
df

Unnamed: 0_level_0,player1,player1,player2,player2
Unnamed: 0_level_1,x,y,x,y
"(player1, x)",1,2,2,2
"(player1, x)",1,3,3,3
"(player1, x)",1,4,4,4
"(player1, x)",1,5,5,5
"(player1, x)",1,6,6,6


In [29]:
# A second, flat example frame: column "a" holds 1..9 and column "b" holds 10..18.
# No index is passed, so the rows get the default labels 0..8.
df2 = pd.DataFrame(dict(a=np.arange(1, 10), b=np.arange(10, 19)))

df2

Unnamed: 0,a,b
0,1,10
1,2,11
2,3,12
3,4,13
4,5,14
5,6,15
6,7,16
7,8,17
8,9,18


In [32]:
# Add a new column 'c' by assigning an array with one value per existing row.

df2["c"] = np.arange(20, 29)
df2

Unnamed: 0,a,b,c
0,1,10,20
1,2,11,21
2,3,12,22
3,4,13,23
4,5,14,24
5,6,15,25
6,7,16,26
7,8,17,27
8,9,18,28


In [33]:
# Add a new column 'd' computed element-wise from existing columns: (a + b)**2.

df2["d"] = (df2["a"] + df2["b"])**2
df2

Unnamed: 0,a,b,c,d
0,1,10,20,121
1,2,11,21,169
2,3,12,22,225
3,4,13,23,289
4,5,14,24,361
5,6,15,25,441
6,7,16,26,529
7,8,17,27,625
8,9,18,28,729


In [35]:
# Add a new row with the loc indexer. The label of the new row (9) must be given
# explicitly; the list supplies one value per column (a, b, c, d).

df2.loc[9] = [0, 1, 2, 3]
df2

Unnamed: 0,a,b,c,d
0,1,10,20,121
1,2,11,21,169
2,3,12,22,225
3,4,13,23,289
4,5,14,24,361
5,6,15,25,441
6,7,16,26,529
7,8,17,27,625
8,9,18,28,729
9,0,1,2,3


In [38]:
# Deleting a column: first build the set of column labels to keep.
# Index.difference returns the labels that are NOT in the given list.
# NOTE: per the pandas docs, difference sorts its result by default, so the
# column order can change when the original columns are not already sorted.

df2.columns.difference(["c"])

Index(['a', 'b', 'd'], dtype='object')

In [39]:
# Index the frame with every column label except "c"; the displayed result omits that column.
df2[df2.columns.difference(["c"])]

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625
8,9,18,729
9,0,1,3


In [42]:
# The same selection as above, written with the loc indexer.
# This expression only returns the selected columns; nothing is assigned back to df2,
# so the original dataframe is unchanged (see the next cell). Do not rely on the result
# being a "view" of df2 - pandas does not guarantee that for this kind of selection.

df2.loc[:, df2.columns.difference(["c"])]

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625
8,9,18,729
9,0,1,3


In [43]:
# df2 still contains column "c": selecting columns did not modify it.
df2

Unnamed: 0,a,b,c,d
0,1,10,20,121
1,2,11,21,169
2,3,12,22,225
3,4,13,23,289
4,5,14,24,361
5,6,15,25,441
6,7,16,26,529
7,8,17,27,625
8,9,18,28,729
9,0,1,2,3


In [45]:
# Deleting a column with the drop method.
# axis = 1 refers to columns, axis = 0 refers to rows.

df2.drop("c", axis = 1)   # drop returns a new dataframe here; the original df2 is unchanged (see the next cell).

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625
8,9,18,729
9,0,1,3


In [46]:
# Column "c" is still present, because drop was called without inplace = True.
df2

Unnamed: 0,a,b,c,d
0,1,10,20,121
1,2,11,21,169
2,3,12,22,225
3,4,13,23,289
4,5,14,24,361
5,6,15,25,441
6,7,16,26,529
7,8,17,27,625
8,9,18,28,729
9,0,1,2,3


In [47]:
# To change the original dataframe df2 itself (remove the column from it), set inplace to True.

df2.drop("c", axis = 1, inplace = True)
df2

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625
8,9,18,729
9,0,1,3


In [48]:
# df2 itself no longer contains column "c".
df2

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625
8,9,18,729
9,0,1,3


In [54]:
# Deleting rows. 8 is the index label of the row to delete (axis = 0 means rows).
# inplace = True modifies df2 itself instead of returning a new dataframe.

df2.drop(8 , axis = 0, inplace = True)
df2

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
3,4,13,289
4,5,14,361
5,6,15,441
6,7,16,529
7,8,17,625


In [55]:
# Deleting rows by condition, first step: select the rows where 'a' is >= 4 and <= 6.
# This cell only displays the matching rows; nothing is deleted yet.

df2[(df2.a >= 4) & (df2.a <= 6)]

Unnamed: 0,a,b,d
3,4,13,289
4,5,14,361
5,6,15,441


In [56]:
# The drop method needs the index labels of all rows to delete,
# so take the index of the rows selected by the condition.

df2[(df2.a >= 4) & (df2.a <= 6)].index

Int64Index([3, 4, 5], dtype='int64')

In [57]:
# Drop the rows matching the condition by passing their index labels to drop;
# with inplace = True the rows labelled 3, 4 and 5 are removed from df2 itself.
df2.drop(df2[(df2.a >= 4) & (df2.a <= 6)].index, axis = 0, inplace = True)
df2

Unnamed: 0,a,b,d
0,1,10,121
1,2,11,169
2,3,12,225
6,7,16,529
7,8,17,625
