## Setting Index in Pandas DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global RNG so the random values below are reproducible.
np.random.seed(101)

# 6 rows labelled A-F and 5 columns labelled VAL1-VAL5, filled with draws
# from the standard normal distribution.
row_labels = "A B C D E F".split()
column_labels = "VAL1 VAL2 VAL3 VAL4 VAL5".split()
df = pd.DataFrame(
    np.random.randn(len(row_labels), len(column_labels)),
    index=row_labels,
    columns=column_labels,
)

In [3]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [4]:
# Returns a new DataFrame with a fresh 0..n-1 integer index; the old labels
# are kept as a regular column named "index". `df` itself is not modified
# here: the result is only displayed, not assigned.
df.reset_index()

Unnamed: 0,index,VAL1,VAL2,VAL3,VAL4,VAL5
0,A,2.70685,0.628133,0.907969,0.503826,0.651118
1,B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
2,C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
3,D,0.955057,0.190794,1.978757,2.605967,0.683509
4,E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
5,F,0.390528,0.166905,0.184502,0.807706,0.07296


In [5]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [6]:
# drop=True discards the old labels instead of keeping them as a column;
# inplace=True applies the change to `df` directly, so nothing is displayed.
df.reset_index(drop = True, inplace = True)

In [7]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
0,2.70685,0.628133,0.907969,0.503826,0.651118
1,-0.319318,-0.848077,0.605965,-2.018168,0.740122
2,0.528813,-0.589001,0.188695,-0.758872,-0.933237
3,0.955057,0.190794,1.978757,2.605967,0.683509
4,0.302665,1.693723,-1.706086,-1.159119,-0.134841
5,0.390528,0.166905,0.184502,0.807706,0.07296


In [8]:
# Split a whitespace-separated string into a list of six labels
# (one per row of `df`).
example = "TR F NL RUS AUS AZ".split()

In [9]:
example

['TR', 'F', 'NL', 'RUS', 'AUS', 'AZ']

In [10]:
# Attach the labels as an ordinary column named "new_index".
df["new_index"] = example

In [11]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5,new_index
0,2.70685,0.628133,0.907969,0.503826,0.651118,TR
1,-0.319318,-0.848077,0.605965,-2.018168,0.740122,F
2,0.528813,-0.589001,0.188695,-0.758872,-0.933237,NL
3,0.955057,0.190794,1.978757,2.605967,0.683509,RUS
4,0.302665,1.693723,-1.706086,-1.159119,-0.134841,AUS
5,0.390528,0.166905,0.184502,0.807706,0.07296,AZ


In [12]:
# Returns a new DataFrame that uses the "new_index" column as its index.
# `df` is left unchanged because the result is not assigned.
df.set_index("new_index")

Unnamed: 0_level_0,VAL1,VAL2,VAL3,VAL4,VAL5
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TR,2.70685,0.628133,0.907969,0.503826,0.651118
F,-0.319318,-0.848077,0.605965,-2.018168,0.740122
NL,0.528813,-0.589001,0.188695,-0.758872,-0.933237
RUS,0.955057,0.190794,1.978757,2.605967,0.683509
AUS,0.302665,1.693723,-1.706086,-1.159119,-0.134841
AZ,0.390528,0.166905,0.184502,0.807706,0.07296


In [13]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5,new_index
0,2.70685,0.628133,0.907969,0.503826,0.651118,TR
1,-0.319318,-0.848077,0.605965,-2.018168,0.740122,F
2,0.528813,-0.589001,0.188695,-0.758872,-0.933237,NL
3,0.955057,0.190794,1.978757,2.605967,0.683509,RUS
4,0.302665,1.693723,-1.706086,-1.159119,-0.134841,AUS
5,0.390528,0.166905,0.184502,0.807706,0.07296,AZ


In [14]:
# inplace=True modifies `df` directly: "new_index" stops being a regular
# column and becomes the index of `df`.
df.set_index("new_index", inplace = True)

In [15]:
df

Unnamed: 0_level_0,VAL1,VAL2,VAL3,VAL4,VAL5
new_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TR,2.70685,0.628133,0.907969,0.503826,0.651118
F,-0.319318,-0.848077,0.605965,-2.018168,0.740122
NL,0.528813,-0.589001,0.188695,-0.758872,-0.933237
RUS,0.955057,0.190794,1.978757,2.605967,0.683509
AUS,0.302665,1.693723,-1.706086,-1.159119,-0.134841
AZ,0.390528,0.166905,0.184502,0.807706,0.07296


## Multi-Index and Index Hierarchy in Pandas DataFrames

In [16]:
import numpy as np
import pandas as pd

In [17]:
# Inner level: the three class labels, repeated once per school.
inside = ["class A", "class B", "class C"] * 2
# Outer level: each school label repeated for its three classes.
outside = ["school1"] * 3 + ["school2"] * 3

In [18]:
# zip() returns a lazy iterator, so displaying it shows only the object
# repr, not the pairs; wrap it in list() to see the contents.
zip(outside, inside)

<zip at 0x1d02007f780>

In [19]:
# Materialise the (school, class) pairs into a list of tuples.
multi_index = [*zip(outside, inside)]

In [20]:
multi_index

[('school1', 'class A'),
 ('school1', 'class B'),
 ('school1', 'class C'),
 ('school2', 'class A'),
 ('school2', 'class B'),
 ('school2', 'class C')]

In [21]:
# Convert the list of (school, class) tuples into a two-level MultiIndex.
hier_index = pd.MultiIndex.from_tuples(multi_index)

In [22]:
hier_index

MultiIndex([('school1', 'class A'),
            ('school1', 'class B'),
            ('school1', 'class C'),
            ('school2', 'class A'),
            ('school2', 'class B'),
            ('school2', 'class C')],
           )

In [23]:
# Seed the global RNG so the generated scores are reproducible.
np.random.seed(101)

# 6 rows x 2 columns of random integers in [70, 100), indexed by the
# (school, class) MultiIndex built above.
scores = np.random.randint(70, 100, size=(6, 2))
df = pd.DataFrame(
    scores,
    index=hier_index,
    columns=["1st_semester", "2st_semester"],
)

In [24]:
df

Unnamed: 0,Unnamed: 1,1st_semester,2st_semester
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


## Multi-Index and Index Hierarchy in Pandas DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Two-level labels: the outer level is the school, the inner level the class.
outside = ["school1"] * 3 + ["school2"] * 3
inside = ["class A", "class B", "class C"] * 2

# Pair the labels row by row, then build the nested (hierarchical) index
# as a separate object before creating the DataFrame.
multi_index = [*zip(outside, inside)]
hier_index = pd.MultiIndex.from_tuples(multi_index)

# Seed the RNG for reproducibility and generate the DataFrame:
# 6 rows x 2 columns of random integers in [70, 100).
np.random.seed(101)
scores = np.random.randint(70, 100, size=(6, 2))
# NOTE(review): "2st_semester" looks like a typo for "2nd_semester"; left
# unchanged because later cells select the column by this exact name.
df = pd.DataFrame(
    scores,
    index=hier_index,
    columns=["1st_semester", "2st_semester"],
)

In [27]:
df

Unnamed: 0,Unnamed: 1,1st_semester,2st_semester
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


In [28]:
# A single column label returns a Series; it keeps the two-level row index.
df["1st_semester"]

school1  class A    81
         class B    76
         class C    99
school2  class A    85
         class B    83
         class C    74
Name: 1st_semester, dtype: int32

In [29]:
# A list of labels (even a one-element list) returns a DataFrame instead of
# a Series.
df[["1st_semester"]]

Unnamed: 0,Unnamed: 1,1st_semester
school1,class A,81
school1,class B,76
school1,class C,99
school2,class A,85
school2,class B,83
school2,class C,74


In [30]:
# Same list-style selection for the second column; the result is a
# one-column DataFrame.
df[["2st_semester"]]

Unnamed: 0,Unnamed: 1,2st_semester
school1,class A,87
school1,class B,93
school1,class C,81
school2,class A,79
school2,class B,78
school2,class C,78


In [31]:
df

Unnamed: 0,Unnamed: 1,1st_semester,2st_semester
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


In [32]:
# Selecting a label of the outer index level returns all rows under it,
# indexed by the remaining inner level (the classes).
df.loc["school1"]

Unnamed: 0,1st_semester,2st_semester
class A,81,87
class B,76,93
class C,99,81


In [33]:
# Chain two .loc lookups: first the school (outer level), then the class
# within it. A single label returns that row as a Series.
df.loc["school1"].loc["class B"]

1st_semester    76
2st_semester    93
Name: class B, dtype: int32

In [34]:
# Same lookup, but passing the inner label as a list keeps the result as a
# one-row DataFrame.
df.loc["school1"].loc[["class B"]]

Unnamed: 0,1st_semester,2st_semester
class B,76,93


In [35]:
df

Unnamed: 0,Unnamed: 1,1st_semester,2st_semester
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


In [36]:
df.index

MultiIndex([('school1', 'class A'),
            ('school1', 'class B'),
            ('school1', 'class C'),
            ('school2', 'class A'),
            ('school2', 'class B'),
            ('school2', 'class C')],
           )

In [37]:
# The index levels have no names yet, so each entry is None.
df.index.names

FrozenList([None, None])

In [38]:
# Name the two index levels: outer level -> "schools", inner -> "classes".
df.index.names = ["schools", "classes"]

In [39]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,1st_semester,2st_semester
schools,classes,Unnamed: 2_level_1,Unnamed: 3_level_1
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


## Selecting Elements Using the xs() Function in Multi-Indexed DataFrames

In [40]:
# xs(): a method for selecting data directly from a multi-indexed DataFrame
import numpy as np
import pandas as pd

In [41]:
# Outer level (schools) and inner level (classes) labels, one pair per row.
outside = ["school1"] * 3 + ["school2"] * 3
inside = ["class A", "class B", "class C"] * 2

# Build the two-level index from the (school, class) pairs.
multi_index = [*zip(outside, inside)]
hier_index = pd.MultiIndex.from_tuples(multi_index)

# Reproducible random scores: 6 rows x 2 columns of integers in [70, 100).
np.random.seed(101)
scores = np.random.randint(70, 100, size=(6, 2))
df = pd.DataFrame(
    scores,
    index=hier_index,
    columns=["1st_semester", "2st_semester"],
)

# Name the index levels so they can be referenced by name in xs().
df.index.names = ["schools", "classes"]

In [42]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,1st_semester,2st_semester
schools,classes,Unnamed: 2_level_1,Unnamed: 3_level_1
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


In [43]:
# Cross-section on the outer level: returns the rows under "school2",
# indexed by the remaining "classes" level.
df.xs("school2")

Unnamed: 0_level_0,1st_semester,2st_semester
classes,Unnamed: 1_level_1,Unnamed: 2_level_1
class A,85,79
class B,83,78
class C,74,78


In [44]:
# A tuple key addresses both levels at once; the single matching row is
# returned as a Series.
df.xs(("school2", "class A"))

1st_semester    85
2st_semester    79
Name: (school2, class A), dtype: int32

In [45]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,1st_semester,2st_semester
schools,classes,Unnamed: 2_level_1,Unnamed: 3_level_1
school1,class A,81,87
school1,class B,76,93
school1,class C,99,81
school2,class A,85,79
school2,class B,83,78
school2,class C,74,78


In [46]:
# Same key, but with the levels given explicitly the result is a one-row
# DataFrame that keeps both index levels.
df.xs(("school2", "class A"), level = [0, 1])

Unnamed: 0_level_0,Unnamed: 1_level_0,1st_semester,2st_semester
schools,classes,Unnamed: 2_level_1,Unnamed: 3_level_1
school2,class A,85,79


In [47]:
# Select by the inner level, referenced by name: returns "class A" for
# every school, indexed by "schools".
df.xs("class A", level = "classes")

Unnamed: 0_level_0,1st_semester,2st_semester
schools,Unnamed: 1_level_1,Unnamed: 2_level_1
school1,81,87
school2,85,79


In [48]:
# Same selection with the inner level referenced by position (1) instead
# of by name; the output matches the by-name call.
df.xs("class A", level = 1)

Unnamed: 0_level_0,1st_semester,2st_semester
schools,Unnamed: 1_level_1,Unnamed: 2_level_1
school1,81,87
school2,85,79
