In [1]:
import pandas as pd
import numpy as np

# Мультииндекс для Series
index = [
    ('city_1', 2010),
    ('city_1', 2020),
    ('city_2', 2010),
    ('city_2', 2020),
    ('city_3', 2010),
    ('city_3', 2020),
]
population = [101, 201, 102, 202, 103, 203]

pop = pd.Series(population, index=pd.MultiIndex.from_tuples(index, names=['city', 'year']))

# DataFrame на основе Series
pop_df = pd.DataFrame(
    {
        'total': pop,
        'something': [10, 11, 12, 13, 14, 15],
    }
)

print("DataFrame с мультииндексом:")
print(pop_df)

DataFrame с мультииндексом:
             total  something
city   year                  
city_1 2010    101         10
       2020    201         11
city_2 2010    102         12
       2020    202         13
city_3 2010    103         14
       2020    203         15


In [2]:
pop_df_1 = pop_df.loc[('city_1'), 'something']
print(pop_df_1)

year
2010    10
2020    11
Name: something, dtype: int64


In [3]:
pop_df_2 = pop_df.loc[[('city_1', 2010), ('city_1', 2020), ('city_3', 2010), ('city_3', 2020)], ['total', 'something']]
print(pop_df_2)

             total  something
city   year                  
city_1 2010    101         10
       2020    201         11
city_3 2010    103         14
       2020    203         15


In [4]:
pop_df_3 = pop_df.loc[[('city_1', 2010), ('city_1', 2020), ('city_3', 2010), ('city_3', 2020)], 'something']
print(pop_df_3)

city    year
city_1  2010    10
        2020    11
city_3  2010    14
        2020    15
Name: something, dtype: int64


In [5]:
# Пример DataFrame с мультииндексом
index = pd.MultiIndex.from_product(
    [['city_1', 'city_2'], [2010, 2020]],
    names=['city', 'year']
)
columns = pd.MultiIndex.from_product(
    [['person_1', 'person_2', 'person_3'], ['job_1', 'job_2']],
    names=['worker', 'job']
)

data = np.random.randint(1, 100, size=(4, 6))
df = pd.DataFrame(data, index=index, columns=columns)

print("\nDataFrame для работы:")
print(df)


DataFrame для работы:
worker      person_1       person_2       person_3      
job            job_1 job_2    job_1 job_2    job_1 job_2
city   year                                             
city_1 2010       60    93       66    28       17    29
       2020       45    10       70    98       40    13
city_2 2010       82     9       30    15       95    23
       2020       86    37       27    50       98    38


In [6]:
df_2020 = df.loc[(slice(None), 2020), :]
print(df_2020)

worker      person_1       person_2       person_3      
job            job_1 job_2    job_1 job_2    job_1 job_2
city   year                                             
city_1 2020       45    10       70    98       40    13
city_2 2020       86    37       27    50       98    38


In [7]:
df_job_1 = df.loc[:, (slice(None), 'job_1')]
print(df_job_1)

worker      person_1 person_2 person_3
job            job_1    job_1    job_1
city   year                           
city_1 2010       60       66       17
       2020       45       70       40
city_2 2010       82       30       95
       2020       86       27       98


In [8]:
df_city_1_job_2 = df.loc[('city_1', slice(None)), (slice(None), 'job_2')]
print(df_city_1_job_2)

worker      person_1 person_2 person_3
job            job_2    job_2    job_2
city   year                           
city_1 2010       93       28       29
       2020       10       98       13


In [9]:
# Пример DataFrame с мультииндексом
index = pd.MultiIndex.from_product(
    [['city_1', 'city_2'], [2010, 2020]],
    names=['city', 'year']
)
columns = pd.MultiIndex.from_product(
    [['person_1', 'person_2', 'person_3'], ['job_1', 'job_2']],
    names=['worker', 'job']
)

data = np.random.randint(1, 100, size=(4, 6))
df = pd.DataFrame(data, index=index, columns=columns)

print("\nDataFrame для работы:")
print(df)


DataFrame для работы:
worker      person_1       person_2       person_3      
job            job_1 job_2    job_1 job_2    job_1 job_2
city   year                                             
city_1 2010       61    61        5    77       97    51
       2020       64    96       86    45       36    35
city_2 2010       99    67       43    11       83    56
       2020       96    61       11    81       71    39


In [10]:
idx = pd.IndexSlice
df_person_1_3 = df.loc[:, idx[['person_1', 'person_3'], :]]
print(df_person_1_3)

worker      person_1       person_3      
job            job_1 job_2    job_1 job_2
city   year                              
city_1 2010       61    61       97    51
       2020       64    96       36    35
city_2 2010       99    67       83    56
       2020       96    61       71    39


In [11]:
df_city_1_first_two = df.loc[idx['city_1', :], idx[['person_1', 'person_2'], :]]
print(df_city_1_first_two)

worker      person_1       person_2      
job            job_1 job_2    job_1 job_2
city   year                              
city_1 2010       61    61        5    77
       2020       64    96       86    45


In [12]:
# Создание Series
ser1 = pd.Series(['a', 'b', 'c'], index=[1, 2, 3])
ser2 = pd.Series(['b', 'c', 'f'], index=[4, 5, 6])

# Outer join
outer_join = pd.concat([ser1, ser2], join='outer')
print("\nOuter join:")
print(outer_join)

# Inner join
inner_join = pd.concat([ser1, ser2], join='inner')
print("\nInner join:")
print(inner_join)


Outer join:
1    a
2    b
3    c
4    b
5    c
6    f
dtype: object

Inner join:
1    a
2    b
3    c
4    b
5    c
6    f
dtype: object
