## 辞書型の内包表現

In [1]:
# Dict comprehension: each fruit name becomes a key whose value is the
# number of characters in that name.
dic1 = {fruit: len(fruit) for fruit in ('banana', 'apple', 'strawberry')}

print(dic1)

{'banana': 6, 'apple': 5, 'strawberry': 10}


In [2]:
# A second, single-entry dict used as the right-hand side of the merges below.
dic2 = dict(orange=100)

print(dic2)

{'orange': 100}


## アスタリスクを使った辞書型の結合

In [3]:
# `**` unpacking inside a dict display builds a new dict from both operands;
# dic1 and dic2 themselves are not modified. If a key appeared in both, the
# value from the right-most operand would be kept.
{**dic1, **dic2}

{'banana': 6, 'apple': 5, 'strawberry': 10, 'orange': 100}

In [4]:
# Unpacked entries can be combined with ordinary `key: value` pairs in the
# same dict display, so a single extra item can be appended inline.
{**dic1, 'lemon':50}

{'banana': 6, 'apple': 5, 'strawberry': 10, 'lemon': 50}

## update を使った辞書型の結合

In [5]:
import copy

# Work on a shallow copy so that the in-place update below does not modify dic1.
dic3 = copy.copy(dic1)

# dict.update() mutates dic3 in place (it returns None), adding dic2's entries.
dic3.update(dic2)

print(dic3)

{'banana': 6, 'apple': 5, 'strawberry': 10, 'orange': 100}


## マージ演算子

In [6]:
# Report the version of the interpreter this kernel is actually running.
# `!python --version` asks whichever `python` is first on the shell PATH,
# which is not necessarily the kernel's interpreter; the dict `|` operator
# used below needs the *kernel* to be Python 3.9 or newer.
import platform

print(f"Python {platform.python_version()}")

Python 3.9.7


In [7]:
# The dict union operator `|` (available from Python 3.9) returns a new dict
# and leaves both operands unchanged; for duplicate keys the right operand wins.
dic1 | dic2

{'banana': 6, 'apple': 5, 'strawberry': 10, 'orange': 100}

## 大きめのデータで処理速度を比較

In [8]:
import pandas as pd
import time

# Read the CSV and keep only the first 200 columns (positional slice);
# all rows are retained.
df = pd.read_csv("sales_train_validation.csv").iloc[:, :200]

print(df.shape)
display(df.head())

(30490, 200)


Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_185,d_186,d_187,d_188,d_189,d_190,d_191,d_192,d_193,d_194
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,1,2,1,2,3,1,2,0,0
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,3,2,0,0,0,0,0,0,0,2


## アスタリスク

In [9]:
# Benchmark: accumulate one dict per DataFrame column using `**` unpacking.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the system clock and may jump.
start = time.perf_counter()

dic4 = {}

for column in df.columns:

    # Key is "<column>_<row label>". The separator is required: plain
    # concatenation is ambiguous ("d_1" + "10" and "d_11" + "0" both give
    # "d_110"), which silently overwrites entries.
    dic_tmp = {f"{column}_{k}": v for k, v in df[column].to_dict().items()}

    # Creates a brand-new dict on every pass, copying everything merged so far.
    dic4 = {**dic4, **dic_tmp}

print(f"elapsed_time:{round((time.perf_counter() - start), 1)}[sec]")

elapsed_time:35.1[sec]


## update

In [10]:
# Benchmark: accumulate one dict per DataFrame column using dict.update().
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the system clock and may jump.
start = time.perf_counter()

dic4 = {}

for column in df.columns:

    # Key is "<column>_<row label>". The separator is required: plain
    # concatenation is ambiguous ("d_1" + "10" and "d_11" + "0" both give
    # "d_110"), which silently overwrites entries.
    dic_tmp = {f"{column}_{k}": v for k, v in df[column].to_dict().items()}

    # Merges in place: only the entries of dic_tmp are inserted into dic4.
    dic4.update(dic_tmp)

print(f"elapsed_time:{round((time.perf_counter() - start), 1)}[sec]")

elapsed_time:2.7[sec]


## マージ演算子

In [11]:
# Benchmark: accumulate one dict per DataFrame column using the `|` operator.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the system clock and may jump.
start = time.perf_counter()

dic4 = {}

for column in df.columns:

    # Key is "<column>_<row label>". The separator is required: plain
    # concatenation is ambiguous ("d_1" + "10" and "d_11" + "0" both give
    # "d_110"), which silently overwrites entries.
    dic_tmp = {f"{column}_{k}": v for k, v in df[column].to_dict().items()}

    # `|` returns a new dict each pass (the in-place form would be `|=`),
    # so everything merged so far is copied again.
    dic4 = dic4 | dic_tmp

print(f"elapsed_time:{round((time.perf_counter() - start), 1)}[sec]")

elapsed_time:7.5[sec]
