# Prototyping

In [None]:
 # If the real difficulty is not knowing how to do the following with a DataFrame,
 # first prepare a DataFrame that holds just one sentence and think from there.
 # If you still cannot work it out after a day, post your progress and ask me.
 
 # The slot order in the translation differs from the original, which was
 # flight_mod, fromloc.city_name, toloc.city_name
 text = '什麼是fromloc.city_name到toloc.city_name的flight_mod飛行'
 
 # Assume the slot names of each original sentence are known.
 # start/end are set to None for illustration only; a real implementation has
 # many other options and considerations.
 # Unlike a list, a set/dict is inherently independent of order.
 slot_name_start_end_dict = {
     'flight_mod': {'start': None, 'end': None},
     'fromloc.city_name': {'start': None, 'end': None},
     'toloc.city_name': {'start': None, 'end': None},
     'airline_name': {'start':None,'end':None}
 }

 #------ for testing only ---------
 #slot_name_start_end_df=pd.DataFrame(slot_name_start_end_dict)
 #slot_name_start_end_df.sort_values(by='start')
 #--------------------------------------------------------------
 
 # Inside the loop only values may be modified, never keys, and the result
 # does not depend on key order.
 # NOTE(review): str.find() returns -1 for a slot name that is absent from text,
 # so 'end' then becomes len(slot_name) - 1 (airline_name -> start -1, end 11).
 for slot_name in slot_name_start_end_dict.keys():
     begin = text.find(slot_name)
     offset = len(slot_name)
     slot_name_start_end_dict[slot_name]['start'] = begin
     slot_name_start_end_dict[slot_name]['end'] = begin + offset
     
 # This can become a unit test:
 # Ideally write this down first (test-driven development).
 # The implementation's data structure need not be
 # slot_name_start_end_map: Dict[str, Dict[str, int]],
 # so the tested quantities are abstracted as flight_mod_left_boundary and
 # flight_mod_right_boundary.
 flight_mod_left_boundary = slot_name_start_end_dict['flight_mod']['start']
 flight_mod_right_boundary = slot_name_start_end_dict['flight_mod']['end']
 flight_mod_len = flight_mod_right_boundary - flight_mod_left_boundary
 assert 37 == flight_mod_left_boundary, f'Wrong: {flight_mod_left_boundary}'
 assert 47 == flight_mod_right_boundary, f'Wrong: {flight_mod_right_boundary}'
 assert len('flight_mod') == flight_mod_len, f'Wrong: {flight_mod_len}'
 # ... and so on for the other slots
 # ...
 # The number of slots should also be checked; omitted here.
  # Once the tests pass, go back and try changing the implementation to slot_name_start_end_dict.map(...)
 #slot_name_start_end_dict.map()
 # Once the map() version works, it is easier to see how to rewrite it as DataFrame.apply(...)
 print(flight_mod_left_boundary,'left',flight_mod_right_boundary,'right',flight_mod_len,'len')
 # NOTE(review): list_item is assigned here but not used in this cell.
 list_item=list(slot_name_start_end_dict.keys())
 # Print every slot name next to its boundaries. The key is recovered from the
 # position of the value; NOTE(review): if two slots ever had equal boundary
 # dicts, index() would return the first one for both.
 for i in slot_name_start_end_dict.values():
   #print(slot_name_start_end_dict.keys(),'',i)
   print(list (slot_name_start_end_dict.keys()) [list (slot_name_start_end_dict.values()).index (i)],'',i)

   #if(slot_name_start_end_dict[slot_name]['start']>=0):
    #print(i)

37 left 47 right 10 len
flight_mod  {'start': 37, 'end': 47}
fromloc.city_name  {'start': 3, 'end': 20}
toloc.city_name  {'start': 21, 'end': 36}
airline_name  {'start': -1, 'end': 11}


## What I want

- Learn how to use `map()`
- For what purpose?

## What I have tried

In [None]:
#map函式
# User-defined function to pass to map()
# function as the first argument
def getLength(iterable):
    """Return how many items ``iterable`` holds; used as the callback for map()."""
    size = len(iterable)
    return size

In [None]:
# Function to print the map output
def show_result(map_object):
    """Print every item of ``map_object`` on a single line.

    Each item is followed by one space, and the line ends with a newline.
    """
    rendered = ''.join(str(entry) + ' ' for entry in map_object)
    print(rendered)

In [None]:
# Generic Function to print the iterator and its content
def print_Iter(iter_in):
    """Describe ``iter_in`` on stdout.

    A string is reported with its length. A list, tuple or set is reported
    with its element count, followed by one line per element giving that
    element's length. A dict is reported with its element count, followed by
    one line per key/value pair. Any other type prints nothing.
    """
    if isinstance(iter_in, str):
        print(f"The input iterable, '{iter_in}' is a String. Its length is {len(iter_in)}.")
        return

    kind = type(iter_in).__name__
    if isinstance(iter_in, (list, tuple, set)):
        print(f"The input iterable, {iter_in} is a {kind}. It has {len(iter_in)} elements.")
        for element in iter_in:
            print(f"The {kind} contains '{element}' and its length is {len(element)}.")
        return

    if isinstance(iter_in, dict):
        print(f"The input iterable, {iter_in} is a {kind}. It has {len(iter_in)} elements.")
        for name, content in iter_in.items():
            print(f"Dict key is '{name}' and value is {content}.")

In [None]:
# Use the slot dictionary as the iterable under inspection
# (a simpler stand-in would be {"Python": 0, "CSharp": 0, "Java": 0}).
print_Iter(slot_name_start_end_dict)

# Iterating a dict yields its keys, so map() hands each slot name to getLength.
map_result = map(getLength, slot_name_start_end_dict)
print(f"Type of map_result is {type(map_result)}")

# Consume the lazy map object and show the computed lengths.
print("Lengths are: ")
show_result(map_result)

The input iterable, {'flight_mod': {'start': 37, 'end': 47}, 'fromloc.city_name': {'start': 3, 'end': 20}, 'toloc.city_name': {'start': 21, 'end': 36}, 'airline_name': {'start': -1, 'end': 11}} is a dict. It has 4 elements.
Dict key is 'flight_mod' and value is {'start': 37, 'end': 47}.
Dict key is 'fromloc.city_name' and value is {'start': 3, 'end': 20}.
Dict key is 'toloc.city_name' and value is {'start': 21, 'end': 36}.
Dict key is 'airline_name' and value is {'start': -1, 'end': 11}.
Type of map_result is <class 'map'>
Lengths are: 
10 17 15 12 


## What obstacles I faced

> 想要問一下Mike這麼做所產生的結果是否正確

## Mike's comment

`map()` 的用法本身沒有問題。接下來要想的是 "for what purpose" 這個部分。

本來的實作是：
```python
for slot_name in slot_name_start_end_dict.keys():
    begin = text.find(slot_name)
    offset = len(slot_name)
    slot_name_start_end_dict[slot_name]['start'] = begin
    slot_name_start_end_dict[slot_name]['end'] = begin + offset
```

而 unit test 是：
```python
flight_mod_left_boundary = slot_name_start_end_dict['flight_mod']['start']
flight_mod_right_boundary = slot_name_start_end_dict['flight_mod']['end']
flight_mod_len = flight_mod_right_boundary - flight_mod_left_boundary
assert 37 == flight_mod_left_boundary, f'Wrong: {flight_mod_left_boundary}'
assert 47 == flight_mod_right_boundary, f'Wrong: {flight_mod_right_boundary}'
assert len('flight_mod') == flight_mod_len, f'Wrong: {flight_mod_len}'
```

要怎麼改用 `map()` 又能通過測試？

In [None]:
from typing import Callable, Dict


def test_get_slot_boudary_dict(
    func_get_slot_boundary_dict: Callable[
        [str, Dict[str, Dict[str, int]]],
        Dict[str, Dict[str, int]]
    ]
) -> None:
    """Check a slot-boundary implementation against one fixed utterance.

    Args:
        func_get_slot_boundary_dict: implementation under test. It is called
            with the utterance and a dict mapping each slot name to
            ``{'start': None, 'end': None}`` and must return a dict with the
            same nested shape holding the computed offsets.

    Raises:
        AssertionError: if the 'flight_mod' boundaries are not 37/47, or if
            the start of the absent 'airline_name' slot is not -1.
    """
    text = '什麼是fromloc.city_name到toloc.city_name的flight_mod飛行'
    slot_name_start_end_dict = {
        'flight_mod': {'start': None, 'end': None},
        'fromloc.city_name': {'start': None, 'end': None},
        'toloc.city_name': {'start': None, 'end': None},
        'airline_name': {'start':None, 'end':None}
    }

    slot_boundaries = func_get_slot_boundary_dict(
        text,
        slot_name_start_end_dict
    )
    
    # A slot that occurs in the utterance: both boundaries and the span length.
    flight_mod_left = slot_boundaries['flight_mod']['start']
    flight_mod_right = slot_boundaries['flight_mod']['end']
    flight_mod_len = flight_mod_right - flight_mod_left
    assert 37 == flight_mod_left, f'Wrong: {flight_mod_left}'
    assert 47 == flight_mod_right, f'Wrong: {flight_mod_right}'
    assert len('flight_mod') == flight_mod_len, f'Wrong: {flight_mod_len}'

    # A slot that does not occur in the utterance: only 'start' is checked;
    # the 'end' check below is still disabled.
    airline_name_left = slot_boundaries['airline_name']['start']
    airline_name_right = slot_boundaries['airline_name']['end']
    assert -1 == airline_name_left, f'Wrong: {airline_name_left}'
    # TODO: fix me
    # assert -1 == airline_name_right, f'Wrong: {airline_name_right}'

In [None]:
def get_slot_boudary_dict_loop_version(
    utterance: str,
    slot_boundary_dict: Dict[str, Dict[str, int]]
) -> Dict[str, Dict[str, int]]:
    """Locate every slot-name placeholder inside ``utterance``.

    Args:
        utterance: text in which slot names appear as placeholders.
        slot_boundary_dict: maps each slot name to a dict with 'start' and
            'end' entries. The entries are overwritten in place.

    Returns:
        The same ``slot_boundary_dict`` object. For a slot found in the
        utterance, 'start' is the index of its first occurrence and 'end' is
        ``start + len(slot_name)``. For a slot that does not occur, both
        'start' and 'end' are -1.
    """
    for slot_name, boundary in slot_boundary_dict.items():
        begin = utterance.find(slot_name)
        boundary['start'] = begin
        # str.find() signals "not found" with -1; adding the name length to it
        # would produce a meaningless end offset, so report -1 for both.
        boundary['end'] = begin + len(slot_name) if begin != -1 else -1
    return slot_boundary_dict


# Run the shared test against the loop-based implementation.
test_get_slot_boudary_dict(get_slot_boudary_dict_loop_version)

In [None]:
def get_slot_boudary_dict_map_version(
    utterance: str,
    slot_boundary_dict: Dict[str, Dict[str, int]]
) -> Dict[str, Dict[str, int]]:
    """Placeholder for a map()-based slot-boundary implementation.

    The signature mirrors the loop version so the same test can drive it.
    The body is intentionally left as an exercise.

    Raises:
        NotImplementedError: always, until the body is written.
    """
    # TODO: 
    # map(func, slot_boundary_dict)
    raise NotImplementedError('Implement me')


# Run the shared test against the map()-based implementation; this raises
# NotImplementedError for as long as the function above is still a stub.
test_get_slot_boudary_dict(get_slot_boudary_dict_map_version)

思考怎麼實作時應該會發現幾個狀況：

1. 真的有必要傳 `slot_boundary_dict` 進去嗎？輸入條件明明只是裡面的 slot_name
2. 最重要的結果來自 `str.find()`, 也就是 slot_name 在 utterance 裡的位置，於是輸出似乎也沒必要是 nested `dict`
3. 如果輸入輸出都不是 nested `dict`, `map()` 用法應該跟常見範例差不多
4. 跟 `map()` 常見範例差不多的話，應該也可以用 `lambda` 或 comprehension

# Production code

In [None]:
# Mount Google Drive inside the Colab runtime and work from the dataset folder.
from google.colab import drive
drive.mount('/content/drive/')

import os
os.chdir('/content/drive/My Drive/dataset/')
#!ls

#!pwd

# Import the json module
import json
# Import the csv module
import csv
import pandas as pd
data=pd.read_csv('ATIS.csv',encoding="MS950") # working copy that later cells modify
data2=pd.read_csv('ATISsample.csv',encoding="MS950")# reference copy used as the sample file to compare against
#print(data.intent) # for the csv: print intent
#print(data.text) # for the csv: print text
#print(data.values)
print(data.iloc[1,0])
# Author's note: the first index is the record number; the second index starts
# at 0 and follows the column order intent, entities start, entities end,
# entities 2 ... (see train.csv).
# NOTE(review): the recorded output shows column 0 holding the utterance text;
# confirm the actual column order against the CSV header.
print(data.iloc[1,4])

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/
什麼flight_time確實airline_name從出發到fromloc.city_name toloc.city_name
times


In [None]:
# Author's note: reset the slots to their original initial values first
# (to avoid corrupting the data); the reset loop is kept commented out below.
# Show selected cells of the second record (row index 1), each with a label.
for column, label in (
    (2, "from_city_start"),
    (3, "from_city_end"),
    (4, "from_city"),
    (6, "loc_city_start"),
    (7, "loc_city_end"),
    (8, "loc_city"),
    (0, "text"),
):
    print(data.iloc[1, column], label)
print(data.iloc[1, 17])  # the last item of the second record
#for i in range(0,len(data)):
#  for j in range(len(data.iloc[i,])):
#    data.iloc[i,j]=data2.iloc[i,j]

5.0 from_city_start
10.0 from_city_end
times from_city
16.0 loc_city_start
27.0 loc_city_end
continental loc_city
什麼flight_time確實airline_name從出發到fromloc.city_name toloc.city_name text
toloc.city_name


In [None]:
import random

# Full reset of the working table, kept commented out. Per the author's note it
# re-initialises the whole file, i.e. restores the original train.csv contents.
#for i in range (1,len(data)):
#  for j in range(len(data.iloc[i,])):
#    data.iloc[i,j]=data2.iloc[i,j]

# ---------------------------------------------------------------------------
# Candidate values shared by several slots. Every slot still gets its own list
# object (via list(...)), so one slot's candidates can be edited independently.
# ---------------------------------------------------------------------------
_AIRPORT_CODES = ('TPE','PVG','NRT','PEK','ICN','JFK','SEA','KIX','NGO','LGW')
_AIRPORT_NAMES = ('臺灣桃園國際機場','上海浦東國際機場','東京成田機場','北京首都國際機場','首爾仁川國際機場',
                  '甘迺迪國際機場','西雅圖-塔科馬國際機場','關西國際機場','名古屋中部國際機場','倫敦蓋威克機場')
_CITY_NAMES = ('上海','東京','北京','首爾','紐約','西雅圖','大阪','名古屋','倫敦','桃園')
_COUNTRY_NAMES = ('中國','日本','韓國','美國','英國','臺灣')
_DAY_NAMES = ('星期一','星期二','星期三','星期四','星期五','星期六','星期日','星期天')
_DAY_NUMBERS = ('一號','二號','三號','四號','五號','六號','七號','八號','九號','十號',
                '十一號','十二號','十三號','十四號','十五號','十六號','十七號','十八號','十九號','二十號',
                '二十一號','二十二號','二十三號','二十六號')
_MONTH_NAMES = ('一月','二月','三月','四月','五月','六月','七月','八月','九月','十月','十一月','十二月')
_PERIOD_MODS = ('最早的','下午的','最晚的','紅眼','早上的','晚上的')
_PERIODS_OF_DAY = ('下午的','晚上的','早上的','上午的','傍晚的','紅眼時段的')
_TIME_RELATIVES = ('之前','之後')
_TODAY_RELATIVES = ('明天','今天','今晚','後天','今早')
_SPOKEN_HOURS = ('一點','二點','三點','四點','五點','六點','七點','八點','九點','十點','十一點','十二點',
                 '十三點','十四點','十五點','十六點','十七點','十八點','十九點','二十點',
                 '二十一點','二十二點','二十三點','二十四點')
_CLOCK_HOURS = ('01:00','02:00','03:00','04:00','05:00','06:00','07:00','08:00','09:00','10:00','11:00','12:00',
                '13:00','14:00','15:00','16:00','17:00','18:00','19:00','20:00','21:00','22:00','23:00','00:00')
_SPOKEN_AND_CLOCK_HOURS = _SPOKEN_HOURS + _CLOCK_HOURS

#-------------- candidate values for each slot
aircraft_code_value=['波音747','波音767','波音777','波音787','波音737','空中巴士A330','空中巴士A340','空中巴士A320','空中巴士A321','麥道MD11']
# TODO (author's intent): airline code and airline name should be drawn with the
# same random index; the picks below are still independent of each other.
airline_code_value=['BR','CI','IT','B7','FE','AE','JAL']
airline_name_value=['長榮航空','中華航空','台灣虎航','立榮航空','遠東航空','華信航空','日本航空']
# TODO (author's intent): airport code and airport name should share one random index.
airport_code_value=list(_AIRPORT_CODES)
airport_name_value=list(_AIRPORT_NAMES)
arrive_date_date_relative_value=['下']
arrive_date_day_name_value=list(_DAY_NAMES)
arrive_date_day_number_value=list(_DAY_NUMBERS)
arrive_date_month_name_value=list(_MONTH_NAMES)
arrive_time_period_mod_value=list(_PERIOD_MODS)
arrive_time_time_relative_value=list(_TIME_RELATIVES)
city_name_value=list(_CITY_NAMES)
class_type_value=['頭等艙','商務艙','經濟艙']
cost_relative_value=['便宜的','昂貴的','最便宜的','花費最少的','最高價的','最低價的','花費最多的','最貴的']
depart_date_date_relative_value=['下(next)']
depart_date_day_name_value=list(_DAY_NAMES)
depart_date_month_name_value=list(_MONTH_NAMES)
depart_date_today_relative_value=list(_TODAY_RELATIVES)
depart_time_period_mod_value=list(_PERIOD_MODS)
depart_time_time_relative_value=list(_TIME_RELATIVES)
fare_amount_value=['元']
flight_days_value=['每日','每天']
flight_mod_value=['最短的','最早的','晚上的','下午的','最晚的','紅眼','早上的']
flight_number_value=['B77016','BR2179','BR716','BR170','B77016','CI0012','BR178','CI0150','CI0069']
flight_stop_value=['直達','轉機']
# TODO (author's intent): fromloc airport code and name should share one random index.
fromloc_airport_code_value=list(_AIRPORT_CODES)
fromloc_airport_name_value=list(_AIRPORT_NAMES)
fromloc_city_name_value=list(_CITY_NAMES)
# TODO (author's intent): fromloc state code and state name should share one random index.
fromloc_state_code_value=['CN','JP','KR','US','GB','TW']
fromloc_state_name_value=list(_COUNTRY_NAMES)
meal_value=['餐']
meal_description_value=['早餐','午餐','晚餐']
or_value=['或']  # ?? (marked as uncertain by the author)
# The misspelling "relavite" is kept because other cells refer to this name.
return_date_date_relavite_value=['這幾天']
return_date_day_name_value=list(_DAY_NAMES)
return_date_month_name_value=list(_MONTH_NAMES)
return_date_today_relative_value=list(_TODAY_RELATIVES)
return_time_period_mod_value=list(_PERIOD_MODS)
round_trip_value=['來回','單程','去程','回程']
stoploc_airport_name_value=list(_AIRPORT_NAMES)
stoploc_city_name_value=list(_CITY_NAMES)
# TODO (author's intent): toloc airport code and name should share one random index.
toloc_airport_code_value=list(_AIRPORT_CODES)
toloc_airport_name_value=list(_AIRPORT_NAMES)
toloc_city_name_value=list(_CITY_NAMES)
toloc_country_name_value=list(_COUNTRY_NAMES)
#---
arrive_time_end_time_value=list(_SPOKEN_HOURS)  # spoken hours only, no HH:MM forms
arrive_time_period_of_day_value=list(_PERIODS_OF_DAY)
arrive_time_start_time_value=list(_SPOKEN_AND_CLOCK_HOURS)
arrive_time_time_value=list(_SPOKEN_AND_CLOCK_HOURS)
connect_value=['直達']
depart_date_day_number_value=list(_DAY_NUMBERS)
depart_time_end_time_value=list(_SPOKEN_AND_CLOCK_HOURS)
depart_time_period_of_day_value=list(_PERIODS_OF_DAY)
depart_time_start_time_value=list(_SPOKEN_AND_CLOCK_HOURS)
depart_time_time_value=list(_SPOKEN_AND_CLOCK_HOURS)
flight_time_value=['航班資訊']
mod_value=['most']
period_of_day_value=list(_PERIODS_OF_DAY)
return_date_day_number_value=list(_DAY_NUMBERS)
return_time_period_of_day_value=list(_PERIODS_OF_DAY)

#-- slots that have no candidate values yet (filled with an empty string)
depart_date_year_value=['']
economy_value=['']
fare_basis_code_value=['']
state_code_value=['']
stoploc_state_code_value=['']
toloc_state_code_value=['']
toloc_state_name_value=['']

#----------------------- replacement value drawn for each slot
# One value per slot is drawn here, once; every row processed later receives
# the same value for a given slot name.
new_aircraft_code=random.choice(aircraft_code_value)#1
new_airline_code=random.choice(airline_code_value)#2
new_airline_name=random.choice(airline_name_value)#3
new_airport_code=random.choice(airport_code_value)#4
new_airport_name=random.choice(airport_name_value)#5
new_arrive_date_date_relative=random.choice(arrive_date_date_relative_value)#6
new_arrive_date_day_name=random.choice(arrive_date_day_name_value)#7
new_arrive_date_day_number=random.choice(arrive_date_day_number_value)#8
new_arrive_date_month_name=random.choice(arrive_date_month_name_value)#9
new_arrive_time_period_mod=random.choice(arrive_time_period_mod_value)#10
new_arrive_time_time_relative=random.choice(arrive_time_time_relative_value)#11
new_city_name=random.choice(city_name_value)#12
new_class_type=random.choice(class_type_value)#13
new_cost_relative=random.choice(cost_relative_value)#14
new_depart_date_date_relative=random.choice(depart_date_date_relative_value)#15
new_depart_date_day_name=random.choice(depart_date_day_name_value)#16
new_depart_date_month_name=random.choice(depart_date_month_name_value)#17
new_depart_date_today_relative=random.choice(depart_date_today_relative_value)#18
new_depart_time_period_mod=random.choice(depart_time_period_mod_value)#19
new_depart_time_time_relative=random.choice(depart_time_time_relative_value)#20
new_fare_amount=random.choice(fare_amount_value)#21
new_flight_days=random.choice(flight_days_value)#22
new_flight_mod=random.choice(flight_mod_value)#23
new_flight_number=random.choice(flight_number_value)#24
new_flight_stop=random.choice(flight_stop_value)#25
new_fromloc_airport_code=random.choice(fromloc_airport_code_value)#26
new_fromloc_airport_name=random.choice(fromloc_airport_name_value)#27
new_fromloc_city_name=random.choice(fromloc_city_name_value)#28
new_fromloc_state_code=random.choice(fromloc_state_code_value)#29
new_fromloc_state_name=random.choice(fromloc_state_name_value)#30
new_meal=random.choice(meal_value)#31
new_meal_description=random.choice(meal_description_value)#32
new_or=random.choice(or_value)#33
new_return_date_date_relavite=random.choice(return_date_date_relavite_value)#34
new_return_date_day_name=random.choice(return_date_day_name_value)#35
new_return_date_month_name=random.choice(return_date_month_name_value)#36
new_return_date_today_relative=random.choice(return_date_today_relative_value)#37
new_return_time_period_mod=random.choice(return_time_period_mod_value)#38
new_round_trip=random.choice(round_trip_value)#39
new_stoploc_airport_name=random.choice(stoploc_airport_name_value)#40
new_stoploc_city_name=random.choice(stoploc_city_name_value)#41
new_toloc_airport_code=random.choice(toloc_airport_code_value)#42
new_toloc_airport_name=random.choice(toloc_airport_name_value)#43
new_toloc_city_name=random.choice(toloc_city_name_value)#44
new_toloc_country_name=random.choice(toloc_country_name_value)#45
#----
new_arrive_time_end_time=random.choice(arrive_time_end_time_value)
new_arrive_time_period_of_day=random.choice(arrive_time_period_of_day_value)
new_arrive_time_start_time=random.choice(arrive_time_start_time_value)
new_arrive_time_time=random.choice(arrive_time_time_value)
new_connect=random.choice(connect_value)
# Fixed: this used to index depart_time_end_time_value, so the "day number"
# slot received a time of day instead of a day number.
new_depart_date_day_number=random.choice(depart_date_day_number_value)
new_depart_time_end_time=random.choice(depart_time_end_time_value)
new_depart_time_period_of_day=random.choice(depart_time_period_of_day_value)
new_depart_time_start_time=random.choice(depart_time_start_time_value)
new_depart_time_time=random.choice(depart_time_time_value)
new_flight_time=random.choice(flight_time_value)
# Fixed: a chained assignment here used to rebind mod_value to the drawn string.
new_mod=random.choice(mod_value)
new_period_of_day=random.choice(period_of_day_value)
new_return_date_day_number=random.choice(return_date_day_number_value)
new_return_time_period_of_day=random.choice(return_time_period_of_day_value)
#---
new_depart_date_year=random.choice(depart_date_year_value)
new_economy=random.choice(economy_value)
new_fare_basis_code=random.choice(fare_basis_code_value)
new_state_code=random.choice(state_code_value)
new_stoploc_state_code=random.choice(stoploc_state_code_value)
new_toloc_state_code=random.choice(toloc_state_code_value)
new_toloc_state_name=random.choice(toloc_state_name_value)


In [None]:
# Build the lookup table: slot name (as it appears in the data) -> replacement value.
# NOTE(review): the name `dict` shadows the builtin type for the rest of the
# session (e.g. isinstance(x, dict) stops working); it is kept because the
# substitution cell below reads this name.
dict = {
    'aircraft_code': new_aircraft_code,
    'airline_code': new_airline_code,
    'airline_name': new_airline_name,
    'airport_code': new_airport_code,
    'airport_name': new_airport_name,
    'arrive_date.date_relative': new_arrive_date_date_relative,
    'arrive_date.day_name': new_arrive_date_day_name,
    'arrive_date.day_number': new_arrive_date_day_number,
    'arrive_date.month_name': new_arrive_date_month_name,
    'arrive_time.period_mod': new_arrive_time_period_mod,
    'arrive_time.time_relative': new_arrive_time_time_relative,
    'city_name': new_city_name,
    'class_type': new_class_type,
    'cost_relative': new_cost_relative,
    'depart_date.date_relative': new_depart_date_date_relative,
    'depart_date.day_name': new_depart_date_day_name,
    'depart_date.month_name': new_depart_date_month_name,
    'depart_date.today_relative': new_depart_date_today_relative,
    'depart_time.period_mod': new_depart_time_period_mod,
    'depart_time.time_relative': new_depart_time_time_relative,
    'fare_amount': new_fare_amount,
    'flight_days': new_flight_days,
    'flight_mod': new_flight_mod,
    'flight_number': new_flight_number,
    'flight_stop': new_flight_stop,
    'fromloc.airport_code': new_fromloc_airport_code,
    'fromloc.airport_name': new_fromloc_airport_name,
    'fromloc.city_name': new_fromloc_city_name,
    'fromloc.state_code': new_fromloc_state_code,
    'fromloc.state_name': new_fromloc_state_name,
    'meal': new_meal,
    'meal_description': new_meal_description,
    'or': new_or,
    # Misspelled key kept for backward compatibility ...
    'return_date.date_relavite': new_return_date_date_relavite,
    'return_date.day_name': new_return_date_day_name,
    'return_date.month_name': new_return_date_month_name,
    'return_date.today_relative': new_return_date_today_relative,
    'return_time.period_mod': new_return_time_period_mod,
    'round_trip': new_round_trip,
    'stoploc.airport_name': new_stoploc_airport_name,
    'stoploc.city_name': new_stoploc_city_name,
    'toloc.airport_code': new_toloc_airport_code,
    'toloc.airport_name': new_toloc_airport_name,
    'toloc.city_name': new_toloc_city_name,
    'toloc.country_name': new_toloc_country_name,
    'arrive_time.end_time': new_arrive_time_end_time,
    'arrive_time.period_of_day': new_arrive_time_period_of_day,
    'arrive_time.start_time': new_arrive_time_start_time,
    'arrive_time.time': new_arrive_time_time,
    'connect': new_connect,
    'depart_date.day_number': new_depart_date_day_number,
    'depart_date.year': new_depart_date_year,
    'depart_time.end_time': new_depart_time_end_time,
    'depart_time.period_of_day': new_depart_time_period_of_day,
    'depart_time.start_time': new_depart_time_start_time,
    'depart_time.time': new_depart_time_time,
    'economy': new_economy,
    'fare_basis_code': new_fare_basis_code,
    'flight_time': new_flight_time,
    'mod': new_mod,
    'period_of_day': new_period_of_day,
    'return_date.day_number': new_return_date_day_number,
    'return_time.period_of_day': new_return_time_period_of_day,
    'state_code': new_state_code,
    'stoploc.state_code': new_stoploc_state_code,
    'toloc.state_code': new_toloc_state_code,
    'toloc.state_name': new_toloc_state_name,
    # ... and the correctly spelled slot name, so a data cell holding
    # 'return_date.date_relative' no longer raises KeyError on lookup.
    'return_date.date_relative': new_return_date_date_relavite,
}
msg=''
# For every slot-name column j of data.iloc[i, j] in the cell below, the cell
# value is looked up in this dictionary and new_slot takes the mapped value.

In [None]:
import re

# Only the first MAX_ROWS records are processed (fewer if the table is shorter).
MAX_ROWS = 100


def _fill_slot_placeholders(text, slot_names, value_by_name):
    """Substitute slot-name placeholders in ``text`` and locate the substitutes.

    The text is rebuilt left to right in a single pass, so every reported
    offset refers to the final text, and two slots that receive the same value
    still get their own, distinct positions.

    Args:
        text: utterance in which slot names appear as placeholders.
        slot_names: placeholders to fill, one entry per slot column.
        value_by_name: mapping from slot name to its replacement text.

    Returns:
        ``(new_text, spans)``. ``spans[k]`` is the ``(start, end)`` offset in
        ``new_text`` of the value substituted for ``slot_names[k]``, or
        ``(-1, -1)`` when that placeholder does not occur in ``text``.

    Raises:
        KeyError: if a slot name has no entry in ``value_by_name``.
    """
    spans = [(-1, -1)] * len(slot_names)
    if not slot_names:
        return text, spans

    values = {name: value_by_name[name] for name in slot_names}
    # Column positions still waiting for an occurrence of each placeholder.
    waiting = {}
    for position, name in enumerate(slot_names):
        waiting.setdefault(name, []).append(position)

    # Longest names first, so 'fromloc.city_name' is matched as a whole and
    # never as the shorter 'city_name' it contains.
    pattern = '|'.join(
        re.escape(name) for name in sorted(values, key=len, reverse=True)
    )

    pieces = []
    copied_upto = 0   # how much of the original text has been consumed
    built_len = 0     # length of the new text assembled so far
    for match in re.finditer(pattern, text):
        name = match.group(0)
        literal = text[copied_upto:match.start()]
        value = values[name]
        pieces.append(literal)
        pieces.append(value)
        start = built_len + len(literal)
        built_len = start + len(value)
        copied_upto = match.end()
        if waiting[name]:
            spans[waiting[name].pop(0)] = (start, built_len)
        # A repeated placeholder with no column left is still replaced
        # (like str.replace did) but has no span to record.
    pieces.append(text[copied_upto:])
    return ''.join(pieces), spans


for i in range(min(MAX_ROWS, len(data))):
    print('第', i, '筆資料')

    # Restore the slot-name columns from the reference table and collect the
    # slots that are actually present in this record. Slot-name columns sit at
    # 5, 9, 13, ...; the three columns before each hold start, end and value.
    slot_columns = []
    slot_names = []
    for j in range(5, len(data.iloc[i,]), 4):
        data.iloc[i, j] = data2.iloc[i, j]
        slot_name = data.iloc[i, j]
        if pd.isna(slot_name):
            continue
        slot_columns.append(j)
        slot_names.append(slot_name)

    # `dict` is the slot-name -> replacement-value table built in an earlier cell.
    old_text = data.iloc[i, 0]
    new_text, spans = _fill_slot_placeholders(old_text, slot_names, dict)
    print(old_text)
    print(new_text)
    data.iloc[i, 0] = new_text

    for j, old_slot, (new_entity_start, new_entity_end) in zip(slot_columns, slot_names, spans):
        new_slot = dict[old_slot]
        print('old_slot:', old_slot)
        print('new_slot:', new_slot)
        data.iloc[i, j-1] = new_slot
        data.iloc[i, j-3] = int(new_entity_start)
        data.iloc[i, j-2] = int(new_entity_end)
        print(data.iloc[i, j-3], data.iloc[i, j-2])

第 0 筆資料
flight_mod
new_slot: 晚上的
old_slot: flight_mod
什麼是fromloc.city_name到toloc.city_name的flight_mod飛行
什麼是fromloc.city_name到toloc.city_name的晚上的飛行
晚上的
什麼是fromloc.city_name到toloc.city_name的晚上的飛行
37.0
40.0
fromloc.city_name
new_slot: 東京
old_slot: fromloc.city_name
什麼是fromloc.city_name到toloc.city_name的晚上的飛行
什麼是東京到toloc.city_name的晚上的飛行
東京
什麼是東京到toloc.city_name的晚上的飛行
3.0
5.0
toloc.city_name
new_slot: 東京
old_slot: toloc.city_name
什麼是東京到toloc.city_name的晚上的飛行
什麼是東京到東京的晚上的飛行
東京
什麼是東京到東京的晚上的飛行
3.0
5.0
第 1 筆資料
flight_time
new_slot: 航班資訊
old_slot: flight_time
什麼flight_time確實airline_name從出發到fromloc.city_name toloc.city_name
什麼航班資訊確實airline_name從出發到fromloc.city_name toloc.city_name
航班資訊
什麼航班資訊確實airline_name從出發到fromloc.city_name toloc.city_name
2.0
6.0
airline_name
new_slot: 華信航空
old_slot: airline_name
什麼航班資訊確實airline_name從出發到fromloc.city_name toloc.city_name
什麼航班資訊確實華信航空從出發到fromloc.city_name toloc.city_name
華信航空
什麼航班資訊確實華信航空從出發到fromloc.city_name toloc.city_name
8.0
12.0
fromloc.city_name
new_slot: 東

In [None]:
# Query the current working directory; the value is not assigned or printed here.
os.getcwd()
# Save the modified table to output2.csv in the working directory, MS950-encoded.
# NOTE(review): to_csv() also writes the row index as the first column unless
# index=False is passed — confirm whether that column is wanted.
data.to_csv('output2.csv',encoding="MS950")

In [None]:
# Export the table row by row: print each record as one comma-separated line
# and write the same cells to output.csv through the csv writer.
#
# Changes from the earlier version of this cell:
#   * rows are now actually written (the file used to be opened, truncated
#     and left empty because the writerow call was commented out);
#   * cells are joined with ','.join, so a missing cell in the middle of a
#     record can no longer swallow the separator between its neighbours;
#   * the last record is included (the loop used to stop at len(data) - 1).
with open('output.csv','w',newline='',encoding="MS950") as csvfile:
    writer = csv.writer(csvfile, delimiter='\t')
    for i in range(len(data)):
        # Missing cells (NaN) are skipped; empty strings are kept as empty fields.
        cells = [str(cell) for cell in data.iloc[i] if str(cell) != 'nan']
        print(','.join(cells))
        writer.writerow(cells)

什麼是東京到東京的晚上的飛行,flight,37.0,40.0,晚上的,flight_mod,3.0,5.0,東京,fromloc.city_name,3.0,5.0,東京,toloc.city_name
什麼航班資訊確實華信航空從出發到東京 東京,flight_time,2.0,6.0,航班資訊,flight_time,8.0,12.0,華信航空,airline_name,16.0,18.0,東京,fromloc.city_name,16.0,18.0,東京,toloc.city_name
我想要看到東京到東京航班,flight,5.0,7.0,東京,fromloc.city_name,5.0,7.0,東京,toloc.city_name
什麼是東京到東京的最低價的票價,airfare,37.0,41.0,最低價的,cost_relative,3.0,5.0,東京,fromloc.city_name,3.0,5.0,東京,toloc.city_name
列出所有從東京到東京航班,flight,5.0,7.0,東京,fromloc.city_name,5.0,7.0,東京,toloc.city_name
請出示機票東京到達從東京,flight,5.0,7.0,東京,toloc.city_name,5.0,7.0,東京,fromloc.city_name
什麼航班你有從東京到東京,flight,7.0,9.0,東京,fromloc.city_name,7.0,9.0,東京,toloc.city_name
從東京到東京 航班,flight,1.0,3.0,東京,fromloc.city_name,1.0,3.0,東京,toloc.city_name,0.0,0.0,,toloc.state_code
給我從東京的最低價的航班上星期六 東京,flight,21.0,25.0,最低價的,cost_relative,3.0,5.0,東京,fromloc.city_name,3.0,5.0,東京,toloc.city_name,13.0,16.0,星期六,depart_date.day_name
清單每天從東京到東京與頭等艙航班,flight,2.0,4.0,每天,flight_days,5.0,7.0,東京,fromloc.city_name,5.0,7.0,東京,toloc