In [10]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [12]:
# *** This is a prerequisite of the following code cells ***
# We transform the features based on the clean data
# -------------------------------------------------------
# Question: 是不是因為清完資料，都市住宅分區的 unique 才會變少？Yes

def clean_invalid(dataframe):
    # Clean invalid data
    dataframe = dataframe[(dataframe['交易標的']!='土地') & (~dataframe['交易標的'].isna())]
    dataframe['Month'] = dataframe['交易年月日'].str[:-2].astype('float')
    dataframe = dataframe.query("Month>=10601 and Month<=11003 ")
    dataframe = dataframe[(dataframe.Month!=10600) & (dataframe.Month!=10700) & \
                          (dataframe.Month!=10800) & (dataframe.Month!=10900) & (dataframe.Month!=11000)]
    return dataframe

In [None]:
def encode_city_land_usage(x):
    # Encode {'其他': 0, '住': 1, '農': 2, '工': 3, '商': 4, 
    #         '住商': 5, '其他住宅': 6, nan: nan}
    if isinstance(x, str):
        if '其他' in x:
            end_idx = x.find('(') if '(' in x else len(x) + 1
            if '住商' in x[:end_idx] or '住宅商業' in x[:end_idx]:
                return 5
            if '住宅' in x[:end_idx] or '住' in x[:end_idx]:
                return 6
            return 0
        if '住' in x:
            return 1
        elif '農' in x:
            return 2
        elif '工' in x:
            return 3
        elif '商' in x:
            return 4
    else:
        return x

In [None]:
def is_delimiters(x):
    # If there one of the following delimiters in the
    # field of data['主要用途'], then we call such builing is
    # multiple usage.
    return '、' in x or '；' in x or '．' in x or \
       '.' in x or '‧' in x or '﹒' in x or \
       '，' in x or '﹐' in x or \
       ' ' in x or '　' in x

In [None]:
def encode_main_usage(x):
    if not isinstance(x, str):
        # nan remains
        return x
    
    living = lambda x: True if '住' in x else False
    farm_related = lambda x: True if '農舍' in x or '農業' in x or \
                                     '畜牧' in x or '豬舍' in x or \
                                     '雞舍' in x or '羊舍' in x or \
                                     '牛舍' in x or '禽舍' in x or \
                                     '堆肥舍' in x else False
    manufaturing = lambda x: True if '工場' in x or '工廠' in x or '廠房' in x else False
    business = lambda x: True if '事務所' in x or '辦公' in x or '商業' in x else False
    selling = lambda x: True if '店鋪' in x or '商店' in x or \
                                '店舖' in x or '零售' in x or \
                                '店房' in x or '商場' in x or \
                                '百貨' in x else False
    medical = lambda x: True if '醫' in x or '病' in x or '診所' in x else False
    parking = lambda x: True if '停車' in x or '車庫' in x else False
    others = lambda x: True if '金融' in x or '健身' in x else False
    
    # Ignore the comment in the field
    end_idx = x.find('（') if '（' in x else len(x) - 1
    x = x[:end_idx]

    if living(x):     
        if selling(x):
            return 5
        elif business(x):
            return 4
        elif manufaturing(x):
            return 3
        elif farm_related(x):
            return 2
        elif medical(x):
            return 6
        elif others(x):
            return 8
        else:
            return 1
    else:
        if manufaturing(x):
            return 3
        elif business(x):
            return 4
        elif selling(x):
            return 5
        elif farm_related(x):
            return 2
        elif medical(x):
            return 6
        elif parking(x):
            return 7
        else:
            # Cases list
            # mutiple usage:
            # - https://gist.github.com/JIElite/73a870e25390a23a676da3f9a2c86436
            # single usage:
            # - https://gist.github.com/JIElite/329341d1573088e389cbc84f38dd15ac
            return 8

In [None]:
def encode_have_elevator(x):
    """Encode the value in '電梯' field with one-hot encoding
    
    Return:
    - nan, nan
    - 1, '有'
    - 0, '無'
    """
    if not isinstance(x, str):
        return x
    map_ = {'有': 1, '無': 0}
    return map_[x]

In [None]:
def transform_area(area):
    """Convert the unit of the area from square meter to Ping(坪)
    """
    # If the original area is nan, then it remains.
    if not isinstance(area, str):
        return area
    
    return round(float(area) / 3.3058)

In [None]:
def encode_materials(raw_materials):
    """
    The building materials could be decomposed into the following categories:
    
    --------RC or SRC -----------------------------------------
    - 鋼筋混凝土造, 鋼筋混凝土構造, 鋼筋混凝土, ＲＣ造, 鋼筋混凝土結構造,
        Ｒ．Ｃ造, "鋼筋混凝土（ＲＣ）", "ＲＣ鋼筋混凝土造", "ＲＣ構架造",
        ＲＣ結構造, "Ｒ．Ｃ構造", "鋼筋混凝土造（ＲＣ）", "R.C造", "ＲＣ構造",
        "Ｒ．Ｃ構架造",  "Ｒ．Ｃ構架", "Ｒ‧Ｃ鋼筋混凝土造", "Ｒ．Ｃ結構", "ＲＣ"
    - "鋼骨ＲＣ造", "ＲＣ鋼骨造", "ＳＲＣ造", "ＳＲＣ"
    - 鋼筋
    - 鋼骨, 鋼骨造, 鋼骨構造, 鋼骨結構造
    - 鋼造, 鋼構造
    - 預力混凝土造 (a kind of RC)
    
    --------Concrete-----------
    - 混凝土
    
    -------- Stone -------------
    - 石造 (跟磚石造撞)
    
    -------- Brick related ----------
    - 磚造 (跟土磚, 木石磚撞, conflict ignored)
    - 加強磚造 
    - 磚木, 磚木造
    - 磚石造, 木石磚造
    - 土磚造, 土磚石混合造
    
    --------- Wood, Earth, and Bamboo ------------------
    - 木造 (跟土木造, 磚木造撞, conflict partially ignored)
    - 竹造 (跟土竹造撞, conflict ignored)
    - 土木造
    - 土造, 土塊造 (跟混凝土造撞)
    
    ---- Iron ----
    - 鐵造 (跟鋼鐵造撞)
    - 鐵架
    - 鐵筋
    - 鐵骨, 鐵骨造
    
    --- Others ----
    - 瓦屋頂, 瓦頂
    - 水泥
    - "ＲＣ補強", "鋼筋補強"
    
    -------- Ignored Materials ---------
    - 土竹造 (ignored, duplicated with 加強磚造)
    - 見其他登記事項, 見其它登記事項, 見使用執照 (ignored)
    """
    def is_built_with(material_string, cond_list):
        # Encounter nan
        if not isinstance(material_string, str):
            return False
        
        conflict_dict = {'石造': '磚石造', '木造': '磚木造', 
                     '土造':'混凝土造', '鐵造': '鋼鐵造'}
        result = []
        for cond in cond_list:
            if cond in material_string:
                if cond not in conflict_dict:
                    result.append(True)
                else:
                    if conflict_dict[cond] in material_string:
                        continue
            
        return any(result)
    
    S_cond_list = ['鋼骨', 'S', 'Ｓ']
    R_cond_list = ['鋼筋', 'R', 'Ｒ']
    C_cond_list = ['混凝土', 'C', 'Ｃ']
    steel_cond_list = ['鋼造', '鋼構造']
    stone_cond_list = ['石造']
    brick_cond_list = ['磚造', '加強磚造', '磚木', '磚木造',
                       '磚石造', '木石磚造', '土磚造', '土磚石混合造']
    wood_earth_bamboo_list = ['木造', '竹造', '土木造',
                              '土造', '土塊造']
    iron_cond_list = ['鐵造', '鐵架', '鐵筋', '鐵骨']
    tile_roof_list = ['瓦屋頂', '瓦頂']
    clay_cond_list = ['水泥']
    RC_reinforce_list = ['ＲＣ補強', '鋼筋補強']
    
    suffices_cond_dict = {'S': S_cond_list, 'R': R_cond_list,
                     'C': C_cond_list, 'steel': steel_cond_list,
                     'stone': stone_cond_list, 'B': brick_cond_list,
                     'W': wood_earth_bamboo_list, 'iron': iron_cond_list,
                     'tile': tile_roof_list, 'clay': clay_cond_list}

    encoding = {}
    column_prefix = 'Building_Material_'
    for suffix, suffix_cond_list in suffices_cond_dict.items():
        column_name = column_prefix + suffix
        encoding[column_name] = is_built_with(raw_materials, suffix_cond_list)
    
    # Special Cases Processing
    RC_cond_list = ['預力混凝土造']
    if is_built_with(raw_materials, RC_cond_list):
        encoding[column_prefix + 'R'] = True
        encoding[column_prefix + 'C'] = True
    
    return encoding

In [11]:
def read_clean_csv(path, verbose=True, **kwargs):
    # Read CSV
    df = pd.read_csv(path, **kwargs)
    if verbose:
        print('Number of rows in raw data:', len(df))
        show_num_unique_vals(df)
        
    # Clean invalid data
    df = clean_invalid(df)
    if verbose:
        print('Number of row in cleaned data:', len(df))
        show_num_unique_vals(df)
        
    return df

In [13]:
def show_num_unique_vals(df):
    print('Number of unique values for each column:')
    for column in df.columns:
        print(f'- {column}: {len(df[column].unique())}')

In [14]:
sale_future_data = read_clean_csv('./sale_future_data.csv', dtype='str')

Number of rows in raw data: 234724
Number of unique values for each column:
- 鄉鎮市區: 232
- 交易標的: 4
- 土地位置建物門牌: 86190
- 土地移轉總面積平方公尺: 10067
- 都市土地使用分區: 6
- 非都市土地使用分區: 7
- 非都市土地使用編定: 10
- 交易年月日: 3458
- 交易筆棟數: 209
- 移轉層次: 107
- 總樓層數: 43
- 建物型態: 10
- 主要用途: 13
- 主要建材: 13
- 建築完成年月: 1
- 建物移轉總面積平方公尺: 20376
- 建物現況格局-房: 29
- 建物現況格局-廳: 17
- 建物現況格局-衛: 26
- 建物現況格局-隔間: 2
- 有無管理組織: 1
- 總價元: 7116
- 單價元平方公尺: 103172
- 車位類別: 8
- 車位移轉總面積平方公尺: 4553
- 車位總價元: 677
- 備註: 2301
- 編號: 234724
- 建案名稱: 3413
- 棟及號: 24227
Number of row in cleaned data: 136112
Number of unique values for each column:
- 鄉鎮市區: 167
- 交易標的: 4
- 土地位置建物門牌: 60666
- 土地移轉總面積平方公尺: 5831
- 都市土地使用分區: 6
- 非都市土地使用分區: 7
- 非都市土地使用編定: 8
- 交易年月日: 1550
- 交易筆棟數: 158
- 移轉層次: 79
- 總樓層數: 42
- 建物型態: 10
- 主要用途: 12
- 主要建材: 11
- 建築完成年月: 1
- 建物移轉總面積平方公尺: 15287
- 建物現況格局-房: 17
- 建物現況格局-廳: 10
- 建物現況格局-衛: 17
- 建物現況格局-隔間: 2
- 有無管理組織: 1
- 總價元: 5159
- 單價元平方公尺: 72938
- 車位類別: 8
- 車位移轉總面積平方公尺: 3074
- 車位總價元: 552
- 備註: 1067
- 編號: 136112
- 建案名稱: 1203
- 棟及號: 14323
- Month: 51


In [15]:
sale_data = read_clean_csv('./sale_data.csv', dtype='str')

Number of rows in raw data: 1840485
Number of unique values for each column:
- 鄉鎮市區: 362
- 交易標的: 6
- 土地位置建物門牌: 1661605
- 土地移轉總面積平方公尺: 136905
- 都市土地使用分區: 1739
- 非都市土地使用分區: 14
- 非都市土地使用編定: 22
- 交易年月日: 4055
- 交易筆棟數: 1130
- 移轉層次: 1241
- 總樓層數: 99
- 建物型態: 12
- 主要用途: 3173
- 主要建材: 823
- 建築完成年月: 20426
- 建物移轉總面積平方公尺: 78364
- 建物現況格局-房: 117
- 建物現況格局-廳: 57
- 建物現況格局-衛: 119
- 建物現況格局-隔間: 2
- 有無管理組織: 2
- 總價元: 201715
- 單價元平方公尺: 226361
- 車位類別: 8
- 車位移轉總面積(平方公尺): 10910
- 車位總價元: 1620
- 備註: 101287
- 編號: 1840485
- 主建物面積: 47381
- 附屬建物面積: 8003
- 陽台面積: 6579
- 電梯: 3
- 移轉編號: 8130


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['Month'] = dataframe['交易年月日'].str[:-2].astype('float')


Number of row in cleaned data: 1074132
Number of unique values for each column:
- 鄉鎮市區: 357
- 交易標的: 4
- 土地位置建物門牌: 1028138
- 土地移轉總面積平方公尺: 35781
- 都市土地使用分區: 1005
- 非都市土地使用分區: 13
- 非都市土地使用編定: 16
- 交易年月日: 1551
- 交易筆棟數: 895
- 移轉層次: 1152
- 總樓層數: 86
- 建物型態: 12
- 主要用途: 1915
- 主要建材: 381
- 建築完成年月: 19833
- 建物移轉總面積平方公尺: 73039
- 建物現況格局-房: 101
- 建物現況格局-廳: 50
- 建物現況格局-衛: 102
- 建物現況格局-隔間: 2
- 有無管理組織: 2
- 總價元: 30010
- 單價元平方公尺: 193479
- 車位類別: 8
- 車位移轉總面積(平方公尺): 10208
- 車位總價元: 1415
- 備註: 52091
- 編號: 1074132
- 主建物面積: 44257
- 附屬建物面積: 7393
- 陽台面積: 6223
- 電梯: 3
- 移轉編號: 6530
- Month: 51


In [8]:
print(sale_data.columns)

Index(['鄉鎮市區', '交易標的', '土地位置建物門牌', '土地移轉總面積平方公尺', '都市土地使用分區', '非都市土地使用分區',
       '非都市土地使用編定', '交易年月日', '交易筆棟數', '移轉層次', '總樓層數', '建物型態', '主要用途', '主要建材',
       '建築完成年月', '建物移轉總面積平方公尺', '建物現況格局-房', '建物現況格局-廳', '建物現況格局-衛',
       '建物現況格局-隔間', '有無管理組織', '總價元', '單價元平方公尺', '車位類別', '車位移轉總面積(平方公尺)',
       '車位總價元', '備註', '編號', '主建物面積', '附屬建物面積', '陽台面積', '電梯', '移轉編號', 'Month'],
      dtype='object')


In [9]:
print(sale_future_data.columns)

Index(['鄉鎮市區', '交易標的', '土地位置建物門牌', '土地移轉總面積平方公尺', '都市土地使用分區', '非都市土地使用分區',
       '非都市土地使用編定', '交易年月日', '交易筆棟數', '移轉層次', '總樓層數', '建物型態', '主要用途', '主要建材',
       '建築完成年月', '建物移轉總面積平方公尺', '建物現況格局-房', '建物現況格局-廳', '建物現況格局-衛',
       '建物現況格局-隔間', '有無管理組織', '總價元', '單價元平方公尺', '車位類別', '車位移轉總面積平方公尺', '車位總價元',
       '備註', '編號', '建案名稱', '棟及號', 'Month'],
      dtype='object')


In [None]:
my_columns = ['都市土地使用分區', '主要用途', '主要建材', '車位移轉總面積(平方公尺)', '電梯']
print('Before cleaning invalid data, the unique values for column:')
for column in my_columns:
    print(f'-{column}:{len(sale_data[column].unique())}')
    
print('-'*20)
print('After cleaning invalid data, the unique values for column:')
for column in my_columns:
    print(f'-{column}:{len(sale_data[column].unique())}')

In [None]:
# Encode city land usage
print('Unique values for 都市土地使用分區 before encoding:', sale_data['都市土地使用分區'].unique(), \
      len(sale_data['都市土地使用分區'].unique()))
sale_data['都市土地使用分區'] = sale_data['都市土地使用分區'].apply(encode_city_land_usage)
print('Unique values after encoding:', sale_data['都市土地使用分區'].unique(), \
      len(sale_data['都市土地使用分區'].unique()))

In [None]:
print('Unique values for 主要用途 before encoding:', sale_data['主要用途'].unique(), \
      len(sale_data['主要用途'].unique()))
sale_data['主要用途'] = sale_data['主要用途'].apply(encode_main_usage)

In [None]:
# sale_data['電梯'].unique()
sale_data['電梯'].apply(encode_have_elevator)

In [None]:
# sale_data['車位移轉總面積(平方公尺)'].unique()
sale_data['車位移轉總面積(平方公尺)'].apply(transform_area)

In [None]:
# Encode building materials and add new columns
import time

sale_data_building_materials = {
    'Building_Material_S': [],
    'Building_Material_R': [],
    'Building_Material_C': [],
    'Building_Material_steel': [],
    'Building_Material_stone': [],
    'Building_Material_B': [],
    'Building_Material_W': [],
    'Building_Material_iron': [],
    'Building_Material_tile': [],
    'Building_Material_clay': [],
}

start_t = time.time()
for material_string in sale_data['主要建材']:
    encode_result = encode_materials(material_string)
    for key in encode_result.keys():
        sale_data_building_materials[key].append(encode_result[key])
        
for key in sale_data_building_materials.keys():
    sale_data[key] = sale_data_building_materials[key]

end_t = time.time()
print('Elapsed Time:', end_t - start_t)

In [None]:
sale_data.head()

# The Analysis of 主要用途

In [None]:
values_of_main_usage = sale_data['主要用途'].sort_values()
delimiter = '．'
usage_set = set()
for i, usage in enumerate(values_of_main_usage.unique()):
    if not isinstance(usage, str):
        continue
    if delimiter in usage:
        usages = usage.split(delimiter)
        usage_set = usage_set.union(set(usages))

In [None]:
for i, usage in enumerate(usage_set):
    print(i, usage)

In [None]:
count = 0
for usage in values_of_main_usage:
    if not isinstance(usage, str):
        continue
    if '宿舍' in usage:
        count += 1
        print(usage)
print(count)

In [None]:
sidewalk_conds = ['人行', '步道', '走道', '騎樓']
living_conds = ['住', '民宿']
selling_conds = ['店舖', '店鋪', '店房', '店房', '商店', '店', '零售', \
                 '商場', '百貨']
manufaturing_conds = ['廠房', '工業', '工廠', '工場']
business_conds = ['商業', '辦公', '事務所', '服務業', '工商服務業', '住商']
dorm_conds = ['宿舍']
parking_conds = ['停車', '車庫']
medical_conds = ['醫', '病', '診所']
sport_and_entertainment_conds = ['運動', '健身', '休閒', '保齡球館', \
                                 '活動室', '交誼廳', '娛樂']
farm_conds = ['農舍', '農業', '畜牧', '豬舍', '雞舍', '羊舍', '牛舍', \
              '禽舍', '堆肥舍']
excluded_conds = ['銀行營業廳', '銀行', '金融機構', '保險公司', \
                  '保險分支機構', '電影院', \
                  '汽車改裝業及汽車修理（甲種汽車修理廠）業', \
                  '長期照顧機構〈養護型〉',
                  ]

def is_kind_of_usage(raw_usage, conds):
    # nan remains
    if not isinstance(raw_usage, str):
        return raw_usage

    for cond in conds:
        if cond in raw_usage:
            return True
    
    return False

In [None]:
# Survey the main usage of the building
values_of_man_usage = sale_data['主要用途'].sort_values()
for x in values_of_man_usage.unique():
    
    if not isinstance(x, str):
        # nan remains
        continue
    
    living = lambda x: True if '住' in x else False
    farm_related = lambda x: True if '農舍' in x or '農業' in x or \
                                     '畜牧' in x or '豬舍' in x or \
                                     '雞舍' in x or '羊舍' in x or \
                                     '牛舍' in x or '禽舍' in x or \
                                     '堆肥舍' in x else False
    manufaturing = lambda x: True if '工場' in x or '工廠' in x or '廠房' in x else False
    business = lambda x: True if '事務所' in x or '辦公' in x or '商業' in x else False
    selling = lambda x: True if '店鋪' in x or '商店' in x or \
                                '店舖' in x or '零售' in x or \
                                '店房' in x or '商場' in x or \
                                '百貨' in x else False
    medical = lambda x: True if '醫' in x or '病' in x or '診所' in x else False
    parking = lambda x: True if '停車' in x or '車庫' in x else False
    others = lambda x: True if '金融' in x or '健身' in x else False
    
    # 不考慮備註
    end_idx = x.find('（') if '（' in x else len(x) - 1
    x = x[:end_idx]
    
    if '、' in x or '；' in x or '．' in x or \
       '.' in x or '‧' in x or '﹒' in x or \
       '，' in x or '﹐' in x or \
       ' ' in x or '　' in x:
        # Mutltiple usages
        if living(x):     
            if selling(x):
                pass
            elif business(x):
                pass
            elif manufaturing(x):
                pass
            elif farm_related(x):
                pass
            elif medical(x):
                pass
            elif others(x):
                pass
            else:
                pass
        else:
            if manufaturing(x):
                pass
            elif business(x):
                pass
            elif selling(x):
                pass
            elif farm_related(x):
                pass
            elif medical(x):
                pass
            elif parking(x):
                pass
            else:
                pass
    else:
        if living(x):
            if selling(x):
                pass
            elif business(x):
                pass
            elif manufaturing(x):
                pass
            elif farm_related(x):
                pass
            elif medical(x):
                pass
            elif others(x):
                pass
            else:
                pass
        else:
            if manufaturing(x):
                pass
            elif business(x):
                pass
            elif selling(x):
                pass
            elif farm_related(x):
                pass
            elif medical(x):
                pass
            elif parking(x):
                pass
            else:
                # Cases list:
                print(x)

# The Analysis of 主要建材

In [None]:
# Enumerate all distinct building materials
for i, material in enumerate(sale_data['主要建材'].unique()):
    if not isinstance(material, str):
        continue
    print(i, material)

In [None]:
# Visualize the distribution of building materials
# The y-axis is logarithmized.
from operator import itemgetter
import warnings
import math
import matplotlib.pyplot as plt


material_map = {}
for material in sale_data['主要建材']:
    if material not in material_map:
        material_map[material] = 1
    else:
        material_map[material] += 1

material_map_items = sorted(material_map.items(), key=itemgetter(1), reverse=True)
material_statistics = []
material_name = []
for key, value in material_map_items:
#     print(key, value)
    material_name.append(key)
    material_statistics.append(int(value))
    
material_name = np.array(material_name)
material_count = np.array(material_statistics)
material_count = np.log(material_count)

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    plt.bar(material_name, material_count)
    plt.show()

In [None]:
def find_substring_and_propotion(dataframe, column, pattern, verbose=True):
    """List the unique content contains specific pattern in a given column.
    
    Example:
        column = '主要建材'
        substring = '混凝土'
        find_substring_and_propotion(sale_data, column, substring)
        
        鋼筋混凝土造
        鋼骨鋼筋混凝土造
        鋼筋混凝土加強磚造
        鋼筋混凝土構造
        鋼骨混凝土造
        鋼筋混凝土加強空心磚造
        ...
        ...
        Number of observations in the dataframe: 900651
        The propotion: 83.85%
    """
    for elem in dataframe[column].unique():
        # ignore nan
        if not isinstance(elem, str):
            continue
        if pattern in elem:
            print(elem)
        
    sample_count = len([data for data in dataframe[column] \
                        if isinstance(data, str) and pattern in data])
    total_sample = len(dataframe)
    propotion = sample_count / total_sample  
    if verbose:
        print('Number of observations in the dataframe:', sample_count)
        print(f'The propotion: {propotion * 100:.2f}%')
        
    return sample_count, propotion*100


column = '主要建材'
substring = '磚石造'
find_substring_and_propotion(sale_data, column, substring)

In [None]:
df1_ = sale_data.loc[lambda df: df['主要建材'] == '木造']['單價元平方公尺'].astype('float')
df2_ = sale_data.loc[lambda df: df['主要建材'] == '土木造']['單價元平方公尺'].astype('float')
df3_ = sale_data.loc[lambda df: df['主要建材'] == '竹造']['單價元平方公尺'].astype('float')
df4_ = sale_data.loc[lambda df: df['主要建材'] == '土竹造']['單價元平方公尺'].astype('float')

print('# of df1_:', df1_.shape[0])
print('# of df2_:', df2_.shape[0])
print('# of df3_:', df3_.shape[0])
print('# of df4_:', df4_.shape[0])

In [None]:
plt.scatter(range(df1_.shape[0]), df1_)

In [None]:
plt.scatter(range(df2_.shape[0]), df2_)

In [None]:
plt.scatter(range(df3_.shape[0]), df3_)