In [166]:
import requests
import pandas as pd

# Query the TradeListSearch endpoint and load the returned records into `df`.
url = "https://www.land.mlit.go.jp/webland/api/TradeListSearch"
parameters = {
    "from": "20221",  # presumably year + quarter (2022 Q1) — TODO confirm against the API docs
    "to": "20224",    # presumably year + quarter (2022 Q4) — TODO confirm
    "area": "01"      # presumably a prefecture code — TODO confirm
}

# Without a timeout, requests waits forever on a stalled connection and the
# kernel appears hung; fail after 30 seconds instead.
response = requests.get(url, params=parameters, timeout=30)

if response.status_code == 200:
    data = response.json()  # parse the JSON payload
    df = pd.DataFrame(data['data'])
else:
    # NOTE: `df` is not (re)assigned on this path, so later cells will either
    # fail with NameError or silently reuse a `df` left over from a previous run.
    print("Request was not successful. Status code:", response.status_code)


In [175]:
def RemoveM2(df_, needRemoveVal:dict):
    """Strip a marker substring from text columns and flag the rows that had it.

    For every ``column -> marker`` pair in ``needRemoveVal`` this adds an
    integer column named ``<column>_over_flag`` (1 where the value contained
    the marker, 0 otherwise, including missing values) and then removes the
    marker text from the column itself.

    The marker is matched as a literal string, not as a regular expression.

    Args:
        df_: DataFrame whose listed columns hold strings (missing values are
            allowed). It is modified in place.
        needRemoveVal: Mapping of column name to the literal substring to
            remove from that column.

    Returns:
        The same ``df_`` object, after modification.
    """
    for col, target_word in needRemoveVal.items():
        values = df_[col]
        # na=False treats missing values as "not flagged" without a separate
        # fillna step; regex=False keeps characters like "+" or "(" literal.
        has_marker = values.str.contains(target_word, regex=False, na=False)
        df_[col + '_over_flag'] = has_marker.astype('int64')
        # Explicit regex=False: the default of str.replace differs between
        # pandas versions, and the marker is meant literally.
        df_[col] = values.str.replace(target_word, '', regex=False)

    return df_

def changeDataType(df_,changeDataTypeVal:dict):
    """Cast the listed columns of ``df_`` to new dtypes, in place.

    Args:
        df_: DataFrame to modify.
        changeDataTypeVal: Mapping of column name to the dtype passed to
            ``Series.astype`` for that column.

    Returns:
        The same ``df_`` object, with the listed columns replaced by their
        converted versions.
    """
    for column_name in changeDataTypeVal:
        wanted_dtype = changeDataTypeVal[column_name]
        converted = df_[column_name].astype(wanted_dtype)
        df_[column_name] = converted
    return df_

def ExtractRow(df_, extractCondionDic):
    """Keep only the rows whose values are in the allowed sets.

    Args:
        df_: DataFrame to filter. It is not modified.
        extractCondionDic: Mapping of column name to a list-like of accepted
            values. A row is kept only if it satisfies every entry.

    Returns:
        The filtered DataFrame (``df_`` itself when the mapping is empty).
    """
    filtered = df_
    for column_name in extractCondionDic:
        allowed_values = extractCondionDic[column_name]
        keep_mask = filtered[column_name].isin(allowed_values)
        filtered = filtered.loc[keep_mask]
    return filtered

In [177]:
# Columns removed from the fetched frame before cleaning.
drop_cols = ['PricePerUnit', 'Purpose', 'Direction', 'Classification']

# column -> suffix text that RemoveM2 strips from the value (rows that had the
# suffix are marked in a "<column>_over_flag" column).
needRemoveVal = dict(
    Frontage='m以上',
    Area='㎡以上',
    TotalFloorArea='㎡以上',
)

# column -> dtype to cast to once the suffix text has been stripped.
changeDataTypeVal = dict(
    TradePrice='int',
    Frontage='float',
    Area='int',
    UnitPrice='float',
    TotalFloorArea='float',
    Breadth='float',
    CoverageRatio='float',
    FloorAreaRatio='float',
)

# column -> values to keep; rows with any other value are dropped.
extractCondionDic = {
    'Type': ['宅地(土地と建物)', '中古マンション等', '宅地(土地)'],
}

# Drop unused columns, strip suffixes, then cast dtypes.
df2 = (
    df.drop(columns=drop_cols)
    .pipe(RemoveM2, needRemoveVal)
    .pipe(changeDataType, changeDataTypeVal)
)
print(df2.shape)  # shape before row filtering

df2 = ExtractRow(df2, extractCondionDic)
df2.shape  # shape after row filtering

(15187, 26)


(12861, 26)

In [165]:
# NOTE(review): this cell's execution count (165) is lower than that of the
# cleaning cell (177), and the saved output shows the pre-filter row count,
# so the output below is likely stale — re-run after the pipeline cell.
df2.shape

(15187, 26)

In [76]:
# Probe whether a raw column can be cast to int, and if not, to float, then
# list its distinct values to see what blocks the conversion.
target_col = 'TotalFloorArea'
i = 0  # set to 1 once the int cast succeeds, which skips the float probe

# Only conversion failures are caught: a bare `except:` would also swallow
# KeyboardInterrupt and would report a misspelled column name as "not castable".
try:
    df.loc[:, target_col].astype('int')
except (ValueError, TypeError):
    print('int不可')
else:
    print('intOK')
    i = 1

if i == 0:
    try:
        df.loc[:, target_col].astype('float')
    except (ValueError, TypeError):
        print('float不可')
    else:
        print('floatOK')

df.loc[:, target_col].unique()

int不可
float不可


array(['300', nan, '1700', '1800', '470', '990', '125', '230', '95',
       '600', '2000㎡以上', '240', '580', '120', '190', '370', '260', '130',
       '550', '185', '195', '90', '140', '80', '170', '290', '250', '155',
       '105', '730', '610', '280', '145', '210', '1100', '980', '115',
       '560', '320', '1000', '180', '830', '65', '680', '640', '840',
       '490', '420', '380', '740', '430', '220', '390', '1500', '45',
       '135', '630', '450', '920', '110', '1300', '710', '500', '85',
       '650', '340', '1200', '900', '530', '160', '175', '200', '810',
       '150', '100', '690', '1900', '660', '780', '930', '870', '410',
       '165', '440', '890', '330', '700', '520', '570', '720', '460',
       '350', '1400', '75', '70', '55', '360', '310', '880', '820', '670',
       '480', '400', '910', '590', '760', '750', '950', '60', '960',
       '790', '270', '850', '510', '540', '620', '25', '770', '20', '860',
       '35', '1600', '30', '40', '800', '50', '10', '940'], dtype=object)

In [70]:
# Inspect the Remarks column.
df['Remarks']

0        NaN
1        NaN
2        NaN
3        NaN
4        NaN
        ... 
15182    NaN
15183    NaN
15184    NaN
15185    NaN
15186    NaN
Name: Remarks, Length: 15187, dtype: object

In [69]:
# Count the Type entries for each Remarks value, most frequent first.
(
    df.pivot_table(index='Remarks', values='Type', aggfunc='count')
    .sort_values('Type', ascending=False)
)

Unnamed: 0_level_0,Type
Remarks,Unnamed: 1_level_1
調停・競売等,68
私道を含む取引,53
隣地の購入,44
関係者間取引,35
古屋付き・取壊し前提,3
