In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from utils.Parallelize_DataFrame import *

In [32]:
from model.Preprocessing.create_data import *
from model.Preprocessing.haversine import *
from model.Preprocessing.holiday import *
from model.Preprocessing.hospital import *
from model.Preprocessing.school import *
from model.Preprocessing.particulates_matter import *
from model.Preprocessing.time_feature import *
from model.Preprocessing.trading_area import *
from model.Preprocessing.weather import *
from model.Preprocessing.count_nearby import *
from model.Preprocessing.impute import *

In [4]:
from datetime import datetime, timedelta

### 마이비 카드 데이터

In [5]:
%%time
# Load the raw Mybi card records from parquet using the pyarrow engine.
mybicard = pd.read_parquet('/home/seho/Passenger_Demand/data/mybicard.parquet', engine='pyarrow')

CPU times: user 28.3 s, sys: 6.56 s, total: 34.9 s
Wall time: 18.3 s


In [6]:
# 수집일자 데이트 포맷으로 변환
#mybicard["collectdate"] = pd.to_datetime(mybicard["collectdate"], format = "%Y%m%d")

In [7]:
# Convert the transmission timestamp (transdate) from "%Y%m%d %H:%M:%S" text to datetime.
mybicard["transdate"] = pd.to_datetime(mybicard["transdate"], format = "%Y%m%d %H:%M:%S")

In [8]:
%%time
# Total count per record as the element-wise sum of the three count columns.
# NOTE(review): the next cell assigns totalcnt again with .sum(axis = 1), so this
# value is overwritten. The two only differ when a count is NaN: `+` propagates
# NaN, while DataFrame.sum skips it by default.
mybicard["totalcnt"] = mybicard["normalcnt"] + mybicard["studentcnt"] + mybicard["childcnt"]

CPU times: user 409 ms, sys: 316 ms, total: 725 ms
Wall time: 135 ms


In [9]:
%%time
# Create the total passenger count column (normal + student + child), summed row-wise.
mybicard["totalcnt"] = mybicard[["normalcnt", "studentcnt", "childcnt"]].sum(axis = 1)

CPU times: user 530 ms, sys: 1.2 s, total: 1.73 s
Wall time: 1.72 s


In [10]:
# route_nm values contain whitespace; remove every whitespace character.
# The pattern is a raw string because "\s" in a normal literal is an invalid
# escape sequence (a warning today, an error in future Python versions).
mybicard["route_nm"] = mybicard["route_nm"].replace(r"\s", "", regex = True)

In [11]:
# ;mybicard = mybicard.sort_values(["transdate", "seq"]).reset_index(drop=True)

In [12]:
# Rename the card data's stop identifier to mybi_stop_id so it stays distinct from
# the stop_id column of the bus-stop location tables loaded later.
mybicard = mybicard.rename(columns = {"stop_id" : "mybi_stop_id"})

### 401번 버스

In [13]:
# Route 401 only, limited to records whose transflag is "환승" (transfer) or
# "비환승" (non-transfer); keep just the columns used downstream.
mybicard_401 = (
    mybicard
    .loc[
        (mybicard["route_nm"] == "401") & (mybicard["transflag"].isin(["환승", "비환승"])),
        ["route_nm", "transdate", "mybi_stop_id", "normalcnt", "studentcnt", "childcnt", "totalcnt"],
    ]
    .reset_index(drop = True)
)

In [14]:
mybicard_401.shape

(1770869, 7)

In [15]:
mybicard_401.shape

(1770869, 7)

### 정류장 X,Y 좌표 추가

In [16]:
# Bus-stop location file. Original note lists the regions Gyeongju, Yangsan, Ulsan
# and Busan (presumably the values of the "권역" column); only Ulsan rows are kept.
bus_stop_info = pd.read_csv("/home/seho/Passenger_Demand/data/울산광역시_버스 정류소 위치 정보_20200531.csv", encoding = "euc-kr")
bus_stop_info = bus_stop_info.loc[bus_stop_info["권역"] == "울산광역시"]
# Replace the headers with English names (assumes exactly five columns in this order — TODO confirm).
bus_stop_info.columns = ["stop_nm", "stop_id", "longitude", "latitude", "city"]

In [17]:
# Stop lists for route 401, one CSV per direction, stacked into a single table.
bus_stop_401_1 = pd.read_csv("/home/seho/Passenger_Demand/data/401_율리_꽃바위.csv", encoding = "euc_kr")
bus_stop_401_2 = pd.read_csv("/home/seho/Passenger_Demand/data/401_꽃바위_율리.csv", encoding = "euc_kr")
bus_stop_401 = pd.concat([bus_stop_401_1, bus_stop_401_2])
# Assumes both files have two columns: the Mybi stop id, then the public stop id — TODO confirm order.
bus_stop_401.columns = ["mybi_stop_id", "stop_id"]

In [18]:
# Attach name, coordinates and city to each route-401 stop (default inner join on stop_id).
bus_stop_401_info = pd.merge(bus_stop_401, bus_stop_info, on = "stop_id")

In [19]:
# Map each record's Mybi stop id to the public stop id, stop name and coordinates.
# The inner join drops records whose stop is not in the route-401 stop table.
mybicard_401 = pd.merge(mybicard_401, bus_stop_401_info[["mybi_stop_id", "stop_id", "stop_nm", "longitude", "latitude"]], on = "mybi_stop_id", how = "inner")

In [20]:
# mybi_stop_id was only needed as the join key; drop it.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
mybicard_401 = mybicard_401.drop(columns = ["mybi_stop_id"])

In [21]:
# Exclude records whose transdate hour is 1, 2, 3 or 4 (01:00-04:59).
# `~mask` is the idiomatic negation of a boolean Series (instead of `mask == False`).
mybicard_401 = mybicard_401.loc[~mybicard_401["transdate"].dt.hour.isin([1, 2, 3, 4])]

In [22]:
mybicard_401.shape

(1770869, 10)

In [23]:
# One row per distinct (stop_id, stop_nm, longitude, latitude) seen in the route-401 records.
bus_stop_info = mybicard_401[["stop_id", "stop_nm", "longitude", "latitude"]].drop_duplicates().reset_index(drop = True)

In [24]:
# Aggregate the four count columns per stop at a 60-minute frequency, with stop name
# and coordinates passed as extra grouping columns.
# create_data_agg is project code; the aggregation used when agg_func is omitted is
# not visible here — TODO confirm.
base_data = create_data_agg(mybicard_401, date_col = "transdate", stop_id_col = "stop_id", target_cols = ["totalcnt", "normalcnt", "studentcnt", "childcnt"], freq = "60min", groupby_cols = ["stop_nm", "latitude", "longitude"])

In [None]:
import pickle

In [171]:
# NOTE(review): to_pickle needs a target path, so this call fails as written. The
# recorded output of this cell is a bound-method repr, i.e. it was last run without
# the parentheses. The following cell performs the actual save; this cell looks like
# a leftover and can probably be deleted.
base_data.to_pickle()

<bound method NDFrame.to_pickle of          stop_nm   latitude   longitude  stop_id           transdate  \
0            공업탑  35.532811  129.310320    40403 2020-04-08 05:00:00   
1            공업탑  35.532811  129.310320    40403 2020-04-08 06:00:00   
2            공업탑  35.532811  129.310320    40403 2020-04-08 07:00:00   
3            공업탑  35.532811  129.310320    40403 2020-04-08 08:00:00   
4            공업탑  35.532811  129.310320    40403 2020-04-08 09:00:00   
...          ...        ...         ...      ...                 ...   
291052  효정고등학교입구  35.560418  129.372658    30614 2020-11-07 18:00:00   
291053  효정고등학교입구  35.560418  129.372658    30614 2020-11-07 19:00:00   
291054  효정고등학교입구  35.560418  129.372658    30614 2020-11-07 21:00:00   
291055  효정고등학교입구  35.560418  129.372658    30614 2020-11-08 12:00:00   
291056  효정고등학교입구  35.560418  129.372658    30614 2020-11-08 15:00:00   

        totalcnt  normalcnt  studentcnt  childcnt  
0             10         10           0         

In [172]:
# Persist the hourly aggregate so later sessions can skip the steps above.
base_data.to_pickle("/home/seho/Passenger_Demand/data/base_data.pkl")

In [25]:
base_data.shape

(291057, 9)

In [26]:
# Rebuild the distinct stop table, this time from the aggregated frame.
bus_stop_info = base_data[["stop_id", "stop_nm", "longitude", "latitude"]].drop_duplicates().reset_index(drop = True)

In [86]:
# Build the frame used for feature engineering from base_data, excluding hours 1-4.
# Presumably this expands to every stop/time slot so empty slots exist as rows (the
# recorded shapes go from 291057 to 563300 rows) — create_all_date is project code, TODO confirm.
all_date = create_all_date(data = base_data, date_col = "transdate", stop_id_col = "stop_id", except_hours=[1,2,3,4])

In [87]:
all_date.shape

(563300, 9)

In [88]:
# Add calendar features derived from transdate. Later cells use the columns date,
# hour, dayofweek, month and weekofyear — presumably created here, TODO confirm.
all_date = add_time_features(data = all_date, date_col = "transdate")

### 결측치 

In [89]:
# Fill missing values in the demand data. The imputation rule lives in the project
# function impute_bus_demand_data and is not visible in this notebook.
all_date = impute_bus_demand_data(data = all_date, date_col = "transdate", stop_id_col = "stop_id")

### 시계열 변수 생성

In [90]:
%%time
# Lag features for each count column, 1 to 7 days back, grouped by stop.
# Output columns are presumably named "<col>_bf_<lag>" (inferred from the rename
# mapping built in a later cell) — TODO confirm.
all_date = create_lag_feature(data = all_date, target_cols = ["totalcnt", "normalcnt", "studentcnt", "childcnt"], date_cols = "transdate", lags = ["1d", "2d", "3d", "4d", "5d", "6d", "7d"], groupby_cols = "stop_id")

CPU times: user 1.83 s, sys: 167 ms, total: 1.99 s
Wall time: 2.05 s


### 날짜별 평균 Lag

In [91]:
# Per-stop aggregate of each count column at daily frequency ("1D"), using np.mean.
data_agg_daily_mean = create_data_agg(data = all_date, date_col = "transdate", stop_id_col = "stop_id", target_cols = ["totalcnt", "normalcnt", "studentcnt", "childcnt"], freq = "1D", agg_func = np.mean)

In [92]:
# Lag windows "1d" .. "7d", and a mapping that appends "_total" to each
# totalcnt lag column name ("totalcnt_bf_<lag>" -> "totalcnt_bf_<lag>_total").
lags = [f"{n}d" for n in range(1, 8)]
rename_dict = {
    name: f"{name}_total"
    for name in (f"totalcnt_bf_{lg}" for lg in lags)
}

In [93]:
# Lag the daily totalcnt aggregate by each window in `lags`, grouped by stop.
daily_lag = create_lag_feature(data = data_agg_daily_mean, target_cols = "totalcnt", date_cols = "transdate", lags = lags, groupby_cols = "stop_id")

In [94]:
# Apply the "_total" suffix from rename_dict so these daily lags do not share names
# with the hourly lag columns, then add a plain date column to join on.
daily_lag = daily_lag.rename(columns = rename_dict)
daily_lag["date"] = daily_lag["transdate"].dt.date

In [95]:
# Left-join the renamed daily lag columns onto the main frame by (date, stop_id).
all_date = pd.merge(all_date, daily_lag[["date", "stop_id"] + list(rename_dict.values())], on = ["date", "stop_id"], how = "left")

### Moving Average

#### 1) 이전 n개일자들의 동일 시간대 평균

In [96]:
%%time
# Moving aggregate of totalcnt within each (stop, hour) group, i.e. the same hour of
# day over windows of 2-6 days. The section header describes this as a mean;
# calculate_moving_agg is project code, so the statistic itself is not visible here.
all_date = calculate_moving_agg(data = all_date, target_cols = ["totalcnt"], date_col = "transdate", groupby_cols = ["stop_id", "hour"], col_nm = "hour", rollings = ["2d", "3d", "4d", "5d", "6d"])

CPU times: user 3.26 s, sys: 488 ms, total: 3.75 s
Wall time: 3.8 s


#### 2) n주전까지의 동일 요일의 동일 시간대 평균

In [97]:
%%time
# Moving aggregate of totalcnt within each (stop, hour, day-of-week) group over
# 14/21/28-day windows, i.e. the same weekday and hour in preceding weeks.
all_date = calculate_moving_agg(data = all_date, target_cols = ["totalcnt"], date_col = "transdate", groupby_cols = ["stop_id", "hour", "dayofweek"], col_nm = "hour_week", rollings = ["14d", "21d", "28d"])

CPU times: user 3.69 s, sys: 214 ms, total: 3.9 s
Wall time: 3.95 s


#### 3) 이전 n개일자들의 전체 평균

In [98]:
# Per-stop daily aggregate of totalcnt using the builtin sum, computed from the
# record-level frame mybicard_401 (not from the imputed all_date frame).
data_agg_daily_sum = create_data_agg(mybicard_401, date_col = "transdate", stop_id_col = "stop_id", target_cols = ["totalcnt"], freq = "1D", agg_func = sum)

In [99]:
# Add the calendar feature columns to the daily totals (later cells group by dayofweek / weekofyear).
data_agg_daily_sum = add_time_features(data_agg_daily_sum, date_col = "transdate")

In [100]:
%%time
# Moving aggregate of the daily totals per stop over 2-6 day windows.
daily_mv_agg = calculate_moving_agg(data = data_agg_daily_sum, target_cols = ["totalcnt"], date_col = "transdate", groupby_cols = ["stop_id"], col_nm = "daily", rollings = ["2d", "3d", "4d", "5d", "6d"])
# Plain date column used as the join key against all_date.
daily_mv_agg["date"] = daily_mv_agg["transdate"].dt.date
# Keep only the keys and the new moving-aggregate columns. `columns=` replaces the
# positional axis argument, which pandas 2.0 no longer accepts.
daily_mv_agg = daily_mv_agg.drop(columns = ["transdate", "totalcnt", "dayofweek", "hour", "month", "weekofyear"])

CPU times: user 114 ms, sys: 0 ns, total: 114 ms
Wall time: 121 ms


In [101]:
# Left-join the daily moving aggregates onto the main frame by (stop_id, date).
all_date = pd.merge(all_date, daily_mv_agg, on = ["stop_id", "date"], how = "left")

#### 4) n주전까지의 동일 요일의 전체 평균

In [102]:
%%time
# Moving aggregate of the daily totals per (stop, day-of-week) over 14/21/28-day windows.
daily_week_mv_agg = calculate_moving_agg(data = data_agg_daily_sum, target_cols = ["totalcnt"], date_col = "transdate", groupby_cols = ["stop_id", "dayofweek"], col_nm = "daily_week", rollings = ["14d", "21d", "28d"])
# Plain date column used as the join key against all_date.
daily_week_mv_agg["date"] = daily_week_mv_agg["transdate"].dt.date
# Keep only the keys and the new moving-aggregate columns. `columns=` replaces the
# positional axis argument, which pandas 2.0 no longer accepts.
daily_week_mv_agg = daily_week_mv_agg.drop(columns = ["transdate", "totalcnt", "dayofweek", "hour", "month", "weekofyear"])

CPU times: user 163 ms, sys: 372 µs, total: 163 ms
Wall time: 172 ms


In [103]:
# Left-join the same-weekday moving aggregates onto the main frame by (stop_id, date).
all_date = pd.merge(all_date, daily_week_mv_agg, on = ["stop_id", "date"], how = "left")

#### 5) n주전까지의 주 평균의 이동평균

In [104]:
# Add the calendar feature columns to the record-level frame as well.
# NOTE(review): re-running this cell applies add_time_features to an already
# augmented frame; whether that is harmless depends on the project function.
mybicard_401 = add_time_features(mybicard_401, date_col = "transdate")

In [105]:
# Aggregate the daily totals with np.mean, grouped additionally by weekofyear
# (no freq is passed here, unlike the earlier create_data_agg calls).
data_agg_weekly_mean = create_data_agg(data_agg_daily_sum, date_col = "transdate", stop_id_col = "stop_id", groupby_cols = "weekofyear",  target_cols = ["totalcnt"], agg_func = np.mean)

In [106]:
# Moving aggregate of the weekly means per stop over windows of 2, 3 and 4,
# ordered by weekofyear instead of a timestamp.
weekly_mv_agg = calculate_moving_agg(data = data_agg_weekly_mean, target_cols = ["totalcnt"], date_col = "weekofyear", groupby_cols = "stop_id", col_nm = "weekly", rollings = [2,3,4])
# Drop the raw weekly value. `columns=` replaces the positional axis argument,
# which pandas 2.0 no longer accepts.
weekly_mv_agg = weekly_mv_agg.drop(columns = "totalcnt")

In [107]:
# Left-join the weekly moving aggregates onto the main frame by (stop_id, weekofyear).
all_date = pd.merge(all_date, weekly_mv_agg, on = ["stop_id", "weekofyear"], how = "left")

### 특일 데이터 추가

In [108]:
# Load the special-day (holiday) table.
holiday_data = pd.read_parquet("/home/seho/Passenger_Demand/data/holiday_data.parquet")

In [109]:
# Project preprocessing step; the result must expose a "date" column, since the next
# cell merges on it.
holiday_data = preprocess_holiday_data(holiday_data = holiday_data)

In [110]:
# Left-join the holiday features by date; dates absent from holiday_data get NaN.
all_date = pd.merge(all_date, holiday_data, on = "date", how = "left")

### 날씨 데이터 추가

In [173]:
# Load the weather table.
# NOTE(review): the file name says 2018, but the first time_hours value inspected in a
# later cell is '2020-01-01 00' — confirm this is the intended file.
weather_data = pd.read_parquet("/home/seho/Passenger_Demand/data/weather_2018.parquet")

In [174]:
# Project preprocessing step for the weather table.
# NOTE(review): none of the visible cells merges weather_data into all_date.
weather_data = preprocess_weather_data(weather_data = weather_data)

In [180]:
all_date["transdate"].dt.date

datetime.date(2020, 4, 8)

In [176]:
weather_data["time_hours"][0]

'2020-01-01 00'

### 미세먼지 데이터 추가

In [113]:
# Load the particulate-matter (fine dust) table.
pm_data = pd.read_csv("/home/seho/Passenger_Demand/data/pm_data.csv")
# pm_data["issueDate"] = pd.to_datetime(pm_data["issueDate"], format = "%Y-%m-%d")

In [114]:
# Project preprocessing step; the result must expose a "date" column, since the next
# cell merges on it.
pm_data = preprocess_pm_data(pm_data = pm_data)

In [115]:
# Left-join the particulate-matter features by date.
all_date = pd.merge(all_date, pm_data, how = "left", on = "date")

In [116]:
# Inputs shared by the spatial-feature cell below.
data = mybicard_401  # only its stop id / name / coordinate columns are read below
num_cores = 12  # worker count passed to parallelize_dataframe
stop_id_col = "stop_id"

In [117]:
def preprocessing_hospital_data(hospital_data):
    """Reduce the hospital table to address, coarse category and coordinates.

    The facility type in ``clCdNm`` is collapsed into a coarser ``category``
    (types not listed in the mapping are kept as they are), and ``XPos`` /
    ``YPos`` are renamed to ``longitude`` / ``latitude``.

    The input frame is not modified. The previous implementation assigned the
    ``category`` column directly on the caller's frame as a side effect.

    Parameters
    ----------
    hospital_data : pandas.DataFrame
        Must contain the columns ``clCdNm``, ``addr``, ``XPos`` and ``YPos``.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``addr``, ``category``, ``longitude``, ``latitude``.
    """
    category_map = {"한방병원" : "병원",
                    "치과병원" : "병원",
                    "정신병원" : "병원",
                    "상급종합" : "종합병원",
                    "부속의원" : "의원",
                    "치과의원" : "의원",
                    "한의원" : "의원",
                    "보건진료소" : "보건소",
                    "보건지소" : "보건소"}

    # assign() returns a new frame, so the caller's object is left untouched.
    with_category = hospital_data.assign(category = hospital_data["clCdNm"].replace(category_map))

    selected = with_category[["addr", "category", "XPos", "YPos"]]
    return selected.rename(columns = {"XPos" : "longitude",
                                      "YPos" : "latitude"})

def preprocessing_trading_area_data(trading_area_data):
    """Select the trading-area columns of interest and give them English names.

    Parameters
    ----------
    trading_area_data : pandas.DataFrame
        Must contain the columns ``상호명``, ``상권업종중분류명``, ``도로명주소``,
        ``위도`` and ``경도``.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``name``, ``category``, ``addr``, ``latitude``,
        ``longitude`` (in that order).
    """
    # Source column -> output column; the key order is also the output column order.
    column_names = {"상호명" : "name",
                    "상권업종중분류명" : "category",
                    "도로명주소" : "addr",
                    "위도" : "latitude",
                    "경도" : "longitude"}

    selected = trading_area_data[list(column_names)]
    return selected.rename(columns = column_names)


def preprocessing_school_data(school_data):
    """Reduce the school table to name, coarse category, address and coordinates.

    The school type in ``학교종류`` is collapsed into a coarser ``category``
    (types not listed in the mapping are kept as they are), and ``학교명`` /
    ``지번주소`` are renamed to ``name`` / ``addr``.

    The input frame is not modified. The previous implementation assigned the
    ``category`` column directly on the caller's frame as a side effect.

    Parameters
    ----------
    school_data : pandas.DataFrame
        Must contain the columns ``학교명``, ``학교종류``, ``지번주소``,
        ``latitude`` and ``longitude``.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``name``, ``category``, ``addr``, ``latitude``,
        ``longitude``.
    """
    category_map = {"전문대학(3년제)" : "전문대학",
                    "사내대학(전문)" : "전문대학",
                    "기능대학" : "전문대학",
                    "일반대학원" : "대학원",
                    "전문대학원" : "대학원",
                    "특수대학원" : "대학원",
                    "일반고등학교" : "고등학교",
                    "공업고등학교" : "고등학교",
                    "상업고등학교" : "고등학교",
                    "가사고등학교" : "고등학교",
                    "체육고등학교" : "고등학교",
                    "외국어고등학교" : "고등학교",
                    "과학고등학교" : "고등학교",
                    "예술고등학교" : "고등학교"}

    # assign() returns a new frame, so the caller's object is left untouched.
    with_category = school_data.assign(category = school_data["학교종류"].replace(category_map))

    selected = with_category[["학교명", "category", "지번주소", "latitude", "longitude"]]
    return selected.rename(columns = {"학교명" : "name",
                                      "지번주소" : "addr"})

In [118]:
# Add spatial features (trading-area, school and hospital information).
# Start from one row per stop: stop id, stop name, longitude, latitude.
bus_stop_info = data[[stop_id_col, "stop_nm", "longitude", "latitude"]].drop_duplicates().reset_index(drop = True)

# Add trading-area variables.
trading_area_data = pd.read_csv("/home/seho/Passenger_Demand/data/울산광역시_상권정보_201231.csv")
# Preprocess the trading-area table.
trading_area_data = preprocessing_trading_area_data(trading_area_data = trading_area_data)
trading_area_category_list = trading_area_data["category"].drop_duplicates().to_list()
# count_nearby is project code; presumably it counts, per category, the rows of
# nearby_data within `dist` of each stop (unit of `dist` not visible here — TODO confirm).
bus_stop_info = parallelize_dataframe(df = bus_stop_info, 
                                      func = count_nearby, 
                                      num_cores = num_cores, 
                                      col_nm = "trading_area",
                                      nearby_data = trading_area_data, 
                                      dist = 0.2,
                                      category_list = trading_area_category_list)

# Add hospital variables.
hospital_data = pd.read_csv("/home/seho/Passenger_Demand/data/api_data/hospital_data.csv")
# Preprocess the hospital table.
hospital_data = preprocessing_hospital_data(hospital_data = hospital_data)
hospital_category_list = hospital_data["category"].drop_duplicates().to_list()
bus_stop_info = parallelize_dataframe(df = bus_stop_info, 
                                      func = count_nearby, 
                                      num_cores = num_cores, 
                                      col_nm = "hospital",
                                      nearby_data = hospital_data, 
                                      dist = 0.2,
                                      category_list = hospital_category_list)


# Add school variables.
school_data = pd.read_csv("/home/seho/Passenger_Demand/data/school_data.csv")
school_data = preprocessing_school_data(school_data = school_data)

school_category_list = school_data["category"].drop_duplicates().to_list()
bus_stop_info = parallelize_dataframe(df = bus_stop_info, 
                                      func = count_nearby, 
                                      num_cores = num_cores,
                                      col_nm = "school",
                                      nearby_data = school_data, 
                                      dist = 0.2,
                                      category_list = school_category_list)

100%|██████████| 11/11 [00:12<00:00,  1.15s/it]
100%|██████████| 11/11 [00:12<00:00,  1.16s/it]
100%|██████████| 11/11 [00:12<00:00,  1.17s/it]
100%|██████████| 11/11 [00:12<00:00,  1.18s/it]
100%|██████████| 11/11 [00:13<00:00,  1.18s/it]
100%|██████████| 11/11 [00:13<00:00,  1.19s/it]
100%|██████████| 11/11 [00:12<00:00,  1.18s/it]
100%|██████████| 11/11 [00:12<00:00,  1.18s/it]
100%|██████████| 11/11 [00:12<00:00,  1.17s/it]
100%|██████████| 10/10 [00:11<00:00,  1.16s/it]
100%|██████████| 11/11 [00:12<00:00,  1.16s/it]
100%|██████████| 11/11 [00:12<00:00,  1.14s/it]
100%|██████████| 11/11 [00:00<00:00, 28.40it/s]
100%|██████████| 11/11 [00:00<00:00, 27.64it/s]
100%|██████████| 11/11 [00:00<00:00, 28.12it/s]
100%|██████████| 11/11 [00:00<00:00, 27.33it/s]
100%|██████████| 11/11 [00:00<00:00, 26.59it/s]
100%|██████████| 11/11 [00:00<00:00, 27.00it/s]
100%|██████████| 11/11 [00:00<00:00, 27.30it/s]
100%|██████████| 11/11 [00:00<00:00, 28.05it/s]
100%|██████████| 11/11 [00:00<00:00, 26.

In [121]:
# Join the per-stop spatial counts onto the main frame (default inner join on stop_id).
# Name and coordinates are dropped from the right side so they are not duplicated.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
all_date = pd.merge(all_date, bus_stop_info.drop(columns = ["stop_nm", "latitude", "longitude"]), on = ["stop_id"])

#### 대학 정보

### 정류장 정보 Join(거리기반)

In [109]:
# NOTE(review): ml_data is never assigned in the visible cells of this notebook, and
# the execution count of this cell predates the surrounding ones. It looks like a
# leftover from an earlier session and fails on a fresh kernel unless ml_data is
# defined elsewhere — confirm whether it should operate on all_date or be removed.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
ml_data = pd.merge(ml_data, bus_stop_401_info.drop(columns = ["stop_id", "city"]), on = "mybi_stop_id")

In [110]:
ml_data.shape
# (620940, 175)

(620940, 175)

### 울산행사정보

In [155]:
# Load the Ulsan event table (path is relative to the user's home directory).
event_data = pd.read_csv("~/Passenger_Demand/data/event_data.csv")

In [156]:
def preprocess_event_data(event_data, start_date, end_date):
    """Build hourly start/end timestamps for events and keep those starting in a date window.

    The hour is taken from the digits before the first ``:`` in
    ``eventStartTime`` / ``eventEndTime``, zero-padded to two digits; hour
    ``24`` is mapped to ``23`` so it can be parsed with ``%H``. Minutes are
    discarded.

    The input frame is not modified. The previous implementation overwrote
    ``eventStartTime`` / ``eventEndTime`` on the caller's frame, so calling the
    function a second time on the same frame found no ``:`` to extract and
    produced missing timestamps.

    Parameters
    ----------
    event_data : pandas.DataFrame
        Must contain ``eventNm``, ``rdnmadr``, ``eventStartDate``,
        ``eventStartTime``, ``eventEndDate``, ``eventEndTime``, ``latitude``
        and ``longitude``. Dates are ``%Y-%m-%d`` strings.
    start_date, end_date : str or datetime-like
        Inclusive bounds; an event is kept when the calendar date of its
        ``startDate`` lies between them.

    Returns
    -------
    pandas.DataFrame
        Columns ``eventNm``, ``rdnmadr``, ``startDate``, ``endDate``,
        ``latitude``, ``longitude`` with a fresh 0..n-1 index.
    """
    def _hour_text(time_col):
        # "9:30" -> "09"; "24:00" -> "23" (24 is not a valid %H value).
        return (event_data[time_col].str.extract(r"(\d+):")[0]
                                    .str.pad(width=2, side='left', fillchar='0')
                                    .replace({"24":"23"}))

    start_stamp = pd.to_datetime(event_data["eventStartDate"] + " " + _hour_text("eventStartTime"), format = "%Y-%m-%d %H")
    end_stamp = pd.to_datetime(event_data["eventEndDate"] + " " + _hour_text("eventEndTime"), format = "%Y-%m-%d %H")

    # assign() returns a new frame, so the caller's object is left untouched.
    events = event_data.assign(startDate = start_stamp, endDate = end_stamp)
    events = events[["eventNm", "rdnmadr", "startDate", "endDate", "latitude", "longitude"]]

    first_day = pd.to_datetime(start_date).date()
    last_day = pd.to_datetime(end_date).date()
    starts_in_window = events["startDate"].dt.date.between(first_day, last_day)

    return events.loc[starts_in_window].reset_index(drop = True)

In [157]:
# Keep events whose start date falls in calendar year 2020.
event_data = preprocess_event_data(event_data = event_data, start_date = "2020-01-01", end_date = "2020-12-31")

In [167]:
def count_time_nearby(data, date_col, nearby_data, col_nm, dist = 0.2):
    """Count the entries of ``nearby_data`` that are active at a record's time and within ``dist``.

    An entry is active when ``startDate <= data[date_col] <= endDate``. Among
    the active entries, those whose haversine distance to the record's
    (latitude, longitude) is at most ``dist`` are counted.

    The time filter is built from ``nearby_data`` itself. The previous
    implementation read the notebook-level ``event_data`` instead, so any other
    table passed as ``nearby_data`` (for example the festival table) was
    filtered with the wrong start/end dates.

    Parameters
    ----------
    data : row-like
        Assumed to be a single record (e.g. one DataFrame row as a Series)
        providing ``date_col``, ``latitude`` and ``longitude`` as scalars —
        TODO confirm against parallelize_dataframe.
    date_col : str
        Name of the timestamp field in ``data``.
    nearby_data : pandas.DataFrame
        Must contain ``startDate``, ``endDate``, ``latitude``, ``longitude``.
    col_nm : str
        Prefix of the output field, which is named ``f"{col_nm}_nearby"``.
    dist : float, default 0.2
        Distance threshold, in whatever unit the project ``haversine`` helper
        returns (presumably kilometres — TODO confirm).

    Returns
    -------
    A copy of ``data`` with the ``f"{col_nm}_nearby"`` count added.
    """
    data_copy = data.copy()
    moment = data_copy[date_col]

    # Use the table that was passed in, not a notebook-level global.
    is_active = (nearby_data["startDate"] <= moment) & (nearby_data["endDate"] >= moment)
    within_data = nearby_data.loc[is_active]

    if len(within_data) == 0:
        data_copy[f"{col_nm}_nearby"] = 0
    else:
        here = (data_copy["latitude"], data_copy["longitude"])
        dist_list = within_data[["latitude", "longitude"]].apply(lambda x: haversine((x["latitude"], x["longitude"]), here), axis = 1)
        data_copy[f"{col_nm}_nearby"] = (dist_list <= dist).sum()

    return data_copy

In [127]:
%%time
# Add the "event_nearby" count to every record, using 12 workers. Each record is
# compared against event_data by time and by distance (threshold 0.2).
all_date = parallelize_dataframe(df = all_date, 
                                 func = count_time_nearby, 
                                 num_cores = 12, 
                                 date_col = "transdate",
                                 col_nm = "event",
                                 nearby_data = event_data, 
                                 dist = 0.2)

100%|█████████▉| 46794/46942 [03:14<00:00, 269.32it/s]
100%|██████████| 46942/46942 [03:22<00:00, 231.37it/s]
100%|██████████| 46942/46942 [03:22<00:00, 231.51it/s]
100%|██████████| 46942/46942 [03:21<00:00, 232.85it/s]
100%|██████████| 46942/46942 [03:21<00:00, 233.07it/s]
100%|██████████| 46942/46942 [03:26<00:00, 227.32it/s]
100%|██████████| 46941/46941 [03:21<00:00, 232.85it/s]
100%|██████████| 46942/46942 [03:26<00:00, 227.53it/s]
100%|██████████| 46942/46942 [03:26<00:00, 227.63it/s]
100%|██████████| 46941/46941 [03:21<00:00, 232.97it/s]
100%|██████████| 46941/46941 [03:20<00:00, 233.75it/s]
100%|██████████| 46941/46941 [03:20<00:00, 234.16it/s]


CPU times: user 26.7 s, sys: 15.2 s, total: 41.8 s
Wall time: 3min 37s


In [116]:
ml_data.shape
# (620940, 176)

(620940, 176)

### 축제 정보

In [160]:
# Load the festival table (path is relative to the user's home directory).
festival_data = pd.read_csv("~/Passenger_Demand/data/festival_data.csv")

In [161]:
festival_data.shape

(21, 18)

In [162]:
def preprocess_festival_data(festival_data, start_date, end_date):
    """Parse festival start/end dates and keep festivals starting in a date window.

    The input frame is not modified. The previous implementation added the
    ``startDate`` / ``endDate`` columns to the caller's frame as a side effect.

    Parameters
    ----------
    festival_data : pandas.DataFrame
        Must contain ``fstvlNm``, ``rdnmadr``, ``fstvlStartDate``,
        ``fstvlEndDate``, ``latitude`` and ``longitude``.
    start_date, end_date : str or datetime-like
        Inclusive bounds; a festival is kept when the calendar date of its
        ``startDate`` lies between them.

    Returns
    -------
    pandas.DataFrame
        Columns ``fstvlNm``, ``rdnmadr``, ``startDate``, ``endDate``,
        ``latitude``, ``longitude`` with a fresh 0..n-1 index.
    """
    # assign() returns a new frame, so the caller's object is left untouched.
    festivals = festival_data.assign(startDate = pd.to_datetime(festival_data["fstvlStartDate"]),
                                     endDate = pd.to_datetime(festival_data["fstvlEndDate"]))
    festivals = festivals[["fstvlNm", "rdnmadr", "startDate", "endDate", "latitude", "longitude"]]

    first_day = pd.to_datetime(start_date).date()
    last_day = pd.to_datetime(end_date).date()
    starts_in_window = festivals["startDate"].dt.date.between(first_day, last_day)

    return festivals.loc[starts_in_window].reset_index(drop = True)
    

In [164]:
# Keep festivals whose start date falls in calendar year 2020.
festival_data = preprocess_festival_data(festival_data = festival_data, start_date = "2020-01-01", end_date = "2020-12-31")

In [168]:
%%time
# Add the "festival_nearby" count to every record, using 12 workers.
# The prefix must differ from the one used for events: with col_nm = "event" this
# call wrote to "event_nearby" and overwrote the event counts computed earlier.
all_date = parallelize_dataframe(df = all_date, 
                                 func = count_time_nearby, 
                                 num_cores = 12, 
                                 date_col = "transdate",
                                 col_nm = "festival",
                                 nearby_data = festival_data, 
                                 dist = 0.2)

100%|██████████| 46942/46942 [00:59<00:00, 788.41it/s]
100%|██████████| 46942/46942 [01:00<00:00, 770.92it/s]
100%|██████████| 46942/46942 [01:00<00:00, 773.16it/s]
100%|██████████| 46942/46942 [01:00<00:00, 774.29it/s] 
100%|██████████| 46942/46942 [01:00<00:00, 780.43it/s]]
100%|██████████| 46942/46942 [01:00<00:00, 770.68it/s]]
100%|██████████| 46942/46942 [01:01<00:00, 761.99it/s] 
100%|██████████| 46942/46942 [01:01<00:00, 762.65it/s] 
100%|██████████| 46941/46941 [01:01<00:00, 768.96it/s] 
100%|██████████| 46941/46941 [00:59<00:00, 794.96it/s] 
100%|██████████| 46941/46941 [01:00<00:00, 772.74it/s] 
100%|██████████| 46941/46941 [00:58<00:00, 803.20it/s] 


CPU times: user 17 s, sys: 7.22 s, total: 24.2 s
Wall time: 1min 14s


In [122]:
ml_data.shape
# (620940, 177)

(620940, 177)

In [126]:
# Save the modelling table without the helper columns weekofyear and date.
# NOTE(review): ml_data is never assigned in the visible cells (the features above are
# accumulated in all_date) — confirm which frame should be saved here.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
ml_data.drop(columns = ["weekofyear", "date"]).to_pickle("/home/seho/Passenger_Demand/data/ml_data.pkl")

### 인구 정보

In [None]:
# Load the Ulsan population table (EUC-KR encoded CSV).
population_data = pd.read_csv("~/Passenger_Demand/data/울산광역시_인구 현황_20200727.csv", encoding = "euc-kr")

In [None]:
# Read the API key from the environment; credentials must not be stored in the notebook.
# NOTE(review): the key that was hard-coded here has been exposed and should be revoked.
# NOTE(review): googlemaps is not imported in the visible cells — confirm it is imported elsewhere.
gmaps = googlemaps.Client(key = os.environ["GOOGLE_MAPS_API_KEY"])

In [None]:
# Reverse-geocode one coordinate through the Geocoding REST endpoint.
# The key comes from the environment instead of being embedded in the URL, and the
# query string is built by requests from `params`.
# NOTE(review): requests is not imported in the visible cells — confirm it is imported elsewhere.
rq = requests.get("https://maps.googleapis.com/maps/api/geocode/json",
                  params = {"latlng" : "35.60467,129.4328",
                            "key" : os.environ["GOOGLE_MAPS_API_KEY"]})

In [None]:
# Example request URL kept for reference, with the real API key redacted.
"https://maps.googleapis.com/maps/api/geocode/json?latlng=35.60467,129.4328&key=<GOOGLE_MAPS_API_KEY>"

In [None]:
# Reverse-geocode one coordinate with results in Korean. The API expects a language
# code ("ko"), not the language name.
gmaps.reverse_geocode((35.60467, 129.4328), language = "ko")