In [1]:
import pandas as pd
import numpy as np
import os
from collections import defaultdict
import requests
import ast
import xmltodict
import numpy as np
from bs4 import BeautifulSoup
from collections import defaultdict


class Data_by_API(object):
    
    def __init__(self, url):
        self.url = url
        self.features = None
        self.main_key = None
#         self.serviceKey = serviceKey
    
    
    def calculate_max_page(self, type = "json"):
        rq = self.request()
        
        rq_dict = self.to_dict(txt = rq.text, type = type)
        
        self.n_rows = int(self.params_dict["numOfRows"])
        
        try:
            self.total_count = int(rq_dict["response"]["body"]["totalCount"])
        except:
            xmlsoup = BeautifulSoup(rq.text,'html.parser')
            self.total_count = int(xmlsoup.find("totalcount").text)
                
        max_page = int(np.ceil(self.total_count / self.n_rows))
        
        print(f"n_rows : {self.n_rows}, total_count : {self.total_count}, max_page = {max_page}")
        
        return max_page
    
    
    def create_request_url(self, params_dict):
#         params_dict["service_key"] = self.serviceKey
        params_list = [f"{k}={v}" for k, v in params_dict.items()]
        params_str = "&".join(params_list)
#         print(params_str)
        
        self.request_url = self.url + params_str
        
        return self.request_url
    
    
    def create_request_urls(self):
        max_page = self.calculate_max_page(type = self.type)
        
        params_dict = self.params_dict.copy()
        
        request_urls= []
        for i in range(max_page):
            params_dict["pageNo"] = i + 1
            request_urls.append(self.create_request_url(params_dict = params_dict))
            
        return request_urls
    

    
    def to_dict(self, txt, type):
        # json / xml to dict
        if type == "json":
            rq_dict = ast.literal_eval(txt)
        elif type == "xml":
            rq_dict = xmltodict.parse(txt)
            
        return rq_dict
    
    
    def extract_values_from_dict(self, dct):
        try: 
            dict_list = dct["response"]["body"]["items"]["item"]
        except:
            dict_list = dct["response"]["body"]["items"]
            
        return dict_list
    
    
    
    def parse(self, request, features = None, type = "json"):
        
        data_dict = defaultdict(list)
        
        rq_dict = self.to_dict(txt = request.text, type = type)
        
        # 일부 url의 경우는 item이 아닌 items에 값이 존재
        dict_list = self.extract_values_from_dict(dct = rq_dict)
        self.dict_list = dict_list
        # 값이 1개인 경우 list가 아니라 dictionary 1개가 반환되므로, 이를 list(dict)형태로 변환
        if isinstance(dict_list, dict):
            dict_list = [dict_list]
        
        # item이 없는 경우 빈 Dictionary(data dict)를 반환
        if dict_list is None:
            return data_dict
        
        if features is None:
            features = dict_list[0].keys()
            
        for x in dict_list:
            for col in features:
                data_dict[col].append(x.get(col))

        return data_dict
    
    
    def request(self, request_url = None):
        
        if request_url == None:
            request_url = self.request_url
            
        rq = requests.get(request_url, allow_redirects = True)
        
        return rq
        



class Weather_Data_by_API(Data_by_API):
    
    base_url = "http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList?"
    
    def __init__(self, params_dict):
        super().__init__(url = self.base_url)
        self.request_url = super().create_request_url(params_dict = params_dict)
        self.params_dict = params_dict
        self.type = params_dict["dataType"].lower()
    
    
    def get(self):
        
        self.request_urls = self.create_request_urls()

        data_dict = defaultdict(list)
        for request_url in self.request_urls:
            rq = self.request(request_url = request_url)
            text_dict = self.parse(request = rq, features = None, type = self.type)
            
            for k, v in text_dict.items():
                data_dict[k].extend(v)
            
        return pd.DataFrame(data_dict)
    
    
def Load_Weather_Data(params_dict,
                      save_tf = False, 
                      save_path = os.getcwd()):
    
    weather_api = Weather_Data_by_API(params_dict = params_dict)
    weather_data = weather_api.get()
    
    
    # index 초기화
    weather_data = weather_data.drop_duplicates().reset_index(drop=True)
  
    # 저장여부 변수가 True면 csv파일로 저장, False면 Df로 리턴
    if save_tf == True :
        weather_data.to_csv(save_path +'/weather_data.csv', index=False)
    else :
        return weather_data

In [12]:
save_path = "./asdfadsf/asdfasdf"

In [13]:
if os.path.exists(save_path) == False:
    os.makedirs(save_path)

In [2]:
service_key = 'eLWdQyzctRdtv8bEOuewsTtK6sNkoWp1bE74OUBk43jg4tU6AsI6yYt6Z%2B7sOeaqtB5pTH2yHuPRIuEHtu5amQ%3D%3D'
google_key = "ReOsv=IfT43PVLSiA4vDRjs=40TCqIw97oVP2D9QpmU="

In [3]:
base_year  = "2020"

In [8]:
weather_params = {"serviceKey" : service_key,
                   "stnIds" : "152",
                   "startDt" : f"{base_year}0101",
                   "startHh" : "00",
                   "endDt" : f"{base_year}1230", 
                   "endHh" : "23",
                   "numOfRows" : "900",
                   "dataType" : "XML",
                   "pageNo" : "1",
                   "dataCd" : "ASOS",
                   "dateCd" : "HR"}

In [11]:
weather_api.request_url

'http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList?serviceKey=eLWdQyzctRdtv8bEOuewsTtK6sNkoWp1bE74OUBk43jg4tU6AsI6yYt6Z%2B7sOeaqtB5pTH2yHuPRIuEHtu5amQ%3D%3D&stnIds=152&startDt=20200101&startHh=00&endDt=20201230&endHh=23&numOfRows=900&dataType=XML&pageNo=1&dataCd=ASOS&dateCd=HR'

In [10]:
weather_api = Weather_Data_by_API(params_dict = weather_params)
weather_data = weather_api.get()

AttributeError: 'NoneType' object has no attribute 'text'

In [9]:
# 날씨 정보 수집
Load_Weather_Data(weather_params)

AttributeError: 'NoneType' object has no attribute 'text'

In [16]:
import os

In [17]:
f = open("/home/ds_user1/.ssh/id_rsa.pub", "r")
text = f.read()
print(text)

FileNotFoundError: [Errno 2] No such file or directory: '/home/ds_user1/.ssh/id_rsa.pub'

In [11]:
from googlemaps import exceptions
exceptions.ApiError

In [14]:
exceptions.ApiError

googlemaps.exceptions.ApiError

In [8]:
import subprocess
subprocess.call(["pip", "install", "bs4"])

0

In [4]:
holiday_params_dict = {"serviceKey" : 'eLWdQyzctRdtv8bEOuewsTtK6sNkoWp1bE74OUBk43jg4tU6AsI6yYt6Z%2B7sOeaqtB5pTH2yHuPRIuEHtu5amQ%3D%3D', "solYear" : 2020}

import subprocess
subprocess.call(["pip", "install", "beautifulsoup4"])
subprocess.call(["pip", "install", "xmltodict"])
subprocess.call(["pip", "install", "ast"])

import pandas as pd
import requests
import ast
import xmltodict
import numpy as np
from bs4 import BeautifulSoup
from collections import defaultdict


class Data_by_API(object):
    
    def __init__(self, url):
        self.url = url
        self.features = None
        self.main_key = None
#         self.serviceKey = serviceKey
    
    
    def calculate_max_page(self, type = "json"):
        rq = self.request()
        
        rq_dict = self.to_dict(txt = rq.text, type = type)
        
        self.n_rows = int(self.params_dict["numOfRows"])
        
        try:
            self.total_count = int(rq_dict["response"]["body"]["totalCount"])
        except:
            xmlsoup = BeautifulSoup(rq.text,'html.parser')
            self.total_count = int(xmlsoup.find("totalcount").text)
                
        max_page = int(np.ceil(self.total_count / self.n_rows))
        
        print(f"n_rows : {self.n_rows}, total_count : {self.total_count}, max_page = {max_page}")
        
        return max_page
    
    
    def create_request_url(self, params_dict):
#         params_dict["service_key"] = self.serviceKey
        params_list = [f"{k}={v}" for k, v in params_dict.items()]
        params_str = "&".join(params_list)
#         print(params_str)
        
        self.request_url = self.url + params_str
        
        return self.request_url
    
    
    def create_request_urls(self):
        max_page = self.calculate_max_page(type = self.type)
        
        params_dict = self.params_dict.copy()
        
        request_urls= []
        for i in range(max_page):
            params_dict["pageNo"] = i + 1
            request_urls.append(self.create_request_url(params_dict = params_dict))
            
        return request_urls
    

    
    def to_dict(self, txt, type):
        # json / xml to dict
        if type == "json":
            rq_dict = ast.literal_eval(txt)
        elif type == "xml":
            rq_dict = xmltodict.parse(txt)
            
        return rq_dict
    
    
    def extract_values_from_dict(self, dct):
        try: 
            dict_list = dct["response"]["body"]["items"]["item"]
        except:
            dict_list = dct["response"]["body"]["items"]
            
        return dict_list
    
    
    
    def parse(self, request, features = None, type = "json"):
        
        data_dict = defaultdict(list)
        
        rq_dict = self.to_dict(txt = request.text, type = type)
        
        # 일부 url의 경우는 item이 아닌 items에 값이 존재
        dict_list = self.extract_values_from_dict(dct = rq_dict)
        self.dict_list = dict_list
        # 값이 1개인 경우 list가 아니라 dictionary 1개가 반환되므로, 이를 list(dict)형태로 변환
        if isinstance(dict_list, dict):
            dict_list = [dict_list]
        
        # item이 없는 경우 빈 Dictionary(data dict)를 반환
        if dict_list is None:
            return data_dict
        
        if features is None:
            features = dict_list[0].keys()
            
        for x in dict_list:
            for col in features:
                data_dict[col].append(x.get(col))

        return data_dict
    
    
    def request(self, request_url = None):
        
        if request_url == None:
            request_url = self.request_url
            
        rq = requests.get(request_url, allow_redirects = True)
        
        return rq

class Holiday_Data_by_API(Data_by_API):
    
    holiday_url = "http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getHoliDeInfo?"
    restday_url = "http://apis.data.go.kr/B090041/openapi/service/SpcdeInfoService/getRestDeInfo?"
    
    def __init__(self, params_dict, type):
        if type == "holi":
            base_url = self.holiday_url
        elif type == "rest":
            base_url = self.restday_url
            
        super().__init__(url = base_url)
#         self.year = year
        self.params_dict = params_dict
        self.type = "xml"
        
    
    def create_request_urls(self, params_dict):
        params_dict = params_dict.copy()
        request_urls = []
        for x in range(1, 13):
            if x < 10:
                params_dict["solMonth"] =  f"0{str(x)}"

            else:
                params_dict["solMonth"] =  str(x)
                
            request_urls.append(self.create_request_url(params_dict = params_dict))
    
        return request_urls
    
    
    def get(self):
        
        if "solMonth" in self.params_dict.keys():
            self.request_urls = [self.create_request_url(params_dict = self.params_dict)]
        else:             
            self.request_urls = self.create_request_urls(params_dict = self.params_dict)
        
        data_dict = defaultdict(list)
        for request_url in self.request_urls:
            
            rq = super().request(request_url = request_url)
            temp_dict = super().parse(request = rq, features = ["locdate", "dateName"], type = self.type)
            
            for k, v in temp_dict.items():
                data_dict[k].extend(v)

                    
        return pd.DataFrame(data_dict)
    


def Load_Holiday_Data(params_dict,
                      save_tf = False, 
                      save_path = os.getcwd()):
    
    holiday_api = Holiday_Data_by_API(params_dict = params_dict, type = "rest")
    holiday_data = holiday_api.get()
    
    # index 초기화
    holiday_data = holiday_data.reset_index(drop=True)
  
    # 저장여부 변수가 True면 csv파일로 저장, False면 Df로 리턴
    if save_tf == True :
        holiday_data.to_csv(save_path +'/holiday_data.csv', index=False)
    else :
        return holiday_data
    
holiday_data = Load_Holiday_Data(holiday_params_dict)

In [None]:
from Data_Load import *

In [None]:
from parameters import *

In [3]:
city = "울산"

In [4]:
# 학교(초중고) 정보 수집
Load_School_Data(school_params_dict,
                 select_region = city, save_tf = True, save_path = "/home/seho/Passenger_Demand/data/api_data/")

n_rows : 1000, total_count : 500, max_page = 1


In [6]:
# 대학교 정보 수집
Load_University_Data(university_params_dict,
                     google_key = 'AIzaSyDfLv3OzniRbUc7tTRBJndpiuyepHSmUrE',
                     select_region = city,
                     save_tf = True,
                     save_path = "/home/seho/Passenger_Demand/data/api_data/")

TypeError: get_geocodeDf() takes 2 positional arguments but 3 were given