In [4]:
import json
import os

import pandas as pd
import requests

def get_kosis_data(api_url, timeout=30):
    """Fetch a Statistics Korea (KOSIS) OpenAPI URL and return the payload as a DataFrame.

    Args:
        api_url: Fully formed request URL, including the API key and query
            parameters. The response body is parsed as JSON.
        timeout: Seconds to wait for the server before giving up. Defaults
            to 30 so a stalled connection cannot hang the notebook forever.

    Returns:
        A ``pandas.DataFrame`` built from the JSON payload, or ``None`` when
        the request raises, the HTTP status is not 200, the API answers with
        an error object, or the body cannot be converted.
    """
    try:
        # requests waits indefinitely unless a timeout is given explicitly.
        response = requests.get(api_url, timeout=timeout)
        print(f"응답 상태: {response.status_code}")

        if response.status_code != 200:
            print(f"API 호출 실패: {response.status_code}")
            return None

        json_data = response.json()

        # The API may answer HTTP 200 with an error object (err / errMsg keys)
        # instead of a list of records; report it directly rather than letting
        # pandas fail on a dict of scalars with an unrelated-looking message.
        if isinstance(json_data, dict) and ("err" in json_data or "errMsg" in json_data):
            print(f"API 오류 응답: {json_data.get('err')} {json_data.get('errMsg')}")
            return None

        df = pd.DataFrame(json_data)
        print(f"데이터 수집 완료: {len(df)}행")
        return df

    except Exception as e:
        # Best-effort contract: callers check for None instead of catching.
        print(f"오류 발생: {e}")
        return None

# Usage
# Read the API key from the environment so the credential is never stored in
# the notebook source or its saved outputs.
kosis_api_key = os.environ.get("KOSIS_API_KEY")
if not kosis_api_key:
    raise RuntimeError("KOSIS_API_KEY 환경 변수에 통계청 OpenAPI 인증키를 설정하세요.")

# Same query as before, with only the apiKey value substituted in.
your_api_url = (
    "https://kosis.kr/openapi/Param/statisticsParameterData.do?method=getList"
    f"&apiKey={kosis_api_key}"
    "&itmId=T70+T80+&objL1=ALL&objL2=ALL&objL3=&objL4=&objL5=&objL6=&objL7=&objL8="
    "&format=json&jsonVD=Y&prdSe=Q&newEstPrdCnt=9&prdInterval=1&orgId=101&tblId=DT_1B26003_A01"
)
df = get_kosis_data(your_api_url)

# Inspect the result; get_kosis_data returns None on any failure.
if df is not None:
    print(df.head())
    print(f"컬럼: {df.columns.tolist()}")


응답 상태: 200
데이터 수집 완료: 5832행
  C1_OBJ_NM C2_NM       DT  C2  C1 PRD_SE UNIT_NM_ENG ITM_ID          TBL_ID  \
0      전출지별    전국  1717469  00  00      Q      Person    T70  DT_1B26003_A01   
1      전출지별    전국  1426239  00  00      Q      Person    T70  DT_1B26003_A01   
2      전출지별    전국  1470262  00  00      Q      Person    T70  DT_1B26003_A01   
3      전출지별    전국  1514768  00  00      Q      Person    T70  DT_1B26003_A01   
4      전출지별    전국  1814718  00  00      Q      Person    T70  DT_1B26003_A01   

  ITM_NM  ...  LST_CHN_DE      C1_NM_ENG C1_NM UNIT_NM ITM_NM_ENG  \
0   이동자수  ...  2023-04-24  Whole Country    전국       명   Migrants   
1   이동자수  ...  2023-07-10  Whole Country    전국       명   Migrants   
2   이동자수  ...  2023-10-16  Whole Country    전국       명   Migrants   
3   이동자수  ...  2024-01-10  Whole Country    전국       명   Migrants   
4   이동자수  ...  2024-04-08  Whole Country    전국       명   Migrants   

            C2_OBJ_NM_ENG      C2_NM_ENG ORG_ID            C1_OBJ_NM_ENG  \


In [5]:
# Mount Google Drive at /content/drive; the CSV paths read and written in the
# following cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Load the preprocessed population-migration table from the mounted Drive.
move_df = pd.read_csv(
    '/content/drive/MyDrive/데이터마이닝/기말/df_20250617/Data_Analysis_Project/population_migration_korean_preprocessed.csv'
)

In [7]:
# Load the KOSIS employment table from the mounted Drive.
kb_df = pd.read_csv(
    '/content/drive/MyDrive/데이터마이닝/기말/df_20250617/Data_Analysis_Project/kosis_employment_data.csv'
)

In [8]:
# Converter for the quarter-period column format
def convert_quarter_format(q):
    """Turn a period code such as ``201501`` into the label ``'2015Q01'``.

    The first four characters of ``str(q)`` are taken as the year and the
    remainder as the quarter part, which is kept verbatim after a ``Q``.

    Args:
        q: Period code as an int, integral float, or string.

    Returns:
        The ``'<year>Q<quarter>'`` string. A value that already carries the
        ``Q`` separator is returned unchanged, so applying the conversion a
        second time (e.g. re-running the cell) does not yield ``'2015QQ01'``.
    """
    # An integral float such as 201501.0 would otherwise leak ".0" into the
    # label; pandas produces floats when an integer column contains NaN.
    if isinstance(q, float) and q.is_integer():
        q = int(q)

    text = str(q)
    year, quarter = text[:4], text[4:]

    # Already converted: leave as is to keep the function idempotent.
    if quarter.startswith('Q'):
        return text

    return f'{year}Q{quarter}'

In [9]:
# Apply the conversion element-wise.
# Note: the column is overwritten in place, so on a re-run this cell's input
# is its own previous output.
move_df['분기별_연도'] = move_df['분기별_연도'].map(convert_quarter_format)

In [12]:
# Align the column name with move_df (space -> underscore) so both frames
# share the same join key.
kb_df = kb_df.rename({'분기별 연도': '분기별_연도'}, axis='columns')

In [13]:
# Keep only the rows whose region is 경상북도.
gyeongbuk_move = move_df.loc[move_df['지역'].eq('경상북도')]

In [14]:
# Reduce to the join key plus the two migration counts, one row per quarter.
# NOTE(review): when a quarter has several rows only the first is kept (no
# summing despite the variable name) — confirm this is intended.
gyeongbuk_sum = (
    gyeongbuk_move
    .loc[:, ['분기별_연도', '전입자수', '전출자수']]
    .drop_duplicates(subset='분기별_연도', keep='first')
)

In [15]:
# Left join: every employment row is kept; quarters without migration data
# get NaN in the migration columns.
merged_df = kb_df.merge(gyeongbuk_sum, how='left', on='분기별_연도')

In [16]:
# Preview the first five merged rows.
merged_df.head(5)

Unnamed: 0,연도,분기,분기별_연도,지역,고용률,취업자,실업률,실업자,전입자수,전출자수
0,2015,1,2015Q01,경상북도,60.0,1375.9,4.0,57.7,29208.0,30848.0
1,2015,2,2015Q02,경상북도,63.5,1457.7,3.0,44.5,31184.0,32243.0
2,2015,3,2015Q03,경상북도,63.1,1450.9,2.7,39.5,35808.0,37122.0
3,2015,4,2015Q04,경상북도,62.8,1447.5,2.6,38.1,28237.0,28861.0
4,2016,1,2016Q01,경상북도,60.6,1400.1,3.8,55.0,27354.0,29039.0


In [17]:
# Persist the merged table to Drive without writing the row index.
merged_df.to_csv(
    '/content/drive/MyDrive/데이터마이닝/기말/df_20250617/Data_Analysis_Project/KB_employment_population_move_merged.csv',
    index=False,
)