In [1]:
import chardet
import pandas as pd

pd.set_option('display.max_columns', None)

with open('dataset.csv', 'rb') as f:
    result = chardet.detect(f.read())
    encoding = result['encoding']

data = pd.read_csv('dataset.csv', encoding=encoding)
df = data

In [2]:
df = df.drop(columns=['읍면동구분명', '번지', '도로명','발화요인소분류명'])
df = df.rename(columns={'Unnamed: 0': 'ID'})

In [3]:
#화재발생일자
df['화재발생일자'] = pd.to_datetime(df['화재발생일자'], format='%Y%m%d')

#화재발생시간
df['화재발생시간'] = df['화재발생시간'].astype(str).str.zfill(6)
df['화재발생시간'] = df['화재발생시간'].str[:2] + ':' + df['화재발생시간'].str[2:4] + ':' + df['화재발생시간'].str[4:]
df['화재발생시간'] = pd.to_datetime(df['화재발생시간'], format='%H:%M:%S')

#화재발생시간대
df['화재발생시간대'] = pd.cut(df['화재발생시간'].dt.hour, bins=8, labels=['0~3', '3~6', '6~9', '9~12', '12~15', '15~18', '18~21', '21~24'])

df['화재발생시간'] = df['화재발생시간'].dt.strftime('%H:%M:%S')

In [4]:
df['요일'] = df['화재발생일자'].dt.weekday  # 월=0, 화=1, ..., 일=6 매핑

In [5]:
df['화재발생_연도'] = df['화재발생일자'].dt.year
df['화재발생_월'] = df['화재발생일자'].dt.month
df['화재발생_일'] = df['화재발생일자'].dt.day

In [6]:
def merge_reasons(row):
    if row['발화요인대분류명'] == '부주의' or row['발화원인'] == '입산자실화' or row['발화원인'] == '성묘객' or row['발화원인'] == '담뱃불':
        return '인간의 부주의'
    
    elif row['발화요인대분류명'] == '전기적요인' or row['발화요인대분류명'] == '교통사고' or row['발화요인대분류명'] == '화학적요인' or row['발화요인대분류명'] == '기계적요인':
        return '기계/전기/화학 요인'

    elif ( row['발화원인'] == '미상' or row['발화원인'] == '기타' or row['발화원인'] == '건물') and ( row['발화요인대분류명'] == '미상' or row['발화요인대분류명'] == '기타' or row['발화요인대분류명'] == '비화'):
        return '미상'
    
    elif row['발화요인대분류명'] == '방화' or row['발화요인대분류명'] == '방화의심':
        return '방화 및 방화의심'
    
    elif row['발화요인대분류명'] == '자연적인요인':
        return '자연적 요인'
    else:
        return '기타'
    
df['발화종합'] = df.apply(merge_reasons, axis=1)

In [7]:
count = (df['발화종합'] == '기타').sum()
count

0

In [8]:
# 컬럼 이름들을 출력합니다.
print(df.columns)

Index(['ID', '화재발생일자', '화재발생시간', '시도명', '시군구명', '읍면동명', '발화지점', '날씨', '온도',
       '습도', '특보내용', '발화원인', '발화요인대분류명', '접수일자', '접수시각', '소방서명', '서센터명',
       '소방지역대명', '출동일자', '출동시각', '현장도착일자', '현장도착시각', '출동소요시간', '현장소방서거리',
       '현장안전센터거리', '현장소방지역대거리', '초진일자', '초진시각', '전체인력수합계', '완진일자', '완진시간',
       '화재진압시간', '동원장비수', '인명피해수', '재산피해금액', '피해면적', '화재발생시간대', '요일',
       '화재발생_연도', '화재발생_월', '화재발생_일', '발화종합'],
      dtype='object')


In [9]:
df = df.reindex(columns=['ID', '화재발생일자', '화재발생시간', '화재발생_연도', '화재발생_월', '화재발생_일', '요일', '화재발생시간대', '시도명', '시군구명', '읍면동명', '발화지점', '날씨', '온도',
       '습도', '특보내용',
        '발화종합', '발화원인', '발화요인대분류명', '접수일자', '접수시각',
       '소방서명', '서센터명', '소방지역대명', '출동일자', '출동시각', '현장도착일자', '현장도착시각',
       '출동소요시간', '현장소방서거리', '현장안전센터거리', '현장소방지역대거리', '초진일자', '초진시각',
       '전체인력수합계', '완진일자', '완진시간', '화재진압시간', '동원장비수', '인명피해수', '재산피해금액', '피해면적'])

In [10]:
df

Unnamed: 0,ID,화재발생일자,화재발생시간,화재발생_연도,화재발생_월,화재발생_일,요일,화재발생시간대,시도명,시군구명,읍면동명,발화지점,날씨,온도,습도,특보내용,발화종합,발화원인,발화요인대분류명,접수일자,접수시각,소방서명,서센터명,소방지역대명,출동일자,출동시각,현장도착일자,현장도착시각,출동소요시간,현장소방서거리,현장안전센터거리,현장소방지역대거리,초진일자,초진시각,전체인력수합계,완진일자,완진시간,화재진압시간,동원장비수,인명피해수,재산피해금액,피해면적
0,0,2011-01-22,23:35:00,2011,1,22,5,21~24,강원도,강릉시,죽헌동,산정상,맑음,-1,30,,방화 및 방화의심,기타,방화의심,20110122,233511,강릉소방서,경포119안전센터,,20110122,233615,20110122,235315,1700,9,9,0.0,20110123,121115,37,20110123,1115,1800,8,0,0,0.30
1,1,2011-01-31,18:13:00,2011,1,31,0,18~21,강원도,양양군,현남면,산아래,맑음,1,10,건조경보,인간의 부주의,기타,부주의,20110131,181311,양양소방서,하조대119안전센터,,20110131,181409,20110131,183024,1615,43,13,0.0,20110201,94538,1872,20110201,103016,155952,71,0,184330,30.00
2,2,2011-02-11,17:50:00,2011,2,11,4,15~18,강원도,홍천군,내촌면,산중턱,구름많음,-2,29,,인간의 부주의,입산자실화,기타,20110211,175032,홍천소방서,서석119안전센터,내촌119지역대,20110211,175220,20110211,181500,2240,37,37,7.0,20110211,74100,171,20110211,202000,20500,9,0,4864,0.30
3,3,2011-02-12,06:18:00,2011,2,12,5,3~6,강원도,원주시,귀래면,산중턱,구름많음,-6,44,,인간의 부주의,모닥불,부주의,20110212,61836,원주소방서,흥업119안전센터,귀래119지역대,20110212,62010,20110212,64029,2019,20,20,8.0,20110212,82038,94,20110212,103038,35009,9,0,50,0.30
4,4,2011-02-26,16:42:00,2011,2,26,5,15~18,강원도,원주시,태장동,산정상,맑음,14,23,,인간의 부주의,담뱃불,부주의,20110226,164234,원주소방서,태장119안전센터,,20110226,164353,20110226,164901,508,8,2,0.0,20110226,52601,46,20110226,172726,3825,8,0,440,0.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,534,2021-05-09,13:43:01,2021,5,9,6,12~15,강원도,철원군,철원읍,산중턱,맑음,19,26,,인간의 부주의,담뱃불,부주의,20210509,134300,철원소방서,동송119안전센터,,20210509,134415,20210509,135216,801,16,7,0.0,20210509,141148,60,20210509,142131,2915,12,0,0,0.03
535,535,2021-05-09,17:44:49,2021,5,9,6,15~18,강원도,철원군,철원읍,산정상,맑음,18,22,,인간의 부주의,입산자실화,부주의,20210509,174400,철원소방서,동송119안전센터,,20210509,174536,20210509,174923,347,12,1,0.0,20210509,183358,67,20210509,183452,4529,13,0,0,0.01
536,536,2021-05-13,15:37:05,2021,5,13,3,15~18,강원도,태백시,하사미동,산중턱,맑음,23,43,,기계/전기/화학 요인,기타,기계적요인,20210513,153700,태백소방서,화전119안전센터,,20210513,153756,20210513,155917,2121,20,17,0.0,20210513,182000,144,20210513,203416,43459,18,0,0,1.20
537,537,2021-06-07,15:11:39,2021,6,7,0,15~18,강원도,강릉시,주문진읍,산중턱,맑음,25,62,,인간의 부주의,논/밭두렁,부주의,20210607,151100,강릉소방서,주문진119안전센터,,20210607,151226,20210607,152610,1344,21,6,0.0,20210607,153855,50,20210607,165149,12539,11,0,223,0.01


In [11]:
df.to_csv('restructuring_1.csv', index=False, encoding='utf-8-sig')