In [2]:
import numpy as np
import pandas as pd
import json
import sklearn
from sklearn import utils, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import joblib
import math


In [3]:
# 1. Prepare the analysis data: read ETL_DATA/OsanDataset.csv into org_df,
#    the module-level DataFrame that predictRisk / predictSafe read from.
org_df =pd.read_csv('ETL_DATA/OsanDataset.csv')
# Preview the first rows as the cell output.
org_df.head()

Unnamed: 0,id,acc_sum,lon1,lat1,lon2,lat2,cctv,cross,speedBump,parking,...,youth,baby,build,latM,lonM,youchiwon,elementry,academy,gym,school_zone
0,0,0,126.994216,37.174182,126.995336,37.175089,0,0,0,0,...,0.0,0.0,0.0,37.173729,126.994776,0,0,0,0,0
1,1,0,126.99421,37.175084,126.99533,37.17599,0,0,0,0,...,0.0,0.0,0.0,37.174631,126.99477,0,0,0,0,0
2,2,0,126.994204,37.175985,126.995324,37.176891,0,0,0,0,...,0.0,0.0,0.0,37.175532,126.994764,0,0,0,0,0
3,3,0,126.994198,37.176887,126.995318,37.177793,0,0,0,0,...,0.0,0.0,0.0,37.176433,126.994758,0,0,0,0,0
4,4,0,126.995342,37.174187,126.996463,37.175093,0,0,0,0,...,0.0,0.0,0.0,37.173734,126.995902,0,0,0,0,0


In [25]:
# 2. Risk-area prediction function

def predictRisk(areaID) :
    """Print the predicted risk grade for one row of ``org_df``.

    The 13 feature values of the selected row are passed to the classifier
    stored in 'OsanPredict_RF.dmp'.  The grade is derived from the first
    ``predict_proba`` column expressed as a percentage (presumably the
    "no accident" class 0 -- TODO confirm against ``clf.classes_``):

        >= 90 -> 'N',  >= 70 -> 'D',  >= 60 -> 'C',  >= 55 -> 'B',  else 'A'

    Parameters
    ----------
    areaID : int
        Positional row index into ``org_df`` (``iloc``); it is not looked up
        in the ``id`` column.

    Returns
    -------
    None
        The result is only printed.

    Raises
    ------
    IndexError
        If ``areaID`` is outside the rows of ``org_df``.
    """
    # Single source of truth for the model's feature columns and their order.
    feature_names = ['youth','baby','build','youchiwon','elementry','gym','academy','school_zone','cctv','cross','parking','speedBump','trafficLight']

    # Fetch the row for this area and turn its features into a plain list.
    area_row = org_df.iloc[areaID, :]
    feature_values = area_row[feature_names].tolist()

    # Load the trained model.
    # NOTE(review): joblib.load unpickles the file, so it must only be
    # pointed at model files from a trusted source.
    clf = joblib.load('OsanPredict_RF.dmp')
    print(type(clf))
    print('모델로딩성공')

    # The classifier expects 2-D input: one inner list per sample.
    safe_pct = round(clf.predict_proba([feature_values])[0][0]*100,2)

    # Grade solely on the no-accident probability.  The earlier code switched
    # to the accident probability whenever an accident was predicted, which
    # gave 'N' to cells with an accident probability of 90 % or more.
    grade = 'A'
    for min_safe_pct, label in ((90, 'N'), (70, 'D'), (60, 'C'), (55, 'B')):
        if safe_pct >= min_safe_pct:
            grade = label
            break

    print('*** 시뮬레이션 결과 ***')
    print('*AreaID : '+str(areaID))
    print('*위치 : '+ str(area_row['lat1']) +','+ str(area_row['lon1']) )
    print('*위험등급 : '+str(grade))
    print('*해당지역 세부 데이터 : ')
    print(feature_names)
    # Print the flat list so it lines up with the header row above
    # (and matches what predictSafe prints).
    print(feature_values)



In [26]:
# 3. Simulation function for installing safety facilities
def predictSafe(areaID,school_zone,cctv,cross,parking,speedBump,trafficLight) :
    """Print the risk grade of one ``org_df`` row under what-if facility values.

    The seven features 'youth' .. 'academy' are read from the selected row;
    the six facility features are taken from the arguments instead of the
    row.  The combined 13 values are passed to the classifier stored in
    'OsanPredict_RF.dmp'.  The grade is derived from the first
    ``predict_proba`` column expressed as a percentage (presumably the
    "no accident" class 0 -- TODO confirm against ``clf.classes_``):

        >= 90 -> 'N',  >= 70 -> 'D',  >= 60 -> 'C',  >= 55 -> 'B',  else 'A'

    Parameters
    ----------
    areaID : int
        Positional row index into ``org_df`` (``iloc``); it is not looked up
        in the ``id`` column.
    school_zone, cctv, cross, parking, speedBump, trafficLight
        Values used for the same-named model features in place of the values
        recorded in ``org_df``.

    Returns
    -------
    None
        The result is only printed.

    Raises
    ------
    IndexError
        If ``areaID`` is outside the rows of ``org_df``.
    """
    # Features taken from the data set, followed by the simulated ones;
    # together they give the column order the model is fed with.
    recorded_names = ['youth','baby','build','youchiwon','elementry','gym','academy']
    simulated_names = ['school_zone','cctv','cross','parking','speedBump','trafficLight']
    feature_names = recorded_names + simulated_names

    # Fetch the row for this area (positional lookup).
    area_row = org_df.iloc[areaID, :]

    # Build the model input: recorded values first, then the what-if values.
    simulated_values = [school_zone,cctv,cross,parking,speedBump,trafficLight]
    feature_values = area_row[recorded_names].tolist() + simulated_values

    # Load the trained model.
    # NOTE(review): joblib.load unpickles the file, so it must only be
    # pointed at model files from a trusted source.
    clf = joblib.load('OsanPredict_RF.dmp')
    print(type(clf))
    print('모델로딩성공')

    # The classifier expects 2-D input: one inner list per sample.
    safe_pct = round(clf.predict_proba([feature_values])[0][0]*100,2)

    # Grade solely on the no-accident probability.  The earlier code switched
    # to the accident probability whenever an accident was predicted, which
    # gave 'N' to cells with an accident probability of 90 % or more.
    grade = 'A'
    for min_safe_pct, label in ((90, 'N'), (70, 'D'), (60, 'C'), (55, 'B')):
        if safe_pct >= min_safe_pct:
            grade = label
            break

    print('*** 시뮬레이션 결과 ***')
    print('*AreaID : '+str(areaID))
    print('*위치 : '+ str(area_row['lat1']) +','+ str(area_row['lon1']) )
    print('*위험등급 : '+str(grade))
    print('*해당지역 세부 데이터 : ')
    print(feature_names)
    print(feature_values)


In [27]:
# Grade row 3828 of org_df using the feature values recorded in the data set.
predictRisk(3828)

<class 'sklearn.ensemble._forest.RandomForestClassifier'>
모델로딩성공
*** 시뮬레이션 결과 ***
*AreaID : 3828
*위치 : 37.15287138,127.0765663
*위험등급 : A
*해당지역 세부 데이터 : 
['youth', 'baby', 'build', 'youchiwon', 'elementry', 'gym', 'academy', 'school_zone', 'cctv', 'cross', 'parking', 'speedBump', 'trafficLight']
[[0.0, 0.0, 925.9, 20.0, 1.0, 4.0, 8.0, 0.0, 0.0, 4.0, 23.0, 0.0, 8.0]]


In [28]:
# Argument order: predictSafe(areaID, school_zone, cctv, cross, parking, speedBump, trafficLight)
# What-if run for row 3828 with school_zone=0, cctv=2, cross=4, parking=23, speedBump=1, trafficLight=0.
predictSafe(3828,0,2,4,23,1,0) 

<class 'sklearn.ensemble._forest.RandomForestClassifier'>
모델로딩성공
*** 시뮬레이션 결과 ***
*AreaID : 3828
*위치 : 37.15287138,127.0765663
*위험등급 : B
*해당지역 세부 데이터 : 
['youth', 'baby', 'build', 'youchiwon', 'elementry', 'gym', 'academy', 'school_zone', 'cctv', 'cross', 'parking', 'speedBump', 'trafficLight']
[0.0, 0.0, 925.9, 20.0, 1.0, 4.0, 8.0, 0, 2, 4, 23, 1, 0]


In [29]:
# What-if run for row 3828 with school_zone=1, cctv=2, cross=4, parking=23, speedBump=1, trafficLight=0.
predictSafe(3828,1,2,4,23,1,0) 

<class 'sklearn.ensemble._forest.RandomForestClassifier'>
모델로딩성공
*** 시뮬레이션 결과 ***
*AreaID : 3828
*위치 : 37.15287138,127.0765663
*위험등급 : C
*해당지역 세부 데이터 : 
['youth', 'baby', 'build', 'youchiwon', 'elementry', 'gym', 'academy', 'school_zone', 'cctv', 'cross', 'parking', 'speedBump', 'trafficLight']
[0.0, 0.0, 925.9, 20.0, 1.0, 4.0, 8.0, 1, 2, 4, 23, 1, 0]
