In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [13]:
    # Seoul five-major-crimes table. The first three lines of the file are
    # skipped and the header parsed from the following line is replaced with
    # the explicit column names below.
    seoul_crime = pd.read_csv('predict_data/seoul_crime.csv', skiprows=3)
    seoul_crime.columns = [
        '행정구',
        '총발생', '총검거',
        '살인발생', '살인검거',
        '강도발생', '강도검거',
        '강간강제추행발생', '강간강제추행검거',
        '절도발생', '절도검거',
        '폭력발생', '폭력검거',
    ]

    # Seoul population table, loaded the same way (three lines skipped,
    # parsed header replaced).
    seo_pop = pd.read_csv('predict_data/seo_pop.csv', skiprows=3)
    seo_pop.columns = [
        '합계', '행정구', '행정동',
        '전체인구수', '남자인구수', '여자인구수',
        '한국인구수', '한국남자', '한국여자',
        '외국인구수', '외국남자', '외국여자',
    ]

    # Police offices per district; the file's own header is used as-is.
    seoul_police_office = pd.read_csv('predict_data/seoul_polic.csv')

    # CCTV installations; this file is decoded as cp949.
    seoul_cctv = pd.read_csv('predict_data/seoul_cctv.csv', encoding='cp949')

    # Distinct district names, in order of first appearance in the crime table.
    seo_gu = pd.unique(seoul_crime['행정구'])
        
    # Per-district aggregates attached to the crime table.
    #
    # Each aggregate is computed once with groupby and attached with `.map`
    # keyed on the district name, so every row receives the value for its own
    # district regardless of the frame's index or of repeated district names.
    # Districts missing from a lookup table get 0, which is what a sum/count
    # over an empty selection yields; the cast restores the aggregate's dtype
    # after the NaN fill.
    gu_of_row = seoul_crime['행정구']

    # Population per district. The summed population is halved
    # NOTE(review): presumably because the population file carries a
    # per-district subtotal row next to the per-dong rows, so a plain sum
    # counts everyone twice -- confirm against the CSV.
    pop_by_gu = seo_pop.groupby('행정구')['전체인구수'].sum() / 2
    seoul_crime['구별인구수'] = gu_of_row.map(pop_by_gu).fillna(0).astype(pop_by_gu.dtype)

    # Number of police offices per district (non-null entries of '경찰서').
    pof_by_gu = seoul_police_office.groupby('행정구')['경찰서'].count()
    seoul_crime['구별경찰서수'] = gu_of_row.map(pof_by_gu).fillna(0).astype(pof_by_gu.dtype)

    # Total CCTV units per district (the CCTV file names the district column '자치구').
    cctv_by_gu = seoul_cctv.groupby('자치구')['CCTV 수량'].sum()
    seoul_crime['구별cctv수'] = gu_of_row.map(cctv_by_gu).fillna(0).astype(cctv_by_gu.dtype)
    
    # Safety index: weighted sum of four ratios (weights 0.4 / 0.3 / 0.2 / 0.1).
    # NOTE(review): the terms are not normalised. For the rows displayed later
    # in this notebook the population-per-crime term is in the tens while the
    # police and CCTV terms are far below 1, so the index is effectively
    # driven by population / crimes -- confirm this weighting is intended.
    crimes_total = seoul_crime['총발생']
    residents = seoul_crime['구별인구수']

    arrest_term = 0.4 * seoul_crime['총검거'] / crimes_total
    residents_per_crime_term = 0.3 * residents / crimes_total
    police_term = 0.2 * seoul_crime['구별경찰서수'] / residents
    cctv_term = 0.1 * seoul_crime['구별cctv수'] / residents

    seoul_crime['안전지수'] = arrest_term + residents_per_crime_term + police_term + cctv_term
    
    #
    # Model inputs and target.
    # NOTE(review): '안전지수' is computed directly from these same five
    # columns, so the regressor is approximating a known formula rather than
    # learning from independent labels -- confirm this is the intent.
    feature_cols = ['총발생', '총검거', '구별인구수', '구별경찰서수', '구별cctv수']
    features = seoul_crime[feature_cols]
    target = seoul_crime['안전지수']

    # Split first, then fit the scaler on the training rows only, so no
    # statistics from the held-out rows leak into preprocessing. The split
    # depends only on the row count and random_state, so the same rows are
    # selected as when splitting the pre-scaled array.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # Every district, scaled with the training-set statistics.
    features_scaled = scaler.transform(features)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Held-out error; kept in a variable (not printed) so the cell's printed
    # output stays the prediction array only. Inspect `mse_test` separately.
    predictions = model.predict(X_test)
    mse_test = mean_squared_error(y_test, predictions)

    # Predictions for all districts. These include the training rows, so they
    # are not an estimate of generalisation error.
    all_predictions = model.predict(features_scaled)
    print(all_predictions)

[29.22957998 30.96151759 27.92640247 39.34171458 32.2708106  33.73062844
 32.69593582 44.80883616 36.60496571 38.11447662 37.37503053 35.77827258
 39.83655296 31.70165422 40.53532907 35.72436789 33.13815717 32.20682091
 27.91694522 41.03264521 31.81102643 30.03620557 27.32691345 33.01470517
 37.20061858]


In [12]:
# Wrap the model output in a one-column frame (column label 0) so it can be
# joined with the district labels.
df2 = pd.DataFrame(data=all_predictions)

# Hard-coded district names, also as a one-column frame (column label 0).
# NOTE(review): assumes this order matches the row order of the predictions -- confirm.
df1 = pd.DataFrame(data=[
    '종로구', '중구', '용산구', '성동구', '광진구',
    '동대문구', '중랑구', '성북구', '강북구', '도봉구',
    '노원구', '은평구', '서대문구', '마포구', '양천구',
    '강서구', '구로구', '금천구', '영등포구', '동작구',
    '관악구', '서초구', '강남구', '송파구', '강동구',
])

In [6]:
# Pair each district label with its predicted value. Both source frames hold a
# single column labelled 0, so those columns are pulled out and named directly.
# This rebinds `all_predictions` from the raw model output to a two-column table.
all_predictions = pd.DataFrame({'행정구': df1[0], '예측값': df2[0]})

In [13]:
# Predicted value for a single district, selected in one .loc call.
mask = all_predictions['행정구'].eq('종로구')
all_predictions.loc[mask, '예측값']

0    29.22958
Name: 예측값, dtype: float64

In [18]:
# Prefix the feature table with the district labels.
# The numeric columns are selected explicitly before concatenating so the cell
# can be re-run safely: on a second run `features` already carries the label
# column, and concatenating it again would add a duplicate column and make the
# column assignment fail with a length mismatch.
numeric_cols = ['총발생', '총검거', '구별인구수', '구별경찰서수', '구별cctv수']
features = pd.concat([df1, features[numeric_cols]], axis=1)
features.columns = ['행정구'] + numeric_cols

Unnamed: 0,0,총발생,총검거,구별인구수,구별경찰서수,구별cctv수
0,종로구,3138,3284,152211.0,20,1930
1,중구,3071,2119,130785.0,15,2336
2,용산구,2967,2093,233284.0,7,1078
3,성동구,2194,1597,288234.0,9,4036
4,광진구,3619,2498,351252.0,10,4091
5,동대문구,3253,2231,353601.0,11,2857
6,중랑구,3599,2671,390140.0,8,3471
7,성북구,2749,2125,441984.0,10,3943
8,강북구,2832,2343,297702.0,9,2858
9,도봉구,2141,1569,313989.0,8,1598


In [21]:
# Display the labelled feature table (district name plus the five model inputs).
features

Unnamed: 0,행정구,총발생,총검거,구별인구수,구별경찰서수,구별cctv수
0,종로구,3138,3284,152211.0,20,1930
1,중구,3071,2119,130785.0,15,2336
2,용산구,2967,2093,233284.0,7,1078
3,성동구,2194,1597,288234.0,9,4036
4,광진구,3619,2498,351252.0,10,4091
5,동대문구,3253,2231,353601.0,11,2857
6,중랑구,3599,2671,390140.0,8,3471
7,성북구,2749,2125,441984.0,10,3943
8,강북구,2832,2343,297702.0,9,2858
9,도봉구,2141,1569,313989.0,8,1598
