# Day 5: 고급 시각화 및 유형별 분석

**날짜**: 2025-07-08

**목표**:
- CCTV 유형별 효과 비교
- 범죄 유형별 분석
- Box plot, Violin plot
- 그룹별 비교 분석

In [None]:
import sys
import os
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from utils import *

# Plot configuration helpers come from the project's `utils` module
# (presumably they select a Korean-capable font and a shared plot style —
# confirm against utils).
set_korean_font()
set_plot_style()

# Load the integrated dataset from the processed-data directory.
# 'utf-8-sig' makes the reader tolerate a leading UTF-8 byte-order mark.
integrated_csv = os.path.join(DATA_PATHS['processed'], 'integrated_data.csv')
df = pd.read_csv(integrated_csv, encoding='utf-8-sig')
print(f"✅ Day 5: 데이터 로드 완료 {df.shape}")

## 1. CCTV 유형별 효과 비교

In [None]:
# Correlation between each per-capita CCTV-type column and the
# '인구당_CCTV효과범죄율' crime-rate column (Series.corr, Pearson by default).
cctv_types = ['인구당_방범용', '인구당_교통단속용', '인구당_어린이안전용']

crime_rate = df['인구당_CCTV효과범죄율']
correlations = [df[column].corr(crime_rate) for column in cctv_types]

# Bar chart: one bar per CCTV type; negative correlations are drawn green,
# everything else red.
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['green' if value < 0 else 'red' for value in correlations]
bar_positions = range(len(cctv_types))
tick_labels = [name.replace('인구당_', '') for name in cctv_types]

ax.bar(bar_positions, correlations, color=colors, alpha=0.7, edgecolor='black')
ax.set_xticks(bar_positions)
ax.set_xticklabels(tick_labels)
ax.set_ylabel('상관계수')
ax.set_title('CCTV 유형별 범죄율과의 상관관계', fontsize=14, fontweight='bold')
ax.axhline(0, color='black', linewidth=0.8)  # zero reference line
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()

correlation_png = os.path.join(DATA_PATHS['figures'], 'day5_cctv_type_correlation.png')
plt.savefig(correlation_png, dpi=300)
plt.show()

# Echo the coefficients as text, four decimal places each.
for ctype, corr in zip(cctv_types, correlations):
    print(f"{ctype}: {corr:.4f}")

## 2. 범죄 유형별 분석

In [None]:
# Mean of each per-capita crime-rate column, shown as a labelled bar chart.
crime_types = ['인구당_절도율', '인구당_강도율', '인구당_차량범죄율']
crime_means = [df[ct].mean() for ct in crime_types]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(len(crime_types)), crime_means, color=['steelblue', 'coral', 'gold'],
       alpha=0.7, edgecolor='black')
ax.set_xticks(range(len(crime_types)))
# Tick labels drop the '인구당_' prefix and the '율' suffix from the column names.
ax.set_xticklabels([ct.replace('인구당_', '').replace('율', '') for ct in crime_types])
ax.set_ylabel('평균 범죄율 (건/천명)')
ax.set_title('CCTV 효과 범죄 유형별 평균', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Value labels above each bar. The gap is given in points rather than data
# units, so it stays the same regardless of the magnitude of the plotted
# rates (a fixed data-unit offset would scale with the y-axis range).
for i, v in enumerate(crime_means):
    ax.annotate(f'{v:.2f}', xy=(i, v), xytext=(0, 3), textcoords='offset points',
                ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.savefig(os.path.join(DATA_PATHS['figures'], 'day5_crime_type_avg.png'), dpi=300)
plt.show()

## 3. 등급별 분포 (Box Plot)

In [None]:
# Distribution of the '인구당_CCTV효과범죄율' column, one box per value of
# the 'CCTV밀도_등급' grouping column.
fig, ax = plt.subplots(figsize=(10, 6))
df.boxplot(column='인구당_CCTV효과범죄율', by='CCTV밀도_등급', ax=ax)
ax.set(
    xlabel='CCTV 밀도 등급',
    ylabel='인구당 범죄율 (건/천명)',
    title='CCTV 밀도 등급별 범죄율 분포',
)
# A grouped pandas boxplot adds its own figure-level title; blank it out so
# only the axes title remains.
fig.suptitle('')
plt.tight_layout()

boxplot_png = os.path.join(DATA_PATHS['figures'], 'day5_boxplot_cctv_grade.png')
plt.savefig(boxplot_png, dpi=300)
plt.show()

In [None]:
# Final cell: print a completion marker for the Day 5 notebook.
print("✅ Day 5 완료")