In [2]:
import pandas as pd

# Load the dataset; the 'time' column is parsed into datetimes while reading.
pivot_df = pd.read_csv(
    "./people_estimated_dataset.csv",
    parse_dates=['time'],
)

# Render the frame as the cell's rich output.
display(pivot_df)


Unnamed: 0,time,location,co2,humidity,temperature,area,people_est
0,2025-04-24 08:05:00+00:00,8인 책상,512.000000,51.50,23.400000,64.65,1.0
1,2025-04-24 08:05:00+00:00,보드,701.000000,49.00,25.000000,31.59,1.3
2,2025-04-24 08:05:00+00:00,입구,840.000000,87.25,12.200000,109.21,6.4
3,2025-04-24 08:10:00+00:00,8인 책상,507.750000,51.50,23.300000,64.65,0.9
4,2025-04-24 08:10:00+00:00,보드,699.000000,49.20,24.960000,31.59,1.3
...,...,...,...,...,...,...,...
7983,2025-05-02 07:50:00+00:00,안쪽벽 중앙,601.158647,34.00,23.700000,25.13,0.7
7984,2025-05-02 07:50:00+00:00,입구,1137.666667,36.25,24.391667,109.21,10.7
7985,2025-05-02 07:55:00+00:00,8인 책상,671.400000,32.50,25.200000,64.65,2.3
7986,2025-05-02 07:55:00+00:00,보드,700.500000,37.25,24.100000,31.59,1.3


In [3]:
import numpy as np

def temp_score(temp):
    """Map a temperature reading to a comfort sub-score.

    The bands are nested and checked from the narrowest outwards, so the
    first band containing ``temp`` determines the score:

    * 22..24 -> 100
    * 20..26 -> 85
    * 18..28 -> 65

    Anything outside every band returns 40. NaN also returns 40, because
    every comparison against NaN is false.
    """
    nested_bands = (
        (22, 24, 100),
        (20, 26, 85),
        (18, 28, 65),
    )
    for lower, upper, score in nested_bands:
        if lower <= temp <= upper:
            return score
    return 40

def humidity_score(humi):
    """Map a humidity reading to a comfort sub-score.

    The bands are nested and checked from the narrowest outwards, so the
    first band containing ``humi`` determines the score:

    * 40..50 -> 100
    * 30..60 -> 85
    * 25..70 -> 65

    Anything outside every band returns 40. NaN also returns 40, because
    every comparison against NaN is false.
    """
    nested_bands = (
        (40, 50, 100),
        (30, 60, 85),
        (25, 70, 65),
    )
    for lower, upper, score in nested_bands:
        if lower <= humi <= upper:
            return score
    return 40

def co2_score(co2):
    """Map a CO2 reading to a comfort sub-score.

    Readings up to 800 score 100, up to 1000 score 85 and up to 1200
    score 65. Above 1200 the score is ``100 - log1p(co2 - 800) * 10``,
    floored at 40.

    NOTE(review): the log term exceeds 60 once ``co2 - 800`` passes
    ``e**6 - 1`` (about 402.4), so the floor of 40 applies to everything
    above roughly 1202.4; only a very narrow range lands slightly above 40.
    """
    ceilings = ((800, 100), (1000, 85), (1200, 65))
    for ceiling, score in ceilings:
        if co2 <= ceiling:
            return score

    # Logarithmic penalty for readings above the last fixed band.
    penalty = np.log1p(co2 - 800) * 10
    return max(40, 100 - penalty)

def compute_cei_v2(temp, humi, co2):
    """Combine the three sub-scores into one weighted index.

    The index is 0.4 * temperature score + 0.3 * humidity score
    + 0.3 * CO2 score, using ``temp_score``, ``humidity_score`` and
    ``co2_score`` for the individual terms.
    """
    temp_term = 0.4 * temp_score(temp)
    humidity_term = 0.3 * humidity_score(humi)
    co2_term = 0.3 * co2_score(co2)
    return temp_term + humidity_term + co2_term

def cei_to_label(cei):
    """Translate an index value into its display label.

    Thresholds are checked from highest to lowest (90, 70, 50); the first
    one that ``cei`` reaches selects the label. Values below 50 (and NaN,
    which fails every comparison) get the last label.
    """
    thresholds = (
        (90, "😊 최적 쾌적"),
        (70, "😐 보통 상태"),
        (50, "⚠️ 주의 필요"),
    )
    for minimum, label in thresholds:
        if cei >= minimum:
            return label
    return "🥵 위험 구역"

# Score every row from its temperature, humidity and CO2 readings.
pivot_df['cei'] = pivot_df.apply(
    lambda record: compute_cei_v2(
        record['temperature'],
        record['humidity'],
        record['co2'],
    ),
    axis=1,
)

# Attach the display label that corresponds to each score.
pivot_df['cei_label'] = pivot_df['cei'].map(cei_to_label)


In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Encode the string labels in 'cei_label' as integer class ids.
le = LabelEncoder()
pivot_df['label_encoded'] = le.fit_transform(pivot_df['cei_label'])

# Target vector and feature matrix.
# NOTE(review): 'cei_label' is computed from temperature, humidity and co2,
# which are also used as features here, so the target is a deterministic
# function of three of the four inputs.
y = pivot_df['label_encoded']
X = pivot_df[
    [
        'temperature',
        'humidity',
        'co2',
        'people_est',
    ]
]

# Stratified split with a fixed seed; the test fraction is left at the
# library default.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Fit a random forest with a fixed seed on the training split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# LabelEncoder assigns ids 0..n_classes-1 in the order of le.classes_.
# Passing them explicitly keeps the report rows and the confusion-matrix
# axes aligned with le.classes_ even when a rare class is missing from both
# y_test and y_pred; without `labels`, classification_report raises if the
# number of observed classes differs from len(target_names).
class_ids = list(range(len(le.classes_)))

# NOTE(review): the labels are derived from temperature, humidity and co2,
# which are also model inputs, so near-perfect scores are expected here and
# say little about how the model generalises.
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=le.classes_,
    )
)
print(confusion_matrix(y_test, y_pred, labels=class_ids))


              precision    recall  f1-score   support

    ⚠️ 주의 필요       1.00      1.00      1.00        36
     😊 최적 쾌적       1.00      1.00      1.00      1007
     😐 보통 상태       1.00      1.00      1.00       954

    accuracy                           1.00      1997
   macro avg       1.00      1.00      1.00      1997
weighted avg       1.00      1.00      1.00      1997

[[  36    0    0]
 [   0 1005    2]
 [   0    1  953]]
