In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# 1. Load the merged CSV
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
csv_path = "C:/Users/82103/Desktop/multimodal/train(01~14)val(15~16)test(17~18)/AGV/agv_merged_output.csv"  # path to the merged CSV file
df = pd.read_csv(csv_path)

# 2. Preprocessing
# Categorical (object) columns: fill missing values with the column's most frequent value.
# The result is assigned back rather than using `df[col].fillna(..., inplace=True)`:
# the in-place form mutates a temporary under pandas Copy-on-Write and leaves `df` unchanged.
# NOTE(review): when "subset"/"state" are object-typed this also imputes missing
# split/label values - confirm that is intended rather than dropping those rows.
for col in df.select_dtypes(include="object").columns:
    modes = df[col].mode()
    if not modes.empty:  # an all-NaN column has no mode; leave it untouched instead of raising
        df[col] = df[col].fillna(modes.iloc[0])

# Fail early with a clear message if a split is absent, instead of an obscure sklearn error later.
missing_splits = {"train", "val", "test"} - set(df["subset"].unique())
if missing_splits:
    raise ValueError(f"'subset' column has no rows for: {sorted(missing_splits)}")

train_mask = df["subset"] == "train"

# Numeric columns: fill missing values with column means.
# - numeric_only=True: the frame still holds string columns, and pandas >= 2.0 raises
#   TypeError when asked for the mean of those.
# - Means come from the training rows only, so val/test statistics do not leak into preprocessing.
df = df.fillna(df.loc[train_mask].mean(numeric_only=True))

# Label encoding: convert the categorical device identifier to integer codes
label_encoder = LabelEncoder()
if "device_id" in df.columns:
    df["device_id"] = label_encoder.fit_transform(df["device_id"])

# Feature scaling: standardise the numeric feature columns
all_features = ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4",
                "temp_max_value", "ex_temperature", "ex_humidity", "ex_illuminance"]
scaler = StandardScaler()
# Fit on training rows only, then apply the same transform to every row, so the
# validation and test splits do not influence the scaling parameters.
scaler.fit(df.loc[train_mask, all_features])
df[all_features] = scaler.transform(df[all_features])

# 3. Split into train / test / val according to the "subset" column
train_df = df[train_mask]
test_df = df[df["subset"] == "test"]
val_df = df[df["subset"] == "val"]

# Training data (the *_full matrices are not used by the loop below; kept as notebook globals)
X_train_full = train_df[all_features]
y_train = train_df["state"]

# Validation data
X_val_full = val_df[all_features]
y_val = val_df["state"]

# Test data
X_test_full = test_df[all_features]
y_test = test_df["state"]

# 4. Train and evaluate one model per feature combination
feature_combinations = [
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_temperature"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_humidity"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_illuminance"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "ex_temperature", "ex_humidity", "ex_illuminance"],
    all_features  # all variables included
]

results = []

for features in feature_combinations:
    # Select the columns for this combination from each split
    X_train = train_df[features]
    X_val = val_df[features]
    X_test = test_df[features]

    # Train the model (fixed random_state so repeated runs give the same forest)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the validation split
    y_val_pred = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, y_val_pred)

    # Evaluate on the test split
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Store the result
    results.append({
        "Features": features,
        "Validation Accuracy": val_accuracy,
        "Test Accuracy": test_accuracy
    })

# 5. Print the results
for i, result in enumerate(results, 1):
    print(f"Combination {i}:")
    print(f"Features: {result['Features']}")
    print(f"Validation Accuracy: {result['Validation Accuracy']:.4f}")
    print(f"Test Accuracy: {result['Test Accuracy']:.4f}")
    print("-" * 40)


  del sys.path[0]


Combination 1:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4']
Validation Accuracy: 0.9703
Test Accuracy: 0.9327
----------------------------------------
Combination 2:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value']
Validation Accuracy: 0.9665
Test Accuracy: 0.9331
----------------------------------------
Combination 3:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_temperature']
Validation Accuracy: 0.9675
Test Accuracy: 0.9341
----------------------------------------
Combination 4:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_humidity']
Validation Accuracy: 0.9644
Test Accuracy: 0.9333
----------------------------------------
Combination 5:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_illuminance']
Validation Accuracy: 0.9640
Test Accuracy: 0.9341
----------------------------

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# 1. Load the merged CSV
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
csv_path = "C:/Users/82103/Desktop/multimodal/train(01~14)val(15~16)test(17~18)/OHT/oht_merged_output.csv"  # path to the merged CSV file
df = pd.read_csv(csv_path)

# 2. Preprocessing
# Categorical (object) columns: fill missing values with the column's most frequent value.
# The result is assigned back rather than using `df[col].fillna(..., inplace=True)`:
# the in-place form mutates a temporary under pandas Copy-on-Write and leaves `df` unchanged.
# NOTE(review): when "subset"/"state" are object-typed this also imputes missing
# split/label values - confirm that is intended rather than dropping those rows.
for col in df.select_dtypes(include="object").columns:
    modes = df[col].mode()
    if not modes.empty:  # an all-NaN column has no mode; leave it untouched instead of raising
        df[col] = df[col].fillna(modes.iloc[0])

# Fail early with a clear message if a split is absent, instead of an obscure sklearn error later.
missing_splits = {"train", "val", "test"} - set(df["subset"].unique())
if missing_splits:
    raise ValueError(f"'subset' column has no rows for: {sorted(missing_splits)}")

train_mask = df["subset"] == "train"

# Numeric columns: fill missing values with column means.
# - numeric_only=True: the frame still holds string columns, and pandas >= 2.0 raises
#   TypeError when asked for the mean of those.
# - Means come from the training rows only, so val/test statistics do not leak into preprocessing.
df = df.fillna(df.loc[train_mask].mean(numeric_only=True))

# Label encoding: convert the categorical device identifier to integer codes
label_encoder = LabelEncoder()
if "device_id" in df.columns:
    df["device_id"] = label_encoder.fit_transform(df["device_id"])

# Feature scaling: standardise the numeric feature columns
all_features = ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4",
                "temp_max_value", "ex_temperature", "ex_humidity", "ex_illuminance"]
scaler = StandardScaler()
# Fit on training rows only, then apply the same transform to every row, so the
# validation and test splits do not influence the scaling parameters.
scaler.fit(df.loc[train_mask, all_features])
df[all_features] = scaler.transform(df[all_features])

# 3. Split into train / test / val according to the "subset" column
train_df = df[train_mask]
test_df = df[df["subset"] == "test"]
val_df = df[df["subset"] == "val"]

# Training data (the *_full matrices are not used by the loop below; kept as notebook globals)
X_train_full = train_df[all_features]
y_train = train_df["state"]

# Validation data
X_val_full = val_df[all_features]
y_val = val_df["state"]

# Test data
X_test_full = test_df[all_features]
y_test = test_df["state"]

# 4. Train and evaluate one model per feature combination
feature_combinations = [
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_temperature"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_humidity"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_illuminance"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "ex_temperature", "ex_humidity", "ex_illuminance"],
    all_features  # all variables included
]

results = []

for features in feature_combinations:
    # Select the columns for this combination from each split
    X_train = train_df[features]
    X_val = val_df[features]
    X_test = test_df[features]

    # Train the model (fixed random_state so repeated runs give the same forest)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the validation split
    y_val_pred = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, y_val_pred)

    # Evaluate on the test split
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Store the result
    results.append({
        "Features": features,
        "Validation Accuracy": val_accuracy,
        "Test Accuracy": test_accuracy
    })

# 5. Print the results
for i, result in enumerate(results, 1):
    print(f"Combination {i}:")
    print(f"Features: {result['Features']}")
    print(f"Validation Accuracy: {result['Validation Accuracy']:.4f}")
    print(f"Test Accuracy: {result['Test Accuracy']:.4f}")
    print("-" * 40)


  del sys.path[0]


Combination 1:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4']
Validation Accuracy: 0.9452
Test Accuracy: 0.9361
----------------------------------------
Combination 2:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value']
Validation Accuracy: 0.9559
Test Accuracy: 0.9523
----------------------------------------
Combination 3:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_temperature']
Validation Accuracy: 0.9580
Test Accuracy: 0.9531
----------------------------------------
Combination 4:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_humidity']
Validation Accuracy: 0.9568
Test Accuracy: 0.9538
----------------------------------------
Combination 5:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_illuminance']
Validation Accuracy: 0.9572
Test Accuracy: 0.9537
----------------------------

In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# 1. Load the merged CSV
# NOTE(review): this is the same OHT CSV path as the preceding cell, and the rest of the
# cell is identical too - confirm whether a different dataset was intended here or whether
# this cell is a leftover duplicate.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
csv_path = "C:/Users/82103/Desktop/multimodal/train(01~14)val(15~16)test(17~18)/OHT/oht_merged_output.csv"  # path to the merged CSV file
df = pd.read_csv(csv_path)

# 2. Preprocessing
# Categorical (object) columns: fill missing values with the column's most frequent value.
# The result is assigned back rather than using `df[col].fillna(..., inplace=True)`:
# the in-place form mutates a temporary under pandas Copy-on-Write and leaves `df` unchanged.
# NOTE(review): when "subset"/"state" are object-typed this also imputes missing
# split/label values - confirm that is intended rather than dropping those rows.
for col in df.select_dtypes(include="object").columns:
    modes = df[col].mode()
    if not modes.empty:  # an all-NaN column has no mode; leave it untouched instead of raising
        df[col] = df[col].fillna(modes.iloc[0])

# Fail early with a clear message if a split is absent, instead of an obscure sklearn error later.
missing_splits = {"train", "val", "test"} - set(df["subset"].unique())
if missing_splits:
    raise ValueError(f"'subset' column has no rows for: {sorted(missing_splits)}")

train_mask = df["subset"] == "train"

# Numeric columns: fill missing values with column means.
# - numeric_only=True: the frame still holds string columns, and pandas >= 2.0 raises
#   TypeError when asked for the mean of those.
# - Means come from the training rows only, so val/test statistics do not leak into preprocessing.
df = df.fillna(df.loc[train_mask].mean(numeric_only=True))

# Label encoding: convert the categorical device identifier to integer codes
label_encoder = LabelEncoder()
if "device_id" in df.columns:
    df["device_id"] = label_encoder.fit_transform(df["device_id"])

# Feature scaling: standardise the numeric feature columns
all_features = ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4",
                "temp_max_value", "ex_temperature", "ex_humidity", "ex_illuminance"]
scaler = StandardScaler()
# Fit on training rows only, then apply the same transform to every row, so the
# validation and test splits do not influence the scaling parameters.
scaler.fit(df.loc[train_mask, all_features])
df[all_features] = scaler.transform(df[all_features])

# 3. Split into train / test / val according to the "subset" column
train_df = df[train_mask]
test_df = df[df["subset"] == "test"]
val_df = df[df["subset"] == "val"]

# Training data (the *_full matrices are not used by the loop below; kept as notebook globals)
X_train_full = train_df[all_features]
y_train = train_df["state"]

# Validation data
X_val_full = val_df[all_features]
y_val = val_df["state"]

# Test data
X_test_full = test_df[all_features]
y_test = test_df["state"]

# 4. Train and evaluate one model per feature combination
feature_combinations = [
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_temperature"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_humidity"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "temp_max_value", "ex_illuminance"],
    ["NTC", "PM10", "PM2.5", "PM1.0", "CT1", "CT2", "CT3", "CT4", "ex_temperature", "ex_humidity", "ex_illuminance"],
    all_features  # all variables included
]

results = []

for features in feature_combinations:
    # Select the columns for this combination from each split
    X_train = train_df[features]
    X_val = val_df[features]
    X_test = test_df[features]

    # Train the model (fixed random_state so repeated runs give the same forest)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the validation split
    y_val_pred = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, y_val_pred)

    # Evaluate on the test split
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Store the result
    results.append({
        "Features": features,
        "Validation Accuracy": val_accuracy,
        "Test Accuracy": test_accuracy
    })

# 5. Print the results
for i, result in enumerate(results, 1):
    print(f"Combination {i}:")
    print(f"Features: {result['Features']}")
    print(f"Validation Accuracy: {result['Validation Accuracy']:.4f}")
    print(f"Test Accuracy: {result['Test Accuracy']:.4f}")
    print("-" * 40)


  del sys.path[0]


Combination 1:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4']
Validation Accuracy: 0.9452
Test Accuracy: 0.9361
----------------------------------------
Combination 2:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value']
Validation Accuracy: 0.9559
Test Accuracy: 0.9523
----------------------------------------
Combination 3:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_temperature']
Validation Accuracy: 0.9580
Test Accuracy: 0.9531
----------------------------------------
Combination 4:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_humidity']
Validation Accuracy: 0.9568
Test Accuracy: 0.9538
----------------------------------------
Combination 5:
Features: ['NTC', 'PM10', 'PM2.5', 'PM1.0', 'CT1', 'CT2', 'CT3', 'CT4', 'temp_max_value', 'ex_illuminance']
Validation Accuracy: 0.9572
Test Accuracy: 0.9537
----------------------------