In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.ndimage import gaussian_filter1d
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

# 1. 데이터 준비
file_path = '/Users/a0000/Desktop/cafe 4 data.csv'
df = pd.read_csv(file_path, encoding='utf-8-sig')
df.columns = ['date', 'time', 'detected_people']

# 날짜 및 시간 처리
df['date'] = pd.to_datetime(df['date'])
df['time'] = pd.to_timedelta(df['time']).dt.total_seconds().astype(int)

# Gaussian smoothing
sigma = 10
df['detected_people_smoothed'] = gaussian_filter1d(df['detected_people'], sigma)

# 스케일링 (StandardScaler 사용)
scaler = StandardScaler()
df['scaled_detected_people'] = scaler.fit_transform(df[['detected_people_smoothed']])

# 요일 추가 (2024년 9월 1일이 일요일로 가정)
start_date = pd.Timestamp('2024-09-01')
df['weekday'] = ((df['date'] - start_date).dt.days % 7).apply(
    lambda x: ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'][x])

# 시간(hour)로 변환
df['hour'] = df['time'] // 3600

# 2. 학습 데이터 준비
time_window = 6  # 학습에 사용할 시간 창 (6시간)
prediction_time = 1  # 1시간 후 예측
x_train, y_train = [], []

for i in range(len(df) - time_window - prediction_time):
    x_train.append(df['scaled_detected_people'].iloc[i:i + time_window].values)
    y_train.append(df['scaled_detected_people'].iloc[i + time_window + prediction_time])

x_train, y_train = np.array(x_train), np.array(y_train)

# 3. Seq2Seq 모델 정의
class Seq2SeqModel(tf.keras.Model):
    def __init__(self, units, time_window, **kwargs):
        super(Seq2SeqModel, self).__init__(**kwargs)
        self.units = units
        self.time_window = time_window
        self.encoder = layers.LSTM(units=self.units, return_sequences=True)
        self.decoder = layers.LSTM(units=self.units, return_sequences=False)
        self.output_layer = layers.Dense(1)

    def call(self, inputs):
        encoder_outputs = self.encoder(inputs)
        decoder_outputs = self.decoder(encoder_outputs[:, -self.time_window:, :])
        output = self.output_layer(decoder_outputs)
        return output

# 모델 초기화
input_shape = (time_window, 1)
inputs = layers.Input(shape=input_shape)
seq2seq_model = Seq2SeqModel(units=200, time_window=time_window)  # units 증가
outputs = seq2seq_model(inputs)

# 모델 컴파일 및 학습
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='mse')
model.fit(x_train, y_train, epochs=5, batch_size=10, verbose=1)  # epoch 및 batch_size 조정

# 4. 모든 요일과 시간대 예측
predicted_values = []
hours = range(9, 22)  # 오전 9시부터 오후 9시까지
weekdays = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

for day, weekday in enumerate(weekdays):  # 요일 순환
    for hour in hours:
        hour_data = df[(df['weekday'] == weekday) & (df['hour'] == hour)]['scaled_detected_people'].values
        if len(hour_data) >= time_window:  # 충분한 데이터가 있을 경우
            hour_input = np.array(hour_data[:time_window]).reshape(1, time_window, 1)
            predicted = model.predict(hour_input)
            predicted = scaler.inverse_transform(predicted)
            predicted_values.append({'weekday': weekday, 'hour': hour, 'predicted_people': predicted[0, 0]})
        else:
            predicted_values.append({'weekday': weekday, 'hour': hour, 'predicted_people': None})

# 5. 결과 저장
output_df = pd.DataFrame(predicted_values)
output_df.to_csv('/Users/a0000/Desktop/predicted_cafe_congestion_all_days4.csv', index=False)

print(output_df)


Epoch 1/5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 0.1329
Epoch 2/5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0493
Epoch 3/5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.0383
Epoch 4/5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0288
Epoch 5/5
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.0362
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12m