diff --git a/.coderabbit.yaml b/.coderabbit.yaml
new file mode 100644
index 0000000..065526b
--- /dev/null
+++ b/.coderabbit.yaml
@@ -0,0 +1,15 @@
+# .coderabbit.yaml
+language: "ko-KR"
+early_access: false
+reviews:
+ profile: "chill"
+ request_changes_workflow: false
+ high_level_summary: true
+ poem: true
+ review_status: true
+ collapse_walkthrough: false
+ auto_review:
+ enabled: true
+ drafts: false
+chat:
+ auto_reply: true
\ No newline at end of file
diff --git a/Assets/Core/Audio/AudioRecorder.cs b/Assets/Core/Audio/AudioRecorder.cs
index c5eb164..6e26ac0 100644
--- a/Assets/Core/Audio/AudioRecorder.cs
+++ b/Assets/Core/Audio/AudioRecorder.cs
@@ -2,43 +2,64 @@
using System;
using UnityEngine;
using System.Collections.Generic;
+using ProjectVG.Infrastructure.Audio;
namespace ProjectVG.Core.Audio
{
+ /// <summary>
+ /// Time-accurate voice recording system.
+ /// Produces audio of the exact length based on the recording start/stop times.
+ /// </summary>
public class AudioRecorder : Singleton<AudioRecorder>
{
[Header("Recording Settings")]
[SerializeField] private int _sampleRate = 44100;
[SerializeField] private int _channels = 1;
- [SerializeField] private int _maxRecordingLength = 30;
+ [SerializeField] private int _maxRecordingLength = 30; // maximum recording length (seconds)
+
+ [Header("Audio Processing")]
+ [SerializeField] private bool _enableNoiseReduction = false; // noise reduction disabled by default
+ [SerializeField] private float _silenceThreshold = 0.001f; // lowered silence threshold
private AudioClip? _recordingClip;
private bool _isRecording = false;
private float _recordingStartTime;
- private List<float> _audioBuffer;
-
- public bool IsRecording => _isRecording;
- public float RecordingDuration => _isRecording ? Time.time - _recordingStartTime : 0f;
- public bool IsRecordingAvailable => Microphone.devices.Length > 0;
+ private float _recordingEndTime;
+ private string? _currentDevice = null;
+ // Events
public event Action? OnRecordingStarted;
public event Action? OnRecordingStopped;
public event Action<AudioClip>? OnRecordingCompleted;
public event Action<string>? OnError;
+ public event Action<float>? OnRecordingProgress; // recording progress (0-1)
+
+ // Properties
+ public bool IsRecording => _isRecording;
+ public float RecordingDuration => _isRecording ? Time.time - _recordingStartTime : 0f;
+ public bool IsRecordingAvailable => Microphone.devices.Length > 0;
+ public float RecordingProgress => _isRecording ? Mathf.Clamp01(RecordingDuration / _maxRecordingLength) : 0f;
#region Unity Lifecycle
protected override void Awake()
{
base.Awake();
- _audioBuffer = new List<float>();
+ InitializeMicrophone();
}
private void Update()
{
- if (_isRecording && RecordingDuration >= _maxRecordingLength)
+ if (_isRecording)
{
- StopRecording();
+ // Fire the recording progress event
+ OnRecordingProgress?.Invoke(RecordingProgress);
+
+ // Check the maximum recording time
+ if (RecordingDuration >= _maxRecordingLength)
+ {
+ StopRecording();
+ }
}
}
@@ -54,6 +75,10 @@ private void OnDestroy()
#region Public Methods
+ /// <summary>
+ /// Starts voice recording.
+ /// </summary>
+ /// <returns>Whether recording started successfully</returns>
public bool StartRecording()
{
if (_isRecording)
@@ -73,10 +98,11 @@ public bool StartRecording()
{
_isRecording = true;
_recordingStartTime = Time.time;
- _audioBuffer.Clear();
- _recordingClip = Microphone.Start(null, false, _maxRecordingLength, _sampleRate);
+ // Allocate a buffer sized to the maximum recording length
+ _recordingClip = Microphone.Start(_currentDevice ?? string.Empty, false, _maxRecordingLength, _sampleRate);
+ Debug.Log($"[AudioRecorder] 음성 녹음 시작됨 (최대 {_maxRecordingLength}초, {_sampleRate}Hz)");
OnRecordingStarted?.Invoke();
return true;
@@ -90,6 +116,10 @@ public bool StartRecording()
}
}
+ /// <summary>
+ /// Stops voice recording.
+ /// </summary>
+ /// <returns>The processed AudioClip</returns>
public AudioClip? StopRecording()
{
if (!_isRecording)
@@ -101,18 +131,25 @@ public bool StartRecording()
try
{
_isRecording = false;
+ _recordingEndTime = Time.time;
+ float actualRecordingDuration = _recordingEndTime - _recordingStartTime;
- Microphone.End(null);
+ Microphone.End(_currentDevice ?? string.Empty);
if (_recordingClip != null)
{
- ProcessRecordingClip();
- OnRecordingCompleted?.Invoke(_recordingClip);
+ AudioClip? processedClip = ProcessRecordingClip(actualRecordingDuration);
+ if (processedClip != null)
+ {
+ Debug.Log($"[AudioRecorder] 음성 녹음 완료됨 ({actualRecordingDuration:F1}초, {processedClip.samples} 샘플)");
+ OnRecordingCompleted?.Invoke(processedClip);
+ OnRecordingStopped?.Invoke();
+ return processedClip;
+ }
}
OnRecordingStopped?.Invoke();
-
- return _recordingClip;
+ return null;
}
catch (Exception ex)
{
@@ -123,72 +160,186 @@ public bool StartRecording()
}
}
- public byte[] AudioClipToBytes(AudioClip audioClip)
+ /// <summary>
+ /// Converts an AudioClip to a WAV byte array.
+ /// </summary>
+ public byte[] AudioClipToWavBytes(AudioClip audioClip)
{
if (audioClip == null)
- return new byte[0];
-
+ return Array.Empty<byte>();
try
{
- float[] samples = new float[audioClip.samples * audioClip.channels];
- audioClip.GetData(samples, 0);
-
- byte[] audioBytes = new byte[samples.Length * 2];
- for (int i = 0; i < samples.Length; i++)
+ return WavEncoder.FromAudioClip(audioClip);
+ }
+ catch (Exception ex)
+ {
+ Debug.LogError($"[AudioRecorder] WAV 변환 실패: {ex.Message}");
+ return Array.Empty<byte>();
+ }
+ }
+
+ /// <summary>
+ /// Saves the recording to a file (for debugging).
+ /// </summary>
+ public bool SaveRecordingToFile(AudioClip audioClip, string fileName = "recording")
+ {
+ if (audioClip == null)
+ {
+ Debug.LogError("[AudioRecorder] 저장할 AudioClip이 null입니다.");
+ return false;
+ }
+
+ try
+ {
+ byte[] wavData = AudioClipToWavBytes(audioClip);
+ if (wavData.Length == 0)
{
- short sample = (short)(samples[i] * short.MaxValue);
- BitConverter.GetBytes(sample).CopyTo(audioBytes, i * 2);
+ Debug.LogError("[AudioRecorder] WAV 데이터 변환 실패");
+ return false;
}
+
+ string filePath = System.IO.Path.Combine(Application.persistentDataPath, $"{fileName}.wav");
+ System.IO.File.WriteAllBytes(filePath, wavData);
- return audioBytes;
+ Debug.Log($"[AudioRecorder] 녹음 파일 저장됨: {filePath} ({wavData.Length} bytes)");
+
+ return true;
}
catch (Exception ex)
{
- Debug.LogError($"[AudioRecorder] AudioClip을 byte 배열로 변환 실패: {ex.Message}");
- return new byte[0];
+ Debug.LogError($"[AudioRecorder] 파일 저장 실패: {ex.Message}");
+ return false;
}
}
+ /// <summary>
+ /// Returns the list of available microphones.
+ /// </summary>
public string[] GetAvailableMicrophones()
{
return Microphone.devices;
}
+ /// <summary>
+ /// Returns the default microphone.
+ /// </summary>
public string GetDefaultMicrophone()
{
string[] devices = Microphone.devices;
return devices.Length > 0 ? devices[0] : string.Empty;
}
+ /// <summary>
+ /// Sets the current microphone.
+ /// </summary>
+ public void SetMicrophone(string deviceName)
+ {
+ if (_isRecording)
+ {
+ Debug.LogError("[AudioRecorder] 녹음 중에는 마이크를 변경할 수 없습니다.");
+ return;
+ }
+
+ if (Array.Exists(Microphone.devices, device => device == deviceName))
+ {
+ _currentDevice = deviceName;
+ Debug.Log($"[AudioRecorder] 마이크 변경됨: {deviceName}");
+ }
+ else
+ {
+ Debug.LogWarning($"[AudioRecorder] 존재하지 않는 마이크: {deviceName}");
+ }
+ }
+
#endregion
#region Private Methods
- private void ProcessRecordingClip()
+ /// <summary>
+ /// Initializes the microphone.
+ /// </summary>
+ private void InitializeMicrophone()
+ {
+ string[] devices = Microphone.devices;
+ if (devices.Length > 0)
+ {
+ _currentDevice = devices[0];
+ Debug.Log($"[AudioRecorder] 마이크 초기화됨: {_currentDevice}");
+ }
+ else
+ {
+ Debug.LogError("[AudioRecorder] 사용 가능한 마이크가 없습니다.");
+ }
+ }
+
+ /// <summary>
+ /// Processes the recorded AudioClip.
+ /// </summary>
+ private AudioClip? ProcessRecordingClip(float actualDuration)
{
if (_recordingClip == null)
- return;
+ return null;
- int recordedLength = Microphone.GetPosition(null);
- if (recordedLength <= 0)
+ // Calculate the sample count from the actual recording duration
+ int actualSamples = Mathf.RoundToInt(actualDuration * _sampleRate);
+
+ // Clamp to the maximum sample count (the buffer size)
+ int maxSamples = _recordingClip.samples;
+ actualSamples = Mathf.Min(actualSamples, maxSamples);
+
+ Debug.Log($"[AudioRecorder] 녹음 데이터 처리 중 ({actualSamples}/{_recordingClip.samples} 샘플, {actualDuration:F1}초)");
+
+ if (actualSamples <= 0)
{
Debug.LogWarning("[AudioRecorder] 녹음된 데이터가 없습니다.");
- return;
+ return null;
}
+ // Create a new AudioClip covering only the actually recorded length
AudioClip processedClip = AudioClip.Create(
"RecordedAudio",
- recordedLength,
+ actualSamples,
_recordingClip.channels,
_recordingClip.frequency,
false
);
- float[] samples = new float[recordedLength * _recordingClip.channels];
+ float[] samples = new float[actualSamples * _recordingClip.channels];
_recordingClip.GetData(samples, 0);
+
+ // Apply noise reduction
+ if (_enableNoiseReduction)
+ {
+ ApplyNoiseReduction(samples);
+ }
+
processedClip.SetData(samples, 0);
+ // Destroy the original AudioClip to avoid a memory leak
+ if (_recordingClip != null)
+ {
+ DestroyImmediate(_recordingClip);
+ }
+
_recordingClip = processedClip;
+
+ Debug.Log($"[AudioRecorder] AudioClip 생성 완료 ({_recordingClip.samples} 샘플, {_recordingClip.channels} 채널, {_recordingClip.frequency}Hz)");
+
+ return _recordingClip;
+ }
+
+ /// <summary>
+ /// Applies noise reduction (zeroes samples below the silence threshold).
+ /// </summary>
+ private void ApplyNoiseReduction(float[] audioData)
+ {
+ for (int i = 0; i < audioData.Length; i++)
+ {
+ if (Mathf.Abs(audioData[i]) < _silenceThreshold)
+ {
+ audioData[i] = 0f;
+ }
+ }
}
#endregion
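
A minimal usage sketch of the recorder above, assuming the Singleton base exposes a static Instance property (not shown in this diff); the caller class and its method names are illustrative only.

    using UnityEngine;
    using ProjectVG.Core.Audio;

    // Illustrative caller; assumes AudioRecorder.Instance is provided by the Singleton base.
    public class RecordingExample : MonoBehaviour
    {
        private void OnEnable()  { AudioRecorder.Instance.OnRecordingCompleted += HandleClip; }
        private void OnDisable() { AudioRecorder.Instance.OnRecordingCompleted -= HandleClip; }

        public void BeginCapture()
        {
            if (!AudioRecorder.Instance.StartRecording())
            {
                Debug.LogWarning("Recording could not be started.");
            }
        }

        public void EndCapture()
        {
            // StopRecording trims the clip to the actual recorded duration, or returns null.
            AudioClip clip = AudioRecorder.Instance.StopRecording();
            if (clip != null)
            {
                byte[] wav = AudioRecorder.Instance.AudioClipToWavBytes(clip);
                Debug.Log($"Captured {wav.Length} WAV bytes.");
            }
        }

        private void HandleClip(AudioClip clip)
        {
            // Optional: persist the take for debugging.
            AudioRecorder.Instance.SaveRecordingToFile(clip, "debug_take");
        }
    }
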
diff --git a/Assets/Infrastructure/Network/Editor.meta b/Assets/Core/Chat.meta
similarity index 77%
rename from Assets/Infrastructure/Network/Editor.meta
rename to Assets/Core/Chat.meta
index 667dfbd..1c75036 100644
--- a/Assets/Infrastructure/Network/Editor.meta
+++ b/Assets/Core/Chat.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: 84b7758c3c4878646b31785d9a1c3f22
+guid: a51c1117b42a1cb449577dbfe0758bd5
folderAsset: yes
DefaultImporter:
externalObjects: {}
diff --git a/Assets/Domain/Chat/View/VoiceInputView.cs b/Assets/Domain/Chat/View/VoiceInputView.cs
index 6dbaab5..9a4baba 100644
--- a/Assets/Domain/Chat/View/VoiceInputView.cs
+++ b/Assets/Domain/Chat/View/VoiceInputView.cs
@@ -6,6 +6,9 @@
using ProjectVG.Domain.Chat.Service;
using ProjectVG.Infrastructure.Network.Services;
using ProjectVG.Core.Audio;
+#if UNITY_EDITOR
+using UnityEditor;
+#endif
namespace ProjectVG.Domain.Chat.View
{
@@ -15,15 +18,18 @@ public class VoiceInputView : MonoBehaviour
[SerializeField] private Button? _btnVoice;
[SerializeField] private Button? _btnVoiceStop;
[SerializeField] private TextMeshProUGUI? _txtVoiceStatus;
+ [SerializeField] private Slider? _progressBar; // shows recording progress
[Header("Voice Settings")]
[SerializeField] private float _maxRecordingTime = 30f;
- [SerializeField] private string _voiceStatusRecording = "녹음 중...";
- [SerializeField] private string _voiceStatusProcessing = "음성을 텍스트로 변환 중...";
+ [SerializeField] private string _voiceStatusRecording = "Recording..."; // changed from "녹음 중..."
+ [SerializeField] private string _voiceStatusProcessing = "Converting speech to text..."; // changed from "음성을 텍스트로 변환 중..."
+
+
private ChatManager? _chatManager;
private AudioRecorder? _audioRecorder;
- private ISTTService? _sttService;
+ private STTService? _sttService;
private bool _isRecording = false;
private float _recordingStartTime;
@@ -39,10 +45,11 @@ private void Start()
private void Update()
{
- if (_isRecording && Time.time - _recordingStartTime > _maxRecordingTime)
- {
- StopVoiceRecording();
- }
+ // Removed: the new AudioRecorder enforces the maximum recording time itself
+ // if (_isRecording && Time.time - _recordingStartTime > _maxRecordingTime)
+ // {
+ // StopVoiceRecording();
+ // }
}
private void OnDestroy()
@@ -57,6 +64,7 @@ private void OnDestroy()
_audioRecorder.OnRecordingStarted -= OnRecordingStarted;
_audioRecorder.OnRecordingStopped -= OnRecordingStopped;
_audioRecorder.OnRecordingCompleted -= OnRecordingCompleted;
+ _audioRecorder.OnRecordingProgress -= OnRecordingProgress;
_audioRecorder.OnError -= OnRecordingError;
}
}
@@ -130,6 +138,7 @@ public void StartVoiceRecording()
_recordingStartTime = Time.time;
UpdateVoiceButtonState(true);
UpdateVoiceStatus(_voiceStatusRecording);
+ UpdateProgressBar(0f);
bool success = _audioRecorder.StartRecording();
if (!success)
@@ -161,11 +170,12 @@ public void StopVoiceRecording()
_isRecording = false;
UpdateVoiceButtonState(false);
UpdateVoiceStatus(string.Empty);
+ UpdateProgressBar(0f);
AudioClip? recordedClip = _audioRecorder.StopRecording();
if (recordedClip != null)
{
- byte[] audioData = _audioRecorder.AudioClipToBytes(recordedClip);
+ byte[] audioData = _audioRecorder.AudioClipToWavBytes(recordedClip);
if (audioData.Length > 0)
{
SendVoiceMessage(audioData);
@@ -211,6 +221,15 @@ private void SetupComponents()
Debug.LogWarning("[VoiceInputView] TxtVoiceStatus 텍스트를 찾을 수 없습니다.");
}
}
+
+ if (_progressBar == null)
+ {
+ _progressBar = transform.Find("ProgressBar")?.GetComponent<Slider>();
+ if (_progressBar == null)
+ {
+ Debug.LogWarning("[VoiceInputView] ProgressBar 슬라이더를 찾을 수 없습니다.");
+ }
+ }
if (_audioRecorder == null)
{
@@ -244,6 +263,7 @@ private void SetupEventHandlers()
_audioRecorder.OnRecordingStarted += OnRecordingStarted;
_audioRecorder.OnRecordingStopped += OnRecordingStopped;
_audioRecorder.OnRecordingCompleted += OnRecordingCompleted;
+ _audioRecorder.OnRecordingProgress += OnRecordingProgress;
_audioRecorder.OnError += OnRecordingError;
}
}
@@ -278,6 +298,15 @@ private void UpdateVoiceStatus(string status)
}
}
+ private void UpdateProgressBar(float progress)
+ {
+ if (_progressBar != null)
+ {
+ _progressBar.value = progress;
+ _progressBar.gameObject.SetActive(progress > 0f);
+ }
+ }
+
private async System.Threading.Tasks.Task<string> ConvertSpeechToText(byte[] audioData)
{
if (_sttService == null)
@@ -288,11 +317,6 @@ private async System.Threading.Tasks.Task ConvertSpeechToText(byte[] aud
try
{
- if (!_sttService.IsAvailable)
- {
- await _sttService.InitializeAsync();
- }
-
string transcribedText = await _sttService.ConvertSpeechToTextAsync(audioData);
return transcribedText;
}
@@ -303,6 +327,30 @@ private async System.Threading.Tasks.Task ConvertSpeechToText(byte[] aud
}
}
+ /// <summary>
+ /// Tests the STT server with dummy audio.
+ /// </summary>
+ [ContextMenu("Test STT with Dummy Audio")]
+ public async void TestSTTWithDummyAudio()
+ {
+ if (_sttService == null)
+ {
+ Debug.LogError("[VoiceInputView] STT 서비스가 없습니다.");
+ return;
+ }
+
+ try
+ {
+ byte[] dummyAudio = _sttService.GenerateTestAudioData();
+ string result = await _sttService.ConvertSpeechToTextAsync(dummyAudio);
+ Debug.Log($"[VoiceInputView] STT 테스트 결과: '{result}'");
+ }
+ catch (Exception ex)
+ {
+ Debug.LogError($"[VoiceInputView] STT 테스트 실패: {ex.Message}");
+ }
+ }
+
private void OnVoiceButtonClicked()
{
StartVoiceRecording();
@@ -315,19 +363,26 @@ private void OnVoiceStopButtonClicked()
private void OnRecordingStarted()
{
+ // Logging is handled by AudioRecorder
}
private void OnRecordingStopped()
{
+ // Logging is handled by AudioRecorder
}
private void OnRecordingCompleted(AudioClip audioClip)
{
+ // Logging is handled by AudioRecorder
+ }
+
+ private void OnRecordingProgress(float progress)
+ {
+ UpdateProgressBar(progress);
}
private void OnRecordingError(string error)
{
- Debug.LogError($"[VoiceInputView] 녹음 오류: {error}");
OnError?.Invoke(error);
}
diff --git a/Assets/Infrastructure/Audio.meta b/Assets/Infrastructure/Audio.meta
new file mode 100644
index 0000000..bdd5d0c
--- /dev/null
+++ b/Assets/Infrastructure/Audio.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 8d53a70bafcfdff4da867701f8ccfdfb
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Assets/Infrastructure/Audio/WavEncoder.cs b/Assets/Infrastructure/Audio/WavEncoder.cs
new file mode 100644
index 0000000..ef3a4cb
--- /dev/null
+++ b/Assets/Infrastructure/Audio/WavEncoder.cs
@@ -0,0 +1,95 @@
+using System;
+using UnityEngine;
+
+namespace ProjectVG.Infrastructure.Audio
+{
+ /// <summary>
+ /// Encodes an AudioClip into a WAV-format byte array.
+ /// </summary>
+ public static class WavEncoder
+ {
+ /// <summary>
+ /// Converts an AudioClip into a 16-bit PCM WAV byte array.
+ /// </summary>
+ public static byte[] FromAudioClip(AudioClip audioClip)
+ {
+ if (audioClip == null)
+ {
+ return Array.Empty<byte>();
+ }
+
+ int channels = audioClip.channels;
+ int sampleRate = audioClip.frequency;
+ int sampleCount = audioClip.samples * channels;
+
+ float[] samples = new float[sampleCount];
+ audioClip.GetData(samples, 0);
+
+ byte[] pcm16 = new byte[sampleCount * 2];
+ int pcmIndex = 0;
+ for (int i = 0; i < sampleCount; i++)
+ {
+ float clamped = Mathf.Clamp(samples[i], -1f, 1f);
+ short s = (short)Mathf.RoundToInt(clamped * short.MaxValue);
+ pcm16[pcmIndex++] = (byte)(s & 0xFF);
+ pcm16[pcmIndex++] = (byte)((s >> 8) & 0xFF);
+ }
+
+ return WrapPcm16ToWav(pcm16, channels, sampleRate);
+ }
+
+ /// <summary>
+ /// Wraps 16-bit PCM sample data in a WAV container.
+ /// </summary>
+ public static byte[] WrapPcm16ToWav(byte[] pcm16Data, int channels, int sampleRate)
+ {
+ if (pcm16Data == null || pcm16Data.Length == 0)
+ {
+ return Array.Empty<byte>();
+ }
+
+ int bitsPerSample = 16;
+ int subchunk2Size = pcm16Data.Length;
+ int byteRate = sampleRate * channels * (bitsPerSample / 8);
+ short blockAlign = (short)(channels * (bitsPerSample / 8));
+ int chunkSize = 36 + subchunk2Size;
+
+ byte[] wav = new byte[44 + subchunk2Size];
+ int i = 0;
+
+ wav[i++] = (byte)'R'; wav[i++] = (byte)'I'; wav[i++] = (byte)'F'; wav[i++] = (byte)'F';
+ wav[i++] = (byte)(chunkSize & 0xFF);
+ wav[i++] = (byte)((chunkSize >> 8) & 0xFF);
+ wav[i++] = (byte)((chunkSize >> 16) & 0xFF);
+ wav[i++] = (byte)((chunkSize >> 24) & 0xFF);
+ wav[i++] = (byte)'W'; wav[i++] = (byte)'A'; wav[i++] = (byte)'V'; wav[i++] = (byte)'E';
+ wav[i++] = (byte)'f'; wav[i++] = (byte)'m'; wav[i++] = (byte)'t'; wav[i++] = (byte)' ';
+ wav[i++] = 16; wav[i++] = 0; wav[i++] = 0; wav[i++] = 0;
+ wav[i++] = 1; wav[i++] = 0;
+ wav[i++] = (byte)(channels & 0xFF);
+ wav[i++] = (byte)((channels >> 8) & 0xFF);
+ wav[i++] = (byte)(sampleRate & 0xFF);
+ wav[i++] = (byte)((sampleRate >> 8) & 0xFF);
+ wav[i++] = (byte)((sampleRate >> 16) & 0xFF);
+ wav[i++] = (byte)((sampleRate >> 24) & 0xFF);
+ wav[i++] = (byte)(byteRate & 0xFF);
+ wav[i++] = (byte)((byteRate >> 8) & 0xFF);
+ wav[i++] = (byte)((byteRate >> 16) & 0xFF);
+ wav[i++] = (byte)((byteRate >> 24) & 0xFF);
+ wav[i++] = (byte)(blockAlign & 0xFF);
+ wav[i++] = (byte)((blockAlign >> 8) & 0xFF);
+ wav[i++] = (byte)(bitsPerSample & 0xFF);
+ wav[i++] = (byte)((bitsPerSample >> 8) & 0xFF);
+ wav[i++] = (byte)'d'; wav[i++] = (byte)'a'; wav[i++] = (byte)'t'; wav[i++] = (byte)'a';
+ wav[i++] = (byte)(subchunk2Size & 0xFF);
+ wav[i++] = (byte)((subchunk2Size >> 8) & 0xFF);
+ wav[i++] = (byte)((subchunk2Size >> 16) & 0xFF);
+ wav[i++] = (byte)((subchunk2Size >> 24) & 0xFF);
+
+ Buffer.BlockCopy(pcm16Data, 0, wav, 44, subchunk2Size);
+ return wav;
+ }
+ }
+}
+
+
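
The header written by WrapPcm16ToWav above is the standard 44-byte PCM WAV layout, so the sizes are easy to sanity-check: one second of mono 16-bit audio at 22050 Hz gives byteRate = 22050 * 1 * 2 = 44100, blockAlign = 2, subchunk2Size = 44100 and chunkSize = 36 + 44100 = 44136. A small sketch of that check (values illustrative):

    using UnityEngine;
    using ProjectVG.Infrastructure.Audio;

    public static class WavEncoderExample
    {
        public static void Run()
        {
            // One second of silence: 22050 mono samples, 2 bytes per sample.
            byte[] pcm = new byte[22050 * 2];
            byte[] wav = WavEncoder.WrapPcm16ToWav(pcm, channels: 1, sampleRate: 22050);

            // Expect a 44-byte header followed by the PCM payload.
            Debug.Log($"WAV size: {wav.Length} bytes (expected {44 + pcm.Length}).");
        }
    }
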
diff --git a/Assets/Infrastructure/Audio/WavEncoder.cs.meta b/Assets/Infrastructure/Audio/WavEncoder.cs.meta
new file mode 100644
index 0000000..7dbd000
--- /dev/null
+++ b/Assets/Infrastructure/Audio/WavEncoder.cs.meta
@@ -0,0 +1,2 @@
+fileFormatVersion: 2
+guid: 35b3cb11f27bd1a469a627d275fe49f1
\ No newline at end of file
diff --git a/Assets/Infrastructure/Network/Configs/NetworkConfig.cs b/Assets/Infrastructure/Network/Configs/NetworkConfig.cs
index 7a7e077..d4c1d94 100644
--- a/Assets/Infrastructure/Network/Configs/NetworkConfig.cs
+++ b/Assets/Infrastructure/Network/Configs/NetworkConfig.cs
@@ -25,6 +25,11 @@ public class NetworkConfig : ScriptableObject
[SerializeField] private int maxRetryCount = 3;
[SerializeField] private float retryDelay = 1f;
+ [Header("File Upload Settings")]
+ [SerializeField] private int maxFileSize = 10485760; // 10MB (bytes)
+ [SerializeField] private float uploadTimeout = 60f; // longer timeout for file uploads
+ [SerializeField] private bool enableFileSizeCheck = true;
+
[Header("WebSocket Settings")]
[SerializeField] private string wsPath = "ws";
[SerializeField] private float wsTimeout = 30f;
@@ -178,6 +183,11 @@ public static string GetWebSocketUrlWithSession(string sessionId)
public static string UserAgent => Instance.userAgent;
public static string ContentType => Instance.contentType;
+ // File Upload Settings
+ public static int MaxFileSize => Instance.maxFileSize;
+ public static float UploadTimeout => Instance.uploadTimeout;
+ public static bool EnableFileSizeCheck => Instance.enableFileSizeCheck;
+
// WebSocket 설정 정적 접근자 복원
public static float WebSocketTimeout => Instance.wsTimeout;
public static float ReconnectDelay => Instance.reconnectDelay;
@@ -234,6 +244,9 @@ private static NetworkConfig CreateDefaultInstance()
instance.enableMessageLogging = true;
instance.userAgent = "ProjectVG-Client/1.0";
instance.contentType = "application/json";
+ instance.maxFileSize = 10485760;
+ instance.uploadTimeout = 60f;
+ instance.enableFileSizeCheck = true;
Debug.LogWarning("기본 NetworkConfig를 생성했습니다. Resources 폴더에 NetworkConfig.asset 파일을 생성하는 것을 권장합니다.");
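
A short sketch of how the new upload limits are meant to be consulted before sending a payload; this mirrors the guard HttpApiClient applies below, and the NetworkConfig namespace import is an assumption based on the file path.

    using UnityEngine;
    using ProjectVG.Infrastructure.Network.Configs; // assumed namespace for NetworkConfig

    public static class UploadSizeGuard
    {
        // Returns true when the payload is allowed under the configured limits.
        public static bool CanUpload(byte[] payload)
        {
            if (!NetworkConfig.EnableFileSizeCheck)
            {
                return true;
            }

            if (payload.Length > NetworkConfig.MaxFileSize)
            {
                double sizeMB = payload.Length / 1024.0 / 1024.0;
                double limitMB = NetworkConfig.MaxFileSize / 1024.0 / 1024.0;
                Debug.LogWarning($"Upload rejected: {sizeMB:F2}MB exceeds {limitMB:F2}MB.");
                return false;
            }

            return true;
        }
    }
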
diff --git a/Assets/Infrastructure/Network/Http/HttpApiClient.cs b/Assets/Infrastructure/Network/Http/HttpApiClient.cs
index 7a880f9..f7af97d 100644
--- a/Assets/Infrastructure/Network/Http/HttpApiClient.cs
+++ b/Assets/Infrastructure/Network/Http/HttpApiClient.cs
@@ -56,27 +56,27 @@ public void SetAuthToken(string token)
public async UniTask<T> GetAsync<T>(string endpoint, Dictionary<string, string> headers = null, CancellationToken cancellationToken = default)
{
var url = IsFullUrl(endpoint) ? endpoint : GetFullUrl(endpoint);
- return await SendRequestAsync<T>(url, UnityWebRequest.kHttpVerbGET, null, headers, cancellationToken);
+ return await SendJsonRequestAsync<T>(url, UnityWebRequest.kHttpVerbGET, null, headers, cancellationToken);
}
public async UniTask<T> PostAsync<T>(string endpoint, object data = null, Dictionary<string, string> headers = null, bool requiresSession = false, CancellationToken cancellationToken = default)
{
var url = GetFullUrl(endpoint);
var jsonData = SerializeData(data, requiresSession);
- return await SendRequestAsync<T>(url, UnityWebRequest.kHttpVerbPOST, jsonData, headers, cancellationToken);
+ return await SendJsonRequestAsync<T>(url, UnityWebRequest.kHttpVerbPOST, jsonData, headers, cancellationToken);
}
public async UniTask<T> PutAsync<T>(string endpoint, object data = null, Dictionary<string, string> headers = null, bool requiresSession = false, CancellationToken cancellationToken = default)
{
var url = GetFullUrl(endpoint);
var jsonData = SerializeData(data, requiresSession);
- return await SendRequestAsync<T>(url, UnityWebRequest.kHttpVerbPUT, jsonData, headers, cancellationToken);
+ return await SendJsonRequestAsync<T>(url, UnityWebRequest.kHttpVerbPUT, jsonData, headers, cancellationToken);
}
public async UniTask<T> DeleteAsync<T>(string endpoint, Dictionary<string, string> headers = null, CancellationToken cancellationToken = default)
{
var url = GetFullUrl(endpoint);
- return await SendRequestAsync<T>(url, UnityWebRequest.kHttpVerbDELETE, null, headers, cancellationToken);
+ return await SendJsonRequestAsync<T>(url, UnityWebRequest.kHttpVerbDELETE, null, headers, cancellationToken);
}
public async UniTask<T> UploadFileAsync<T>(string endpoint, byte[] fileData, string fileName, string fieldName = "file", Dictionary<string, string> headers = null, CancellationToken cancellationToken = default)
@@ -88,7 +88,31 @@ public async UniTask UploadFileAsync(string endpoint, byte[] fileData, str
public async UniTask<T> PostFormDataAsync<T>(string endpoint, Dictionary<string, object> formData, Dictionary<string, string> headers = null, CancellationToken cancellationToken = default)
{
var url = IsFullUrl(endpoint) ? endpoint : GetFullUrl(endpoint);
- return await SendFormDataRequestAsync<T>(url, formData, headers, cancellationToken);
+ return await SendFormDataRequestAsync<T>(url, formData, null, headers, cancellationToken);
+ }
+
+ public async UniTask<T> PostFormDataAsync<T>(string endpoint, Dictionary<string, object> formData, Dictionary<string, string> fileNames, Dictionary<string, string> headers = null, CancellationToken cancellationToken = default)
+ {
+ var url = IsFullUrl(endpoint) ? endpoint : GetFullUrl(endpoint);
+
+ // File size check
+ if (NetworkConfig.EnableFileSizeCheck)
+ {
+ foreach (var kvp in formData)
+ {
+ if (kvp.Value is byte[] byteData)
+ {
+ if (byteData.Length > NetworkConfig.MaxFileSize)
+ {
+ var fileSizeMB = byteData.Length / 1024.0 / 1024.0;
+ var maxSizeMB = NetworkConfig.MaxFileSize / 1024.0 / 1024.0;
+ throw new FileSizeExceededException(fileSizeMB, maxSizeMB);
+ }
+ }
+ }
+ }
+
+ return await SendFormDataRequestAsync<T>(url, formData, fileNames, headers, cancellationToken);
}
public void Shutdown()
@@ -158,6 +182,41 @@ private void LogRequestDetails(string method, string url, string jsonData)
{
}
+ private async UniTask<T> SendJsonRequestAsync<T>(string url, string method, string jsonData, Dictionary<string, string> headers, CancellationToken cancellationToken)
+ {
+ var combinedCancellationToken = CreateCombinedCancellationToken(cancellationToken);
+
+ for (int attempt = 0; attempt <= NetworkConfig.MaxRetryCount; attempt++)
+ {
+ try
+ {
+ using var request = CreateJsonRequest(url, method, jsonData, headers);
+
+ var operation = request.SendWebRequest();
+ await operation.WithCancellation(combinedCancellationToken);
+
+ if (request.result == UnityWebRequest.Result.Success)
+ {
+ return ParseResponse<T>(request);
+ }
+ else
+ {
+ await HandleRequestFailure(request, attempt, combinedCancellationToken);
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ throw;
+ }
+ catch (Exception ex) when (ex is not ApiException)
+ {
+ await HandleRequestException(ex, attempt, combinedCancellationToken);
+ }
+ }
+
+ throw new ApiException($"{NetworkConfig.MaxRetryCount + 1}번 시도 후 요청 실패", 0, "최대 재시도 횟수 초과");
+ }
+
private async UniTask<T> SendRequestAsync<T>(string url, string method, string jsonData, Dictionary<string, string> headers, CancellationToken cancellationToken)
{
var combinedCancellationToken = CreateCombinedCancellationToken(cancellationToken);
@@ -233,8 +292,11 @@ private async UniTask SendFileRequestAsync(string url, byte[] fileData, st
throw new ApiException($"{NetworkConfig.MaxRetryCount + 1}번 시도 후 파일 업로드 실패", 0, "최대 재시도 횟수 초과");
}
- private async UniTask<T> SendFormDataRequestAsync<T>(string url, Dictionary<string, object> formData, Dictionary<string, string> headers, CancellationToken cancellationToken)
+
+
+ private async UniTask<T> SendFormDataRequestAsync<T>(string url, Dictionary<string, object> formData, Dictionary<string, string> fileNames, Dictionary<string, string> headers, CancellationToken cancellationToken)
{
+ fileNames = fileNames ?? new Dictionary<string, string>();
var combinedCancellationToken = CreateCombinedCancellationToken(cancellationToken);
for (int attempt = 0; attempt <= NetworkConfig.MaxRetryCount; attempt++)
@@ -242,23 +304,29 @@ private async UniTask<T> SendFormDataRequestAsync<T>(string url, Dictionary<string, object> formData, Dictionary<string, string> headers, CancellationToken cancellationToken)
+ private UnityWebRequest CreateJsonRequest(string url, string method, string jsonData, Dictionary<string, string> headers)
+ {
+ var request = new UnityWebRequest(url, method);
+
+ if (!string.IsNullOrEmpty(jsonData))
+ {
+ var bodyRaw = Encoding.UTF8.GetBytes(jsonData);
+ request.uploadHandler = new UploadHandlerRaw(bodyRaw);
+ request.SetRequestHeader("Content-Type", "application/json");
+ }
+
+ request.downloadHandler = new DownloadHandlerBuffer();
+ SetupRequest(request, headers);
+ request.timeout = (int)NetworkConfig.HttpTimeout;
+
+ return request;
+ }
+
private void SetupRequest(UnityWebRequest request, Dictionary<string, string> headers)
{
+ // Treat POST requests that already carry an upload handler (e.g. created via UnityWebRequest.Post) as file uploads
+ bool isFileUpload = request.method == UnityWebRequest.kHttpVerbPOST &&
+ request.uploadHandler != null;
+
foreach (var header in defaultHeaders)
{
+ // Skip the Content-Type header for file uploads (UnityWebRequest sets it automatically)
+ if (isFileUpload && header.Key.ToLower() == "content-type")
+ {
+ continue;
+ }
+
request.SetRequestHeader(header.Key, header.Value);
}
@@ -370,14 +466,22 @@ private void SetupRequest(UnityWebRequest request, Dictionary he
request.SetRequestHeader(header.Key, header.Value);
}
}
+
+ // Debugging: verify the Content-Type header
+ string contentType = request.GetRequestHeader("Content-Type");
+ Debug.Log($"[HttpApiClient] 요청 헤더 설정 완료 - Content-Type: {contentType}");
}
private T ParseResponse<T>(UnityWebRequest request)
{
var responseText = request.downloadHandler?.text;
+ Debug.Log($"[HttpApiClient] 응답 파싱 - Status: {request.responseCode}, Content-Length: {request.downloadHandler?.data?.Length ?? 0}");
+ Debug.Log($"[HttpApiClient] 응답 텍스트: '{responseText}'");
+
if (string.IsNullOrEmpty(responseText))
{
+ Debug.LogWarning("[HttpApiClient] 응답 텍스트가 비어있습니다.");
return default(T);
}
@@ -387,6 +491,7 @@ private T ParseResponse(UnityWebRequest request)
}
catch (Exception ex)
{
+ Debug.LogError($"[HttpApiClient] JSON 파싱 실패: {ex.Message}");
return TryFallbackParse(responseText, request.responseCode, ex);
}
}
@@ -423,4 +528,17 @@ public ApiException(string message, long statusCode, string responseBody)
ResponseBody = responseBody;
}
}
+
+ public class FileSizeExceededException : ApiException
+ {
+ public FileSizeExceededException(double fileSizeMB, double maxSizeMB)
+ : base($"File size exceeds limit: {fileSizeMB:F2}MB (limit: {maxSizeMB:F2}MB)", 413, null)
+ {
+ FileSizeMB = fileSizeMB;
+ MaxSizeMB = maxSizeMB;
+ }
+
+ public double FileSizeMB { get; }
+ public double MaxSizeMB { get; }
+ }
}
\ No newline at end of file
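
A hedged sketch of calling the new PostFormDataAsync overload and handling the size guard. The generic signature and dictionary value types are inferred from the call sites (the diff rendering dropped angle brackets), and the response DTO and endpoint used here are illustrative only.

    using System.Collections.Generic;
    using Cysharp.Threading.Tasks;
    using UnityEngine;
    using ProjectVG.Infrastructure.Network.Http; // assumed namespace for HttpApiClient

    public static class FormUploadExample
    {
        // Illustrative response type; substitute the real DTO for the endpoint.
        public class UploadResponse { public string Text; }

        public static async UniTask UploadAsync(byte[] wavBytes)
        {
            var formData = new Dictionary<string, object> { { "file", wavBytes } };
            var fileNames = new Dictionary<string, string> { { "file", "recording.wav" } };

            try
            {
                var response = await HttpApiClient.Instance.PostFormDataAsync<UploadResponse>(
                    "stt/transcribe?language=ko", formData, fileNames);
                Debug.Log($"Upload result: {response?.Text}");
            }
            catch (FileSizeExceededException ex)
            {
                Debug.LogWarning($"File too large: {ex.FileSizeMB:F2}MB (limit {ex.MaxSizeMB:F2}MB).");
            }
        }
    }
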
diff --git a/Assets/Infrastructure/Network/Services/ISTTService.cs b/Assets/Infrastructure/Network/Services/ISTTService.cs
deleted file mode 100644
index b22276a..0000000
--- a/Assets/Infrastructure/Network/Services/ISTTService.cs
+++ /dev/null
@@ -1,40 +0,0 @@
-#nullable enable
-using System;
-using System.Threading.Tasks;
-
-namespace ProjectVG.Infrastructure.Network.Services
-{
- /// <summary>
- /// Speech-to-Text 서비스 인터페이스
- /// 음성 데이터를 텍스트로 변환하는 기능을 제공합니다.
- /// </summary>
- public interface ISTTService
- {
- /// <summary>
- /// 음성 데이터를 텍스트로 변환
- /// </summary>
- /// <param name="audioData">음성 데이터</param>
- /// <param name="audioFormat">음성 포맷 (wav, mp3 등)</param>
- /// <param name="language">언어 코드 (ko-KR, en-US 등)</param>
- /// <returns>변환된 텍스트</returns>
- Task<string> ConvertSpeechToTextAsync(byte[] audioData, string audioFormat = "wav", string language = "ko-KR");
-
- /// <summary>
- /// 서비스 초기화
- /// </summary>
- /// <returns>초기화 성공 여부</returns>
- Task<bool> InitializeAsync();
-
- /// <summary>
- /// 서비스 연결 상태 확인
- /// </summary>
- /// <returns>연결 상태</returns>
- bool IsConnected { get; }
-
- /// <summary>
- /// 서비스 사용 가능 여부
- /// </summary>
- /// <returns>사용 가능 여부</returns>
- bool IsAvailable { get; }
- }
-}
\ No newline at end of file
diff --git a/Assets/Infrastructure/Network/Services/ISTTService.cs.meta b/Assets/Infrastructure/Network/Services/ISTTService.cs.meta
deleted file mode 100644
index 2253d6f..0000000
--- a/Assets/Infrastructure/Network/Services/ISTTService.cs.meta
+++ /dev/null
@@ -1,2 +0,0 @@
-fileFormatVersion: 2
-guid: 16aeee494dc1cb44d95b4148171e3a97
\ No newline at end of file
diff --git a/Assets/Infrastructure/Network/Services/STTService.cs b/Assets/Infrastructure/Network/Services/STTService.cs
index f574b03..cca1769 100644
--- a/Assets/Infrastructure/Network/Services/STTService.cs
+++ b/Assets/Infrastructure/Network/Services/STTService.cs
@@ -2,10 +2,12 @@
using System;
using System.Threading.Tasks;
using System.Collections.Generic;
+using System.Threading;
using UnityEngine;
using ProjectVG.Infrastructure.Network.Http;
using ProjectVG.Infrastructure.Network.DTOs.Chat;
using Newtonsoft.Json;
+using Cysharp.Threading.Tasks;
namespace ProjectVG.Infrastructure.Network.Services
{
@@ -13,51 +15,19 @@ namespace ProjectVG.Infrastructure.Network.Services
/// Speech-to-Text 서비스 구현체
/// HTTP API를 통해 음성을 텍스트로 변환합니다.
/// </summary>
- public class STTService : ISTTService
+ public class STTService
{
- private readonly string _baseUrl;
- private bool _isInitialized = false;
- private bool _isConnected = false;
+ private readonly HttpApiClient _httpClient;
- public bool IsConnected => _isConnected;
- public bool IsAvailable => _isInitialized && _isConnected;
+ public bool IsConnected => true; // assume the service is always reachable
+ public bool IsAvailable => _httpClient != null;
- public STTService(string baseUrl = "http://localhost:7920")
+ public STTService()
{
- _baseUrl = baseUrl;
- }
-
- /// <summary>
- /// STT 서비스 초기화
- /// </summary>
- /// <returns>초기화 성공 여부</returns>
- public async Task<bool> InitializeAsync()
- {
- try
- {
- // 서버 상태 확인 (전체 URL 사용)
- var healthResponse = await HttpApiClient.Instance.GetAsync($"{_baseUrl}/api/v1/health");
- if (healthResponse != null)
- {
- _isConnected = healthResponse.Status == "healthy" && healthResponse.ModelLoaded == true;
- _isInitialized = true;
-
- return _isConnected;
- }
- else
- {
- Debug.LogError("[STTService] STT 서버 상태 확인 실패");
- _isConnected = false;
- _isInitialized = false;
- return false;
- }
- }
- catch (Exception ex)
+ _httpClient = HttpApiClient.Instance;
+ if (_httpClient == null)
{
- Debug.LogError($"[STTService] STT 서비스 초기화 실패: {ex.Message}");
- _isConnected = false;
- _isInitialized = false;
- return false;
+ Debug.LogError("[STTService] HttpApiClient.Instance가 null입니다. HttpApiClient가 생성되지 않았습니다.");
}
}
@@ -68,13 +38,14 @@ public async Task InitializeAsync()
/// <param name="audioFormat">음성 포맷</param>
/// <param name="language">언어 코드</param>
/// <returns>변환된 텍스트</returns>
- public async Task<string> ConvertSpeechToTextAsync(byte[] audioData, string audioFormat = "wav", string language = "ko")
+ public async UniTask<string> ConvertSpeechToTextAsync(byte[] audioData, string audioFormat = "wav", string language = "ko", CancellationToken cancellationToken = default)
{
- if (!IsAvailable)
+ if (_httpClient == null)
{
- throw new InvalidOperationException("STT 서비스가 사용 불가능합니다.");
+ Debug.LogError("[STTService] HttpApiClient가 null입니다. 초기화를 확인해주세요.");
+ throw new InvalidOperationException("HttpApiClient가 초기화되지 않았습니다.");
}
-
+
if (audioData == null || audioData.Length == 0)
{
throw new ArgumentException("음성 데이터가 비어있습니다.");
@@ -82,29 +53,37 @@ public async Task ConvertSpeechToTextAsync(byte[] audioData, string audi
try
{
- // multipart/form-data로 파일 업로드
var formData = new Dictionary<string, object>
{
{ "file", audioData }
};
-
- // 쿼리 파라미터 추가
- string url = $"{_baseUrl}/api/v1/transcribe";
- if (!string.IsNullOrEmpty(language))
+
+ var fileNames = new Dictionary<string, string>
{
- url += $"?language={language}";
- }
+ { "file", "recording.wav" }
+ };
+
+ // Use only the language parameter, matching the server API
+ string forcedLanguage = "ko";
+ string endpoint = $"stt/transcribe?language={forcedLanguage}";
+
+ Debug.Log($"[STTService] STT 변환 요청 시작 - 엔드포인트: {endpoint}, 파일 크기: {audioData.Length / 1024}KB, 강제 언어: {forcedLanguage}");
+ Debug.Log($"[STTService] URL 확인: {endpoint}");
+
+ var response = await _httpClient.PostFormDataAsync(endpoint, formData, fileNames, cancellationToken: cancellationToken);
- // HTTP POST 요청 (전체 URL 사용)
- var response = await HttpApiClient.Instance.PostFormDataAsync(url, formData);
+ Debug.Log($"[STTService] 응답 객체 - Text: '{response?.Text}', Language: '{response?.Language}'");
+ Debug.Log($"[STTService] 응답 객체 - LanguageProbability: {response?.LanguageProbability}, SegmentsCount: {response?.SegmentsCount}");
+ Debug.Log($"[STTService] 응답 객체 - ProcessingTime: {response?.ProcessingTime}");
if (response != null && !string.IsNullOrEmpty(response.Text))
{
+ Debug.Log($"[STTService] STT 변환 성공 - 텍스트: '{response.Text}'");
return response.Text;
}
else
{
- Debug.LogError("[STTService] STT 변환 실패: 응답이 비어있습니다.");
+ Debug.LogError($"[STTService] STT 변환 실패: 응답이 비어있습니다. Text: '{response?.Text}'");
throw new Exception("음성 변환 실패: 응답이 비어있습니다.");
}
}
@@ -114,6 +93,39 @@ public async Task ConvertSpeechToTextAsync(byte[] audioData, string audi
throw;
}
}
+
+ /// <summary>
+ /// Generates dummy audio data for testing (1 second, 22050 Hz sine wave).
+ /// </summary>
+ public byte[] GenerateTestAudioData()
+ {
+ int sampleRate = 22050;
+ int duration = 1; // 1 second
+ int samples = sampleRate * duration;
+
+ // Generate a 440 Hz sine wave
+ float frequency = 440f;
+ float[] audioData = new float[samples];
+
+ for (int i = 0; i < samples; i++)
+ {
+ audioData[i] = Mathf.Sin(2f * Mathf.PI * frequency * i / sampleRate) * 0.5f;
+ }
+
+ // Convert to 16-bit PCM
+ byte[] pcm16 = new byte[samples * 2];
+ int pcmIndex = 0;
+ for (int i = 0; i < samples; i++)
+ {
+ float clamped = Mathf.Clamp(audioData[i], -1f, 1f);
+ short s = (short)Mathf.RoundToInt(clamped * short.MaxValue);
+ pcm16[pcmIndex++] = (byte)(s & 0xFF);
+ pcm16[pcmIndex++] = (byte)((s >> 8) & 0xFF);
+ }
+
+ // Add the WAV header
+ return ProjectVG.Infrastructure.Audio.WavEncoder.WrapPcm16ToWav(pcm16, 1, sampleRate);
+ }
}
///
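
A minimal end-to-end sketch tying the recorder and the STT client together, assuming ConvertSpeechToTextAsync returns UniTask<string> as inferred above and that AudioRecorder.Instance is provided by its Singleton base; error handling is trimmed for brevity.

    using Cysharp.Threading.Tasks;
    using UnityEngine;
    using ProjectVG.Core.Audio;
    using ProjectVG.Infrastructure.Network.Services;

    public class SpeechToTextExample : MonoBehaviour
    {
        private readonly STTService _stt = new STTService();

        // Call with the clip returned by AudioRecorder.Instance.StopRecording().
        public async UniTask TranscribeAsync(AudioClip clip)
        {
            byte[] wav = AudioRecorder.Instance.AudioClipToWavBytes(clip);
            if (wav.Length == 0)
            {
                return;
            }

            string text = await _stt.ConvertSpeechToTextAsync(wav);
            Debug.Log($"Transcribed: {text}");
        }
    }
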
diff --git a/Assets/UI/Prefabs/AudioInputView.prefab b/Assets/UI/Prefabs/AudioInputView.prefab
index d21feb9..1e7dda7 100644
--- a/Assets/UI/Prefabs/AudioInputView.prefab
+++ b/Assets/UI/Prefabs/AudioInputView.prefab
@@ -188,8 +188,8 @@ MonoBehaviour:
m_Calls: []
m_text: Stop
m_isRightToLeft: 0
- m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
- m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
+ m_fontAsset: {fileID: 11400000, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
+ m_sharedMaterial: {fileID: -7107498392211080374, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
m_fontSharedMaterials: []
m_fontMaterial: {fileID: 0}
m_fontMaterials: []
@@ -324,8 +324,8 @@ MonoBehaviour:
m_Calls: []
m_text: New Text
m_isRightToLeft: 0
- m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
- m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
+ m_fontAsset: {fileID: 11400000, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
+ m_sharedMaterial: {fileID: -7107498392211080374, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
m_fontSharedMaterials: []
m_fontMaterial: {fileID: 0}
m_fontMaterials: []
@@ -659,8 +659,8 @@ MonoBehaviour:
m_Calls: []
m_text: Recode
m_isRightToLeft: 0
- m_fontAsset: {fileID: 11400000, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
- m_sharedMaterial: {fileID: 2180264, guid: 8f586378b4e144a9851e7b34d9b748ee, type: 2}
+ m_fontAsset: {fileID: 11400000, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
+ m_sharedMaterial: {fileID: -7107498392211080374, guid: 30927a58b1be6fe418e6eacbf0e593c2, type: 2}
m_fontSharedMaterials: []
m_fontMaterial: {fileID: 0}
m_fontMaterials: []