## 필요한 라이브러리 설치 및 import

In [1]:
import librosa
import io
import numpy as np
import uvicorn
import torch
import torch.nn as nn
from pydub import AudioSegment
from fastapi import FastAPI, File, UploadFile
from threading import Thread
import matplotlib.pyplot as plt
import seaborn as sb
import pathlib
from torchvision import transforms, models
from pyngrok import ngrok
from sklearn.preprocessing import LabelEncoder



## 모델 로드

In [3]:
# Model definition
class CryingResNet(nn.Module):
    """ResNet-50 classifier whose final layer is replaced for cry classification.

    The stock ``fc`` layer of the backbone is swapped for dropout followed by
    a linear layer with ``num_classes`` outputs. The attribute name
    ``base_model`` and the order of the layers inside the new head determine
    the parameter names in the state dict, so they must stay as they are for
    saved checkpoints to keep loading.
    """

    def __init__(self, num_classes):
        """Build the backbone without pretrained weights and attach the new head.

        Args:
            num_classes: Number of output classes of the final linear layer.
        """
        super().__init__()
        backbone = models.resnet50(pretrained=False)
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(head_inputs, num_classes),
        )
        self.base_model = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and return its output unchanged."""
        logits = self.base_model(x)
        return logits

# Load the trained model and build the label decoder.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model_path = 'analyzeCrying.pth'
model = CryingResNet(num_classes=4)
model = model.to(device)
# map_location lets a checkpoint saved on another device load onto `device`.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()  # evaluation mode, so the dropout layer in the head is inactive

# The encoder is not fitted; its classes are assigned directly so that
# inverse_transform can turn a predicted class index back into a label name.
# NOTE(review): presumably this order matches the label encoding used when the
# checkpoint was trained -- confirm.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.array(['discomfort', 'hungry', 'pain', 'tired'])


## 함수

In [4]:
# FastAPI application instance; the prediction endpoint is registered on it
# with the @app.post decorator and it is the object handed to uvicorn.run.
app = FastAPI()

# Data preprocessing functions
def audio_to_melspectrogram(signal, sr, n_mels=128, fmax=8000, hop_length=256):
    """Convert an audio signal into a Mel spectrogram expressed in decibels.

    Args:
        signal: Audio samples, passed to librosa as ``y``.
        sr: Sample rate of ``signal``.
        n_mels: Number of Mel bands.
        fmax: Highest frequency covered by the Mel filter bank.
        hop_length: Number of samples between successive frames.

    Returns:
        The Mel power spectrogram converted to dB, using the spectrogram's own
        maximum as the reference value.
    """
    power_spec = librosa.feature.melspectrogram(
        y=signal,
        sr=sr,
        n_mels=n_mels,
        fmax=fmax,
        hop_length=hop_length,
    )
    return librosa.power_to_db(power_spec, ref=np.max)

def split_audio_into_windows(signal, sr, window_size=1.0, hop_size=0.5):
    """Slice ``signal`` into fixed-length windows taken at a regular hop.

    Args:
        signal: Sliceable sequence of audio samples.
        sr: Sample rate, used to convert the durations below into sample counts.
        window_size: Length of each window in seconds.
        hop_size: Distance between the starts of consecutive windows in seconds.

    Returns:
        A list of slices of ``signal``, each exactly ``int(window_size * sr)``
        samples long. Samples after the last full window are dropped, and a
        signal shorter than one window yields an empty list.

    Raises:
        ValueError: If the window or the hop is shorter than one sample.
    """
    window_samples = int(window_size * sr)
    hop_samples = int(hop_size * sr)

    # A hop of zero samples would make range() fail with an unrelated-looking
    # message, and a zero-length window would silently produce empty slices,
    # so both are rejected up front with an explicit error.
    if window_samples <= 0:
        raise ValueError(
            f"window_size * sr must be at least one sample, got {window_samples}"
        )
    if hop_samples <= 0:
        raise ValueError(
            f"hop_size * sr must be at least one sample, got {hop_samples}"
        )

    last_start = len(signal) - window_samples
    return [
        signal[start:start + window_samples]
        for start in range(0, last_start + 1, hop_samples)
    ]

def is_crying_window(window, sr, threshold=0.01):
    """Report whether a window is loud enough to be treated as crying.

    Args:
        window: Audio samples of one window.
        sr: Sample rate; accepted for interface symmetry but not used here.
        threshold: RMS level that the loudest frame must exceed.

    Returns:
        True when the highest per-frame RMS value of ``window`` is strictly
        greater than ``threshold``.
    """
    frame_rms = librosa.feature.rms(y=window)[0]
    loudest = np.max(frame_rms)
    return loudest > threshold

def preprocess_audio_with_windows(file_content, sample_rate=16000, window_size=1.0, hop_size=0.5, img_width=431):
    """Turn raw audio bytes into a model-ready Mel-spectrogram tensor.

    The audio is decoded and cut into windows, and the first window that
    passes ``is_crying_window`` is converted to a Mel spectrogram. That
    spectrogram is cropped or zero-padded to ``img_width`` frames, copied
    into three channels, normalised and given a batch dimension.

    Args:
        file_content: Encoded audio file contents as bytes.
        sample_rate: Rate the audio is resampled to while loading.
        window_size: Window length in seconds.
        hop_size: Hop between window starts in seconds.
        img_width: Number of time frames in the returned spectrogram.

    Returns:
        A float32 tensor of shape ``(1, 3, n_mels, img_width)``, or ``None``
        when the audio is empty or no window is detected as crying.
    """
    signal, sr = librosa.load(io.BytesIO(file_content), sr=sample_rate)
    if len(signal) == 0:
        return None

    windows = split_audio_into_windows(signal, sr, window_size, hop_size)

    # Only the first crying window is used, so stop scanning at the first hit
    # instead of running the RMS check over every window.
    first_crying = next(
        (window for window in windows if is_crying_window(window, sr)),
        None,
    )
    if first_crying is None:
        return None

    # Convert the selected window and force a fixed number of time frames.
    mel_spec = audio_to_melspectrogram(first_crying, sr)
    n_frames = mel_spec.shape[1]
    if n_frames > img_width:
        mel_spec = mel_spec[:, :img_width]
    else:
        mel_spec = np.pad(mel_spec, ((0, 0), (0, img_width - n_frames)), 'constant')

    # Replicate the single-channel spectrogram into the three channels that
    # the ResNet backbone takes as input.
    mel_spec = torch.tensor(mel_spec, dtype=torch.float32).unsqueeze(0).repeat(3, 1, 1)
    mel_spec = transforms.Normalize((0.5,), (0.5,))(mel_spec)
    return mel_spec.unsqueeze(0)  # add the batch dimension

# FastAPI

In [5]:
@app.post("/api/v1/predict")
async def predict(data: UploadFile = File(...)):
    """Classify an uploaded audio file and return the predicted label.

    Args:
        data: Uploaded audio file.

    Returns:
        A dict with the single key ``"prediction"`` holding the label name.
    """
    audio_bytes = await data.read()
    features = preprocess_audio_with_windows(audio_bytes)

    # Preprocessing found nothing to classify (empty audio or no crying
    # window); the endpoint then answers with a fixed default label.
    if features is None:
        return {"prediction": "hungry"}

    with torch.no_grad():
        # Move the input to the device the model lives on (GPU or CPU).
        logits = model(features.to(device))
    class_index = logits.argmax(dim=1).item()

    label_name = label_encoder.inverse_transform([class_index])[0]
    return {"prediction": label_name}

# 로컬서버 실행 및 Ngrok 설정

In [6]:
def run():
    """Serve ``app`` with uvicorn on all interfaces, port 8000, at debug log level."""
    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "debug"}
    uvicorn.run(app, **server_options)

import os

# Run the server in a background thread so the rest of this cell can continue.
thread = Thread(target=run)
thread.start()

# Expose port 8000 through ngrok.
# The auth token is a credential and must not live in source control, so it
# is read from the NGROK_AUTHTOKEN environment variable. When the variable is
# unset, no token is installed here and ngrok uses whatever is already
# configured on the machine.
# NOTE: the token that used to be hard-coded at this point has been exposed
# and should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)
ngrok.connect(8000, hostname="i-con-analyze.ngrok.io")

INFO:     Started server process [25828]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


<NgrokTunnel: "https://i-con-analyze.ngrok.io" -> "http://localhost:8000">