Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions .github/workflows/deploy-ai.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
name: Deploy AI Service To EC2

on:
  # Manual deploy with an optional explicit image tag, or an automatic deploy
  # when a develop -> main pull request is merged (enforced by each job's `if`).
  workflow_dispatch:
    inputs:
      image_tag:
        description: "Docker image tag to deploy (default: commit SHA)"
        required: false
        type: string
  pull_request:
    types:
      - closed

env:
  AWS_REGION: ap-northeast-2
  SERVICE_NAME: machine
  ECR_REPO: oplust-machine

jobs:
  build-and-push:
    # Run on manual dispatch, or only when the merged PR went develop -> main.
    if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main' && github.event.pull_request.head.ref == 'develop') }}
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to ECR
        uses: aws-actions/amazon-ecr-login@v2

      - name: Ensure ECR repository exists
        run: |
          aws ecr describe-repositories --repository-names "${{ env.ECR_REPO }}" >/dev/null 2>&1 || \
            aws ecr create-repository --repository-name "${{ env.ECR_REPO }}" >/dev/null

      # Model artifacts live in S3 (not in the repo) and are copied into the
      # build context so the Dockerfile can bake them into the image.
      - name: Download tagging model
        run: aws s3 cp "${{ secrets.AI_TAGGING_MODEL_S3_URI }}" ./apps/machine/models/tagging/ --recursive

      - name: Download recommend model
        run: aws s3 cp "${{ secrets.AI_RECOMMEND_MODEL_S3_URI }}" ./apps/machine/models/recommend/ --recursive

      - name: Build and push image
        env:
          ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com
          IMAGE_TAG_INPUT: ${{ github.event.inputs.image_tag }}
        run: |
          set -euo pipefail

          # Tag priority: explicit workflow_dispatch input, then commit SHA.
          IMAGE_TAG="${IMAGE_TAG_INPUT:-${GITHUB_SHA}}"
          IMAGE_URI="${ECR_REGISTRY}/${ECR_REPO}:${IMAGE_TAG}"
          IMAGE_URI_LATEST="${ECR_REGISTRY}/${ECR_REPO}:latest"

          docker build \
            -f "apps/machine/Dockerfile" \
            -t "${IMAGE_URI}" \
            -t "${IMAGE_URI_LATEST}" \
            .

          docker push "${IMAGE_URI}"
          docker push "${IMAGE_URI_LATEST}"

  deploy:
    # Same trigger condition as build-and-push; `needs` serializes the jobs.
    if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main' && github.event.pull_request.head.ref == 'develop') }}
    runs-on: ubuntu-latest
    needs: build-and-push

    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Deploy AI service to EC2 via SSM
        env:
          ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_REGION }}.amazonaws.com
          IMAGE_TAG_INPUT: ${{ github.event.inputs.image_tag }}
          PROJECT_NAME: oplust
          SSM_MACHINE_ENV_PARAM: /oplust/machine/env
        run: |
          set -euo pipefail

          IMAGE_TAG="${IMAGE_TAG_INPUT:-${GITHUB_SHA}}"
          IMAGE_URI="${ECR_REGISTRY}/${ECR_REPO}:${IMAGE_TAG}"
          TARGET_TAG="${PROJECT_NAME}-machine-ec2"
          CONTAINER_NAME="oplust-machine"
          ENV_FILE="/etc/oplust/machine.env"
          PORT="8000"

          # Promtail on the AI host pushes logs to Loki on the monitoring host,
          # so we need that instance's private IP before rendering the config.
          MONITORING_PRIVATE_IP=$(aws ec2 describe-instances \
            --region "$AWS_REGION" \
            --filters "Name=tag:Name,Values=${PROJECT_NAME}-monitoring-ec2" "Name=instance-state-name,Values=running" \
            --query "Reservations[0].Instances[0].PrivateIpAddress" \
            --output text)

          if [ -z "$MONITORING_PRIVATE_IP" ] || [ "$MONITORING_PRIVATE_IP" = "None" ]; then
            echo "No running monitoring instance found for tag: ${PROJECT_NAME}-monitoring-ec2" >&2
            exit 1
          fi

          INSTANCE_ID=$(aws ec2 describe-instances \
            --region "$AWS_REGION" \
            --filters "Name=tag:Name,Values=${TARGET_TAG}" "Name=instance-state-name,Values=running" \
            --query "Reservations[0].Instances[0].InstanceId" \
            --output text)

          if [ -z "$INSTANCE_ID" ] || [ "$INSTANCE_ID" = "None" ]; then
            echo "No running instance found for tag: ${TARGET_TAG}" >&2
            exit 1
          fi

          # Base64-encode the promtail config so it survives SSM/JSON quoting
          # when shipped inside the remote command list below.
          PROMTAIL_CONFIG_B64=$(printf '%s\n' \
            'server:' \
            '  http_listen_port: 9080' \
            '  grpc_listen_port: 0' \
            '' \
            'positions:' \
            '  filename: /tmp/positions.yaml' \
            '' \
            'clients:' \
            "  - url: http://${MONITORING_PRIVATE_IP}:3100/loki/api/v1/push" \
            '' \
            'scrape_configs:' \
            '  - job_name: docker' \
            '    static_configs:' \
            '      - targets: [localhost]' \
            '        labels:' \
            '          job: docker' \
            '          role: ai' \
            '          __path__: /var/lib/docker/containers/*/*-json.log' \
            '    pipeline_stages:' \
            '      - docker: {}' \
            | base64 | tr -d '\n')

          PARAMS_FILE=$(mktemp)
          COMMANDS=(
            "set -e"
            "sudo mkdir -p /etc/oplust"
            "SERVICE_ENV=\$(aws ssm get-parameter --region $AWS_REGION --name '$SSM_MACHINE_ENV_PARAM' --with-decryption --query 'Parameter.Value' --output text)"
            "printf '%s\n' \"\$SERVICE_ENV\" | sudo tee ${ENV_FILE} >/dev/null"
            "sudo chmod 600 ${ENV_FILE}"
            "aws ecr get-login-password --region $AWS_REGION | sudo docker login --username AWS --password-stdin $ECR_REGISTRY"
            "sudo docker pull ${IMAGE_URI}"
            "sudo docker rm -f ${CONTAINER_NAME} || true"
            "sudo docker run -d --name ${CONTAINER_NAME} --restart unless-stopped -p ${PORT}:${PORT} --env-file ${ENV_FILE} ${IMAGE_URI}"
            "sudo mkdir -p /opt/oplust-promtail /opt/oplust-promtail/positions"
            "echo '${PROMTAIL_CONFIG_B64}' | base64 -d | sudo tee /opt/oplust-promtail/promtail.yml >/dev/null"
            "sudo docker rm -f promtail || true"
            "sudo docker run -d --name promtail --restart unless-stopped -v /opt/oplust-promtail/promtail.yml:/etc/promtail/config.yml:ro -v /opt/oplust-promtail/positions:/tmp -v /var/lib/docker/containers:/var/lib/docker/containers:ro grafana/promtail:2.9.8 -config.file=/etc/promtail/config.yml"
          )
          # jq builds the {"commands": [...]} SSM parameter document with safe JSON escaping.
          printf '%s\n' "${COMMANDS[@]}" | jq -R . | jq -s '{commands: .}' > "$PARAMS_FILE"

          COMMAND_ID=$(aws ssm send-command \
            --region "$AWS_REGION" \
            --instance-ids "$INSTANCE_ID" \
            --document-name "AWS-RunShellScript" \
            --comment "Deploy ${CONTAINER_NAME}:${IMAGE_TAG}" \
            --parameters "file://${PARAMS_FILE}" \
            --query 'Command.CommandId' \
            --output text)

          rm -f "$PARAMS_FILE"

          echo "[${CONTAINER_NAME}] command id: ${COMMAND_ID} (instance: ${INSTANCE_ID})"

          # Poll for up to 10 minutes (120 iterations x 5s). The invocation may
          # not exist immediately after send-command, hence the `|| true` and
          # the empty-status case treated as "still pending".
          for _ in $(seq 1 120); do
            STATUS=$(aws ssm get-command-invocation \
              --region "$AWS_REGION" \
              --command-id "$COMMAND_ID" \
              --instance-id "$INSTANCE_ID" \
              --query 'Status' \
              --output text 2>/dev/null || true)

            case "$STATUS" in
              Success)
                echo "[${CONTAINER_NAME}] deployment success"
                exit 0
                ;;
              Failed|Cancelled|TimedOut)
                echo "[${CONTAINER_NAME}] deployment failed with status: ${STATUS}" >&2
                aws ssm get-command-invocation --region "$AWS_REGION" --command-id "$COMMAND_ID" --instance-id "$INSTANCE_ID" --query '{StdOut:StandardOutputContent,StdErr:StandardErrorContent}' --output json || true
                exit 1
                ;;
              Pending|InProgress|Delayed|"")
                sleep 5
                ;;
              *)
                echo "[${CONTAINER_NAME}] unexpected status: ${STATUS}" >&2
                sleep 5
                ;;
            esac
          done

          echo "[${CONTAINER_NAME}] deployment timed out waiting for SSM command completion" >&2
          aws ssm get-command-invocation --region "$AWS_REGION" --command-id "$COMMAND_ID" --instance-id "$INSTANCE_ID" --query '{StdOut:StandardOutputContent,StdErr:StandardErrorContent}' --output json || true
          exit 1
8 changes: 1 addition & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,7 @@ __pycache__/
*.pyc

# ML artifacts
apps/machine/models/klue_saved_model/checkpoints/
apps/machine/models/klue_saved_model/*.bin
apps/machine/models/klue_saved_model/training_args.bin
apps/machine/models/klue_saved_model/optimizer.pt
apps/machine/models/klue_saved_model/scheduler.pt
apps/machine/models/klue_saved_model/rng_state.pth
apps/machine/models/klue_saved_model/trainer_state.json
apps/machine/models/


### Node ###
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import org.springframework.context.annotation.ComponentScan;
import org.springframework.data.jpa.repository.config.EnableJpaAuditing;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
import org.springframework.scheduling.annotation.EnableAsync;

@SpringBootApplication
@ComponentScan(basePackages = "com.ott")
@EntityScan(basePackages = "com.ott.domain")
@EnableJpaRepositories(basePackages = "com.ott.domain")
@EnableJpaAuditing
@EnableAsync
Comment thread
marulog marked this conversation as resolved.
public class ApiAdminApplication {

public static void main(String[] args) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.reactive.function.client.WebClient;

import java.time.Duration;
Expand All @@ -19,8 +20,15 @@ public class AiClient {

private final WebClient aiWebClient;

@Value("${ai.timeout-ms}")
private Long timeoutMs;

/**
* FastAPI 서버에 영상 줄거리를 보내고 감정 태그 리스트를 받아옵니다.
* 현재는 비동기 + 블로킹으로 AI 서버의 응답을 기다리고 있습니다.
* 다만, 관리자 서버의 요청 스레드(Tomcat)가 블로킹하는게 아닌, 관리자 서버 요청 스레드는 비동기로 바로 반환되고
* 비동기 작업에서 사용되는 스레드(Async)로 해당 AI서버의 응답을 블로킹 하기 때문에 더 효율적이라 판단했습니다.
* 추후, 유저도 업로드로 확장 된다면 비동기 + 논블로킹도 좋은 방법이라 생각됩니다.
*/
public List<String> getEmotionTags(Long mediaId, String description) {
log.info("[Admin AI] 미디어 태깅 요청: mediaId={}", mediaId);
Expand All @@ -33,7 +41,7 @@ public List<String> getEmotionTags(Long mediaId, String description) {
.bodyValue(requestDto)
.retrieve()
.bodyToMono(TaggingResponse.class)
.timeout(Duration.ofSeconds(5))
.timeout(Duration.ofMillis(timeoutMs)) // 해당 시간까지 AI작업이 끝나야함을 명시
.block(); // 비동기 작업 내에서 안전하게 블로킹 처리

if (response == null || response.getMoodTags() == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
@AllArgsConstructor
public class TaggingRequest {
@JsonProperty("media_id")
private Long mediaId; // 에러 로깅이나 추적을 위해 남겨둠

private String description; // 영상 줄거리 (AI 분석의 핵심 재료)
private Long mediaId;
private String description;
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ public class WebClientConfig {

@Bean
public WebClient aiWebClient(
@Value("${ai.base-url:http://localhost:8000}") String baseUrl,
@Value("${ai.timeout-ms:2000}") long timeoutMs
@Value("${ai.base-url}") String baseUrl,
@Value("${ai.timeout-ms}") long timeoutMs
) {
HttpClient httpClient = HttpClient.create()
.responseTimeout(Duration.ofMillis(timeoutMs));
.responseTimeout(Duration.ofMillis(timeoutMs)); // 응답이 해당 시간까지 안오면 끊겠다.

return WebClient.builder()
.baseUrl(baseUrl)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.ott.api_admin.content.dto.response.ContentsUpdateResponse;
import com.ott.api_admin.content.dto.response.ContentsUploadResponse;
import com.ott.api_admin.content.mapper.BackOfficeContentsMapper;
import com.ott.api_admin.tagging.event.AiTaggingRequestedEvent;
import com.ott.api_admin.upload.support.MediaTagLinker;
import com.ott.api_admin.upload.support.UploadHelper;
import com.ott.common.web.exception.BusinessException;
Expand All @@ -25,6 +26,7 @@
import com.ott.domain.series.domain.Series;
import com.ott.domain.series.repository.SeriesRepository;
import lombok.RequiredArgsConstructor;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
Expand All @@ -45,6 +47,7 @@ public class BackOfficeContentsService {
private final SeriesRepository seriesRepository;
private final UploadHelper uploadHelper;
private final MediaTagLinker mediaTagLinker;
private final ApplicationEventPublisher eventPublisher;

@Transactional(readOnly = true)
public PageResponse<ContentsListResponse> getContents(int page, int size, String searchWord, PublicStatus publicStatus) {
Expand Down Expand Up @@ -148,6 +151,9 @@ public ContentsUploadResponse createContentsUpload(ContentsUploadRequest request

mediaTagLinker.linkTags(media, request.categoryId(), request.tagIdList());

// 임시로 해당 위치로 삽입 상태 관리 픽스 후 추후 변경 예정
eventPublisher.publishEvent(new AiTaggingRequestedEvent(media.getId(), request.description()));

return backOfficeContentsMapper.toContentsUploadResponse(
contentsId,
mediaCreateUploadResult.posterObjectKey(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package com.ott.api_admin.tagging.event;

/**
 * Application event requesting AI emotion tagging for an uploaded media item.
 * Published via {@code ApplicationEventPublisher} after contents upload
 * (see BackOfficeContentsService.createContentsUpload).
 *
 * @param mediaId     identifier of the media to tag
 * @param description media description text supplied to the AI tagging service
 *                    as its analysis input
 */
public record AiTaggingRequestedEvent(Long mediaId, String description) {
}
Loading