## Search

In [1]:
import requests
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
# import asyncio
from typing import Optional, List

# ODMantic model definition
class Movie(Model):
    """Document holding the subset of a TMDB search result that this cell stores."""

    movie_id: int  # the result item's "id"
    title: str  # "title", falling back to "name"
    original_title: Optional[str] = None  # "original_title", falling back to "original_name"
    poster_path: Optional[str] = None
    language: str  # the result item's "original_language"
    genre_ids: List[int]
    popularity: float
    release_date: Optional[str] = None  # "release_date", falling back to "first_air_date"; kept as a string

# Query the TMDB search API and store the results in MongoDB
async def fetch_and_store_movies():
    """Search TMDB for a fixed title and save the matching movies to MongoDB.

    The bearer token is read from the ``TMDB_API_TOKEN`` environment variable
    so that the credential is not committed together with the notebook.

    Raises:
        RuntimeError: if ``TMDB_API_TOKEN`` is not set.
    """
    # Local imports: this cell has its asyncio import commented out.
    import asyncio
    import os

    token = os.environ.get("TMDB_API_TOKEN")
    if not token:
        raise RuntimeError("TMDB_API_TOKEN environment variable is not set.")

    # API request
    search_query = "기생충"
    url = "https://api.themoviedb.org/3/search/movie"
    # Passing the query through ``params`` lets requests URL-encode it.
    params = {
        "query": search_query,
        "include_adult": "false",
        "language": "ko-KR",  # was "ko-KO"; KR is the country code used by the other cells
        "page": 1,
    }
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }
    # requests is blocking, so run it in a worker thread instead of stalling
    # the event loop; the timeout keeps a dead connection from hanging forever.
    response = await asyncio.to_thread(
        requests.get, url, params=params, headers=headers, timeout=10
    )
    data = response.json()

    if "results" not in data:
        print("API 응답에 'results' 키가 없습니다.")
        return

    # MongoDB connection
    client = AsyncIOMotorClient("mongodb://user:team3@localhost:27017/")
    engine = AIOEngine(client=client, database="movies_db")

    # Keep only the columns the model needs
    movies = [
        Movie(
            movie_id=item["id"],
            title=item.get("title") or item.get("name"),
            original_title=item.get("original_title") or item.get("original_name"),
            poster_path=item.get("poster_path"),
            language=item.get("original_language"),
            genre_ids=item.get("genre_ids", []),
            popularity=item.get("popularity", 0.0),
            release_date=item.get("release_date") or item.get("first_air_date"),
        )
        for item in data["results"]
    ]

    # Save to MongoDB
    if movies:
        await engine.save_all(movies)
        print("필터링된 데이터가 성공적으로 저장되었습니다!")
    else:
        print("저장할 데이터가 없습니다.")

## Details

## Cast

In [12]:
import requests

import os

movie_id = 313369
url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=ko-KR"

# The bearer token comes from the environment (TMDB_API_TOKEN) rather than
# being pasted into the notebook; a missing variable raises KeyError here.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.environ['TMDB_API_TOKEN']}",
}

# The timeout keeps the cell from hanging on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Last expression of the cell: the notebook displays the decoded JSON.
response.json()

{'id': 313369,
 'cast': [{'adult': False,
   'gender': 2,
   'id': 30614,
   'known_for_department': 'Acting',
   'name': '라이언 고슬링',
   'original_name': 'Ryan Gosling',
   'popularity': 55.822,
   'profile_path': '/lyUyVARQKhGxaxy0FbPJCQRpiaW.jpg',
   'cast_id': 26,
   'character': 'Sebastian',
   'credit_id': '575d52d99251415a0a004d9f',
   'order': 0},
  {'adult': False,
   'gender': 1,
   'id': 54693,
   'known_for_department': 'Acting',
   'name': '엠마 스톤',
   'original_name': 'Emma Stone',
   'popularity': 65.466,
   'profile_path': '/cZ8a3QvAnj2cgcgVL6g4XaqPzpL.jpg',
   'cast_id': 27,
   'character': 'Mia',
   'credit_id': '575d52e3c3a3684516000671',
   'order': 1},
  {'adult': False,
   'gender': 2,
   'id': 113461,
   'known_for_department': 'Acting',
   'name': '존 레전드',
   'original_name': 'John Legend',
   'popularity': 7.476,
   'profile_path': '/cCv0YBy2YFFWp9h3kvNPmwWwrCD.jpg',
   'cast_id': 34,
   'character': 'Keith',
   'credit_id': '575d53ed9251412443001dc0',
   'order':

## 통합

In [18]:
import requests
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
from typing import Optional, List
import asyncio

# ODMantic model definition
class Movie(Model):
    """Document for one TMDB search result plus the cast returned by ``fetch_cast``."""

    movie_id: int  # the result item's "id"
    title: str  # "title", falling back to "name"
    original_title: Optional[str] = None  # "original_title", falling back to "original_name"
    poster_path: Optional[str] = None
    language: str  # the result item's "original_language"
    genre_ids: List[int]
    popularity: float
    release_date: Optional[str] = None  # kept as the string the API returned
    cast: Optional[List[dict]] = None  # dicts with "cast_id" and "name" keys

# Fetch the cast of a single movie
async def fetch_cast(movie_id: int, headers: dict) -> List[dict]:
    """Return the first eight cast entries of a movie as ``{"cast_id", "name"}`` dicts.

    Args:
        movie_id: TMDB id placed in the credits URL.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        At most eight dicts. The list is empty when the response has no
        "cast" key.
    """
    credits_url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=ko-KR"
    # The timeout matters because callers gather many of these at once:
    # without it a single stalled connection would block the whole batch.
    response = await asyncio.to_thread(
        requests.get, credits_url, headers=headers, timeout=10
    )
    credits_data = response.json()
    first_eight = credits_data.get("cast", [])[:8]
    # NOTE: the "cast_id" key is filled from the entry's "id" field.
    return [
        {"cast_id": member.get("id"), "name": member.get("name")}
        for member in first_eight
    ]

# Build the document for one search result and store it
async def process_movie(item, headers, engine):
    """Fetch the cast for one search-result item, build a ``Movie`` and save it.

    Args:
        item: one element of the search response's "results" list.
        headers: HTTP headers forwarded to ``fetch_cast``.
        engine: object whose ``save`` coroutine persists the document.
    """
    movie_id = item["id"]
    cast_info = await fetch_cast(movie_id, headers)

    # Each "a or b" pair falls back to the second key when the first is
    # missing or empty.
    fields = {
        "movie_id": movie_id,
        "title": item.get("title") or item.get("name"),
        "original_title": item.get("original_title") or item.get("original_name"),
        "poster_path": item.get("poster_path"),
        "language": item.get("original_language"),
        "genre_ids": item.get("genre_ids", []),
        "popularity": item.get("popularity", 0.0),
        "release_date": item.get("release_date") or item.get("first_air_date"),
        "cast": cast_info,
    }
    await engine.save(Movie(**fields))

# Process the whole search result
async def fetch_and_store_movies():
    """Search TMDB for a fixed title and store every hit together with its cast.

    The bearer token is read from the ``TMDB_API_TOKEN`` environment variable
    so that the credential is not committed together with the notebook.

    Raises:
        RuntimeError: if ``TMDB_API_TOKEN`` is not set.
    """
    import os

    token = os.environ.get("TMDB_API_TOKEN")
    if not token:
        raise RuntimeError("TMDB_API_TOKEN environment variable is not set.")

    search_query = "청설"
    search_url = "https://api.themoviedb.org/3/search/movie"
    # Passing the query through ``params`` lets requests URL-encode it.
    params = {
        "query": search_query,
        "include_adult": "false",
        "language": "ko-KR",
        "page": 1,
    }
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }
    # Run the blocking HTTP call in a worker thread, like fetch_cast does,
    # and bound it with a timeout.
    response = await asyncio.to_thread(
        requests.get, search_url, params=params, headers=headers, timeout=10
    )
    data = response.json()

    if "results" not in data:
        print("API 응답에 'results' 키가 없습니다.")
        return

    client = AsyncIOMotorClient("mongodb://user:team3@localhost:27017/")
    engine = AIOEngine(client=client, database="movies_db")

    tasks = [
        process_movie(item, headers, engine)
        for item in data["results"]
    ]
    await asyncio.gather(*tasks)  # run the per-movie work concurrently
    print("필터링된 데이터와 캐스트 정보가 성공적으로 저장되었습니다!")


In [27]:
import nest_asyncio
import asyncio

# Apply nest_asyncio for use inside Jupyter Notebook
nest_asyncio.apply()

# Run the async pipeline
if __name__ == "__main__":
    asyncio.run(fetch_and_store_movies())

필터링된 데이터와 캐스트 및 감독 정보가 성공적으로 저장되었습니다!


In [26]:
import requests
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
from typing import Optional, List

# ODMantic model definition
class Movie(Model):
    """Document for one TMDB search result plus the cast and director from ``fetch_credit``."""

    movie_id: int  # the result item's "id"
    title: str  # "title", falling back to "name"
    original_title: Optional[str] = None  # "original_title", falling back to "original_name"
    poster_path: Optional[str] = None
    language: str  # the result item's "original_language"
    genre_ids: List[int]
    popularity: float
    release_date: Optional[str] = None  # kept as the string the API returned
    cast: Optional[List[dict]] = None  # dicts with "cast_id" and "name" keys
    director: Optional[dict] = None  # director info: dict with "id" and "name", or None

# Fetch the cast and director of a single movie
async def fetch_credit(movie_id: int, headers: dict) -> tuple[List[dict], Optional[dict]]:
    """Return ``(cast, director)`` for a movie from the TMDB credits endpoint.

    Args:
        movie_id: TMDB id placed in the credits URL.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        ``cast`` is a list of at most eight ``{"cast_id", "name"}`` dicts.
        ``director`` is ``{"id", "name"}`` for the first crew entry whose job
        is "Director", or ``None`` when there is none.
    """
    # Local import: the cell defining this function does not import asyncio.
    import asyncio

    credits_url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=ko-KR"
    # The timeout keeps one stalled connection from blocking the callers'
    # gather() forever.
    response = await asyncio.to_thread(
        requests.get, credits_url, headers=headers, timeout=10
    )
    credits_data = response.json()

    # First eight cast entries; "cast_id" is filled from the entry's "id".
    cast = [
        {"cast_id": member.get("id"), "name": member.get("name")}
        for member in credits_data.get("cast", [])[:8]
    ]

    # First crew member listed as Director, if any
    director = next(
        (
            {"id": crew["id"], "name": crew["name"]}
            for crew in credits_data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        None,
    )

    return cast, director

# Build the document for one search result and store it
async def process_movie(item, headers, engine):
    """Fetch credits for one search-result item, build a ``Movie`` and save it.

    Args:
        item: one element of the search response's "results" list.
        headers: HTTP headers forwarded to ``fetch_credit``.
        engine: object whose ``save`` coroutine persists the document.
    """
    movie_id = item["id"]
    credits = await fetch_credit(movie_id, headers)
    cast_info, director_info = credits

    # Each "a or b" pair falls back to the second key when the first is
    # missing or empty.
    fields = {
        "movie_id": movie_id,
        "title": item.get("title") or item.get("name"),
        "original_title": item.get("original_title") or item.get("original_name"),
        "poster_path": item.get("poster_path"),
        "language": item.get("original_language"),
        "genre_ids": item.get("genre_ids", []),
        "popularity": item.get("popularity", 0.0),
        "release_date": item.get("release_date") or item.get("first_air_date"),
        "cast": cast_info,
        "director": director_info,
    }
    await engine.save(Movie(**fields))

# Process the whole search result
async def fetch_and_store_movies():
    """Search TMDB for a fixed title and store every hit with its cast and director.

    The bearer token is read from the ``TMDB_API_TOKEN`` environment variable
    so that the credential is not committed together with the notebook.

    Raises:
        RuntimeError: if ``TMDB_API_TOKEN`` is not set.
    """
    # Local imports: the cell defining this function does not import asyncio.
    import asyncio
    import os

    token = os.environ.get("TMDB_API_TOKEN")
    if not token:
        raise RuntimeError("TMDB_API_TOKEN environment variable is not set.")

    search_query = "La La Land"
    search_url = "https://api.themoviedb.org/3/search/movie"
    # Passing the query through ``params`` lets requests URL-encode it.
    params = {
        "query": search_query,
        "include_adult": "false",
        "language": "ko-KR",  # was "ko-KO"; KR is the country code used by the credits call
        "page": 1,
    }
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }
    # Run the blocking HTTP call in a worker thread and bound it with a timeout.
    response = await asyncio.to_thread(
        requests.get, search_url, params=params, headers=headers, timeout=10
    )
    data = response.json()

    if "results" not in data:
        print("API 응답에 'results' 키가 없습니다.")
        return

    client = AsyncIOMotorClient("mongodb://user:team3@localhost:27017/")
    engine = AIOEngine(client=client, database="movies_db")

    tasks = [
        process_movie(item, headers, engine)
        for item in data["results"]
    ]
    await asyncio.gather(*tasks)  # run the per-movie work concurrently
    print("필터링된 데이터와 캐스트 및 감독 정보가 성공적으로 저장되었습니다!")

In [29]:
import requests

import os

url = "https://api.themoviedb.org/3/search/movie?query=La%20La%20Land&include_adult=false&language=en-US&page=1"

# The bearer token comes from the environment (TMDB_API_TOKEN) rather than
# being pasted into the notebook; a missing variable raises KeyError here.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.environ['TMDB_API_TOKEN']}",
}

# The timeout keeps the cell from hanging on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)

# Last expression of the cell: the notebook displays the decoded JSON.
response.json()

{'page': 1,
 'results': [{'adult': False,
   'backdrop_path': '/qJeU7KM4nT2C1WpOrwPcSDGFUWE.jpg',
   'genre_ids': [35, 18, 10749, 10402],
   'id': 313369,
   'original_language': 'en',
   'original_title': 'La La Land',
   'overview': 'Mia, an aspiring actress, serves lattes to movie stars in between auditions and Sebastian, a jazz musician, scrapes by playing cocktail party gigs in dingy bars, but as success mounts they are faced with decisions that begin to fray the fragile fabric of their love affair, and the dreams they worked so hard to maintain in each other threaten to rip them apart.',
   'popularity': 88.015,
   'poster_path': '/uDO8zWDhfWwoFdKS4fzkUJt0Rf0.jpg',
   'release_date': '2016-11-29',
   'title': 'La La Land',
   'video': False,
   'vote_average': 7.903,
   'vote_count': 16818},
  {'adult': False,
   'backdrop_path': '/qaZ8CXv0thtQqZGRNbHuuZ9NlgT.jpg',
   'genre_ids': [18, 35, 10749],
   'id': 105789,
   'original_language': 'en',
   'original_title': 'Going Down in 

## 통합본 -> 세분화

In [38]:
import requests
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
import asyncio
from typing import Optional, List

# ODMantic model definition
class Movie(Model):
    """Document built from the TMDB movie-details response plus its credits."""

    movie_id: int  # the details response's "id"
    title: str
    original_title: Optional[str] = None
    overview: Optional[str] = None
    poster_path: Optional[str] = None
    original_country: List[str] = []  # filled from the response's "origin_country" key
    genre_ids: List[int]  # the "id" of every entry in the response's "genres"
    popularity: float
    release_date: Optional[str] = None  # kept as the string the API returned
    cast: Optional[List[dict]] = None  # dicts with "cast_id" and "name" keys
    director: Optional[dict] = None  # dict with "id" and "name", or None

# API headers shared by every TMDB request in this cell.
# The bearer token comes from the environment (TMDB_API_TOKEN) rather than
# being pasted into the notebook; a missing variable raises KeyError here.
import os

HEADERS = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.environ['TMDB_API_TOKEN']}",
}

# Fetch the cast and director of a single movie
async def fetch_credit(movie_id: int) -> tuple[List[dict], Optional[dict]]:
    """Return ``(cast, director)`` for a movie from the TMDB credits endpoint.

    Args:
        movie_id: TMDB id placed in the credits URL.

    Returns:
        ``cast`` is a list of at most eight ``{"cast_id", "name"}`` dicts.
        ``director`` is ``{"id", "name"}`` for the first crew entry whose job
        is "Director", or ``None`` when there is none.
    """
    credits_url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=ko-KR"
    # The timeout keeps one stalled connection from blocking the callers'
    # gather() forever.
    response = await asyncio.to_thread(
        requests.get, credits_url, headers=HEADERS, timeout=10
    )
    credits_data = response.json()

    # First eight cast entries; "cast_id" is filled from the entry's "id".
    cast = [
        {"cast_id": member.get("id"), "name": member.get("name")}
        for member in credits_data.get("cast", [])[:8]
    ]

    # First crew member listed as Director, if any
    director = next(
        (
            {"id": crew["id"], "name": crew["name"]}
            for crew in credits_data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        None,
    )

    return cast, director

# Fetch the detail record of a single movie
async def fetch_movie_details(movie_id: int):
    """Request the TMDB details endpoint for ``movie_id`` and return the decoded JSON."""
    details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=ko-KR"
    # requests.get blocks, so it is handed to a worker thread.
    http_response = await asyncio.to_thread(
        requests.get,
        details_url,
        headers=HEADERS,
    )
    payload = http_response.json()
    return payload

# Process and store a single movie
async def process_movie(movie_id: int, engine):
    """Fetch details and credits for ``movie_id`` and save them as a ``Movie``.

    Prints a message and saves nothing when the details response is empty or
    has no "id" key.

    Args:
        movie_id: TMDB id of the movie to store.
        engine: object whose ``save`` coroutine persists the document.
    """
    details = await fetch_movie_details(movie_id)

    if details and "id" in details:
        # Credits are requested only once the details look valid.
        cast_info, director_info = await fetch_credit(movie_id)

        genre_ids = []
        for entry in details.get("genres", []):
            genre_ids.append(entry["id"])

        # Assemble the document and write it to MongoDB
        record = Movie(
            movie_id=details["id"],
            title=details.get("title"),
            original_title=details.get("original_title"),
            overview=details.get("overview"),
            poster_path=details.get("poster_path"),
            original_country=details.get("origin_country", []),
            genre_ids=genre_ids,
            popularity=details.get("popularity", 0.0),
            release_date=details.get("release_date"),
            cast=cast_info,
            director=director_info,
        )
        await engine.save(record)
        print(f"Saved movie_id {movie_id}")
    else:
        print(f"Details not found for movie_id {movie_id}")

# Look up movie ids through the search API
async def search_movies(search_query: str) -> List[int]:
    """Return the ids of the first page of TMDB search results for ``search_query``.

    Args:
        search_query: free-text title to search for.

    Returns:
        The "id" of every item in the response's "results"; empty when the
        response has no "results" key.
    """
    search_url = "https://api.themoviedb.org/3/search/movie"
    # Passing the query through ``params`` lets requests URL-encode it, so
    # titles containing "&", "#" or "+" no longer corrupt the query string.
    params = {
        "query": search_query,
        "include_adult": "false",
        "language": "ko-KR",
        "page": 1,
    }
    response = await asyncio.to_thread(
        requests.get, search_url, params=params, headers=HEADERS, timeout=10
    )
    data = response.json()
    return [item["id"] for item in data.get("results", [])]

# Main pipeline
async def fetch_and_store_movies(search_query: str):
    """Search for ``search_query`` and store every matching movie in MongoDB.

    Prints a notice and returns early when the search yields no ids.
    """
    mongo_client = AsyncIOMotorClient("mongodb://user:team3@localhost:27017/")
    engine = AIOEngine(client=mongo_client, database="movies_db")

    # Call the search API
    movie_ids = await search_movies(search_query)

    if movie_ids:
        # One coroutine per id, all awaited together
        await asyncio.gather(*(process_movie(found_id, engine) for found_id in movie_ids))
        print("모든 영화 데이터를 저장했습니다.")
    else:
        print("No movies found for the search query.")

In [39]:
import nest_asyncio
import asyncio

# Apply nest_asyncio before calling asyncio.run() from the notebook
nest_asyncio.apply()

# Run the async pipeline
if __name__ == "__main__":
    search_query = "비긴 어게인"  # the search term is defined outside the pipeline
    asyncio.run(fetch_and_store_movies(search_query))

Saved movie_id 198277
모든 영화 데이터를 저장했습니다.


## 검색 분리

In [1]:
import requests
import pandas as pd
from typing import List, Optional
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
import asyncio


# ODMantic model definition
class Movie(Model):
    """Movie document schema; this version stores genre names instead of genre ids."""

    movie_id: int
    title: str
    original_title: Optional[str] = None
    overview: Optional[str] = None
    poster_path: Optional[str] = None
    original_country: List[str] = []
    genres: List[str] = []  # list of genre strings (earlier cells stored integer ids)
    popularity: float 
    release_date: Optional[str] = None  # kept as a string
    cast: Optional[List[dict]] = None
    director: Optional[dict] = None


# Call the SEARCH API and return the results
def search_movies(query: str) -> pd.DataFrame:
    """Search TMDB for ``query`` and return the first result page as a DataFrame.

    The bearer token is read from the ``TMDB_API_TOKEN`` environment variable
    so that the credential is not committed together with the notebook.

    Args:
        query: free-text title to search for.

    Returns:
        A DataFrame with the columns id, title, original_title, release_date
        and popularity; an empty DataFrame when the response has no "results".

    Raises:
        RuntimeError: if ``TMDB_API_TOKEN`` is not set.
    """
    import os

    token = os.environ.get("TMDB_API_TOKEN")
    if not token:
        raise RuntimeError("TMDB_API_TOKEN environment variable is not set.")

    url = "https://api.themoviedb.org/3/search/movie"
    # Passing the query through ``params`` lets requests URL-encode user input.
    params = {
        "query": query,
        "include_adult": "false",
        "language": "ko-KR",  # was "ko-KO"; KR is the country code used by the detail calls
        "page": 1,
    }
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }
    response = requests.get(url, params=params, headers=headers, timeout=10)
    data = response.json()

    if "results" not in data:
        print("API 응답에 'results' 키가 없습니다.")
        return pd.DataFrame()

    movies = [
        {
            "id": item["id"],
            "title": item.get("title"),
            "original_title": item.get("original_title"),
            "release_date": item.get("release_date"),
            "popularity": item.get("popularity"),
        }
        for item in data["results"]
    ]
    return pd.DataFrame(movies)

### 중복 제거

In [186]:
import requests
import pandas as pd
from typing import List, Optional
from odmantic import AIOEngine, Model
from motor.motor_asyncio import AsyncIOMotorClient
import asyncio


# ODMantic model definition
class Movie(Model):
    """Movie document written by ``save_movie_to_db`` from the detail and credit responses."""

    movie_id: int  # the detail response's "id"
    title: str
    original_title: Optional[str] = None
    overview: Optional[str] = None
    poster_path: Optional[str] = None
    original_country: List[str] = []  # filled from the detail response's "origin_country" key
    genres: List[str] = []  # the "name" of every entry in the detail response's "genres"
    popularity: float 
    release_date: Optional[str] = None  # kept as the string the API returned
    cast: Optional[List[dict]] = None  # dicts with "cast_id" and "name" keys
    director: Optional[dict] = None  # dict with "id" and "name", or None
    
# Call the DETAIL and CREDIT APIs and store the movie in MongoDB
async def save_movie_to_db(movie_id: int):
    """Store one movie (details, cast, director) unless it is already saved.

    The bearer token is read from the ``TMDB_API_TOKEN`` environment variable
    so that the credential is not committed together with the notebook.

    Args:
        movie_id: TMDB id of the movie to store.

    Raises:
        RuntimeError: if ``TMDB_API_TOKEN`` is not set.
    """
    import os

    # MongoDB connection
    client = AsyncIOMotorClient("mongodb://root:team3@localhost:27017/")
    engine = AIOEngine(client=client, database="movies_db")

    # Skip movies that are already stored
    existing_movie = await engine.find_one(Movie, Movie.movie_id == movie_id)
    if existing_movie:
        print(f"영화 '{existing_movie.title}'는 이미 DB에 존재합니다. (movie_id: {movie_id})")
        return

    token = os.environ.get("TMDB_API_TOKEN")
    if not token:
        raise RuntimeError("TMDB_API_TOKEN environment variable is not set.")
    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {token}",
    }

    detail_url = f"https://api.themoviedb.org/3/movie/{movie_id}?language=ko-KR"
    credit_url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?language=ko-KR"

    # requests is blocking: run both calls in worker threads (concurrently)
    # so the event loop is not stalled, and bound each with a timeout.
    detail_response, credit_response = await asyncio.gather(
        asyncio.to_thread(requests.get, detail_url, headers=headers, timeout=10),
        asyncio.to_thread(requests.get, credit_url, headers=headers, timeout=10),
    )
    detail_data = detail_response.json()
    credit_data = credit_response.json()

    # An error payload (unknown id, bad token) carries no "id" key.
    if "id" not in detail_data:
        print(f"movie_id {movie_id}에 대한 상세 정보를 찾을 수 없습니다.")
        return

    # Tidy up the data
    genres = [genre["name"] for genre in detail_data.get("genres", [])]
    # Keep the eight entries with the lowest cast_id. Entries lacking a
    # cast_id or name are skipped so they cannot break the sort.
    cast = sorted(
        (
            {"cast_id": c["cast_id"], "name": c["name"]}
            for c in credit_data.get("cast", [])
            if c.get("cast_id") is not None and "name" in c
        ),
        key=lambda entry: entry["cast_id"],
    )[:8]
    director = next(
        (
            {"id": crew["id"], "name": crew["name"]}
            for crew in credit_data.get("crew", [])
            if crew.get("job") == "Director"
        ),
        None,
    )

    # Save to MongoDB
    movie = Movie(
        movie_id=detail_data["id"],
        title=detail_data.get("title"),
        original_title=detail_data.get("original_title"),
        overview=detail_data.get("overview"),
        poster_path=detail_data.get("poster_path"),
        # The model declares a list and a float here, so a missing or null
        # value falls back to an empty list / 0.0.
        original_country=detail_data.get("origin_country") or [],
        genres=genres,
        popularity=detail_data.get("popularity") or 0.0,
        release_date=detail_data.get("release_date"),
        cast=cast,
        director=director,
    )
    await engine.save(movie)
    print(movie)
    print(f"영화 '{movie.title}'가 MongoDB에 저장되었습니다.")

In [184]:
# MAIN entry point
if __name__ == "__main__":
    # Step 1: ask for a search term and show what the search returned
    query = input("검색어를 입력하세요: ")
    search_result = search_movies(query)
    if search_result.empty:
        print("검색 결과가 없습니다.")
    else:
        print("검색 결과:\n", search_result)

검색 결과:
         id                 title        original_title release_date  \
0    20342            말할 수 없는 비밀                不能說的秘密   2007-07-27   
1   643267    19금 정사: 말할 수 없는 비밀    19금 정사: 말할 수 없는 비밀   2016-08-18   
2   797109            말할 수 없는 비밀            말할 수 없는 비밀   2023-10-17   
3  1205834            말할 수 없는 비밀                言えない秘密   2024-06-28   
4   734115  말할 수 없는 비밀 : 이모와의 관계  말할 수 없는 비밀 : 이모와의 관계   2020-07-06   

   popularity  
0       9.183  
1       0.338  
2       1.523  
3       3.601  
4       6.323  


In [187]:
# Step 2: store the details of the movie chosen by the user
import nest_asyncio
nest_asyncio.apply()  # applied before asyncio.run() is called from the notebook

# int() raises ValueError when the input is not a whole number.
movie_id = int(input("\n저장할 영화 ID를 입력하세요: "))
asyncio.run(save_movie_to_db(movie_id))

id=ObjectId('674ebf68e38a5f395f056a10') movie_id=20342 title='말할 수 없는 비밀' original_title='不能說的秘密' overview='예술학교로 전학 온 상륜(주걸륜)은 아버지의 영향을 받아 피아노에 천부적인 소질을 보인다. 학교를 둘러보던 중 신비스러운 피아노 연주가 흘러나오는 옛 음악실을 발견하게 되고, 그곳에서 샤오위(계륜미)를 만난다. 그들은 아름다운 피아노 선율처럼 즐거운 시간을 보내고, 둘 사이에는 애틋한 마음이 싹튼다. 그러나 상륜이 샤오위를 더 알고 싶어할 때마다 그녀는 비밀이라고 일관하며 의미심장한 미소만 짓는다. 어느 날 샤오위는 우연히 상륜이 같은 반 여학생 칭요와 키스하는 모습을 보게 되고, 그의 곁에서 사라지는데…' poster_path='/xvoXEBf4K1p68GtMSc5o00Z3f3e.jpg' original_country=['TW'] genres=['판타지', '드라마', '스릴러'] popularity=9.183 release_date='2007-07-27' cast=[{'cast_id': 1, 'name': '주걸륜'}, {'cast_id': 3, 'name': '계륜미'}, {'cast_id': 4, 'name': '황추생'}, {'cast_id': 5, 'name': 'Alice Tzeng'}, {'cast_id': 7, 'name': 'So Ming-Ming'}, {'cast_id': 11, 'name': 'Huang Jun-Lang'}, {'cast_id': 12, 'name': 'Zhan Yu-Hao'}, {'cast_id': 14, 'name': 'Du Guo-Zhang'}] director={'id': 17380, 'name': '주걸륜'}
영화 '말할 수 없는 비밀'가 MongoDB에 저장되었습니다.


In [39]:
import requests

import os

# NOTE(review): "en-KR" is an unusual locale; other cells use "ko-KR" or "en-US" — confirm it is intended.
url = "https://api.themoviedb.org/3/movie/489999?language=en-KR"

# The bearer token comes from the environment (TMDB_API_TOKEN) rather than
# being pasted into the notebook; a missing variable raises KeyError here.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.environ['TMDB_API_TOKEN']}",
}

# The timeout keeps the cell from hanging on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)

# Last expression of the cell: the notebook displays the decoded JSON.
response.json()

{'adult': False,
 'backdrop_path': '/efy2NsyMUEDOnZW0hHA8VrCNsoU.jpg',
 'belongs_to_collection': {'id': 1145924,
  'name': 'Searching Collection',
  'poster_path': '/jBJe6IHdgvoXXwx4MJVzUCsai7Z.jpg',
  'backdrop_path': '/lltgYoY6rYCMvNLteZKGIdn6xbR.jpg'},
 'budget': 1000000,
 'genres': [{'id': 18, 'name': 'Drama'},
  {'id': 9648, 'name': 'Mystery'},
  {'id': 53, 'name': 'Thriller'}],
 'homepage': 'http://www.searching.movie',
 'id': 489999,
 'imdb_id': 'tt7668870',
 'origin_country': ['US', 'MX'],
 'original_language': 'en',
 'original_title': 'Searching',
 'overview': "After David Kim's 16-year-old daughter goes missing, a local investigation is opened and a detective is assigned to the case. But 37 hours later and without a single lead, David decides to search the one place no one has looked yet, where all secrets are kept today: his daughter's laptop.",
 'popularity': 21.437,
 'poster_path': '/pk9R56ZFlofbBzfwBnHlDyg5DMs.jpg',
 'production_companies': [{'id': 11341,
   'logo_path': 

### 전체 데이터 가져오기

In [79]:
import asyncio
from odmantic import AIOEngine
from motor.motor_asyncio import AsyncIOMotorClient


# Read every document of the Movie collection from MongoDB
async def fetch_all_movies():
    """Load all ``Movie`` documents, print id, title and genres of each, and return them."""
    # MongoDB connection
    mongo_client = AsyncIOMotorClient("mongodb://root:team3@localhost:27017/")
    engine = AIOEngine(client=mongo_client, database="movies_db")

    # Fetch all movie documents
    movies = await engine.find(Movie)

    # One summary line per movie
    for record in movies:
        summary = f"ID: {record.movie_id}, Title: {record.title}, Genres: {record.genres}"
        print(summary)
    return movies

### 장르만

In [15]:
# Run the async function; the returned list is discarded, only the printed lines matter here
if __name__ == "__main__":
    asyncio.run(fetch_all_movies())

ID: 313369, Title: 라라랜드, Genres: ['코미디', '드라마', '로맨스', '음악']


In [17]:
import asyncio
from odmantic import AIOEngine
from motor.motor_asyncio import AsyncIOMotorClient


# Read every document of the Movie collection from MongoDB
async def fetch_all_movies():
    """Load all ``Movie`` documents, print each one with cast and director, and return them."""
    # MongoDB connection
    mongo_client = AsyncIOMotorClient("mongodb://root:team3@localhost:27017/")
    engine = AIOEngine(client=mongo_client, database="movies_db")

    # Fetch all movie documents
    movies = await engine.find(Movie)

    separator = "=" * 40
    for record in movies:
        # cast is printed as stored (a list), director as stored (a dict)
        for line in (
            f"ID: {record.movie_id}, Title: {record.title}",
            f"Genres: {record.genres}",
            f"Cast: {record.cast}",
            f"Director: {record.director}",
            separator,
        ):
            print(line)
    return movies


# Run the async function; the returned list is discarded, only the printed lines matter here
if __name__ == "__main__":
    asyncio.run(fetch_all_movies())

ID: 313369, Title: 라라랜드
Genres: ['코미디', '드라마', '로맨스', '음악']
Cast: [{'cast_id': 26, 'name': '라이언 고슬링'}, {'cast_id': 27, 'name': '엠마 스톤'}, {'cast_id': 28, 'name': 'Sandra Rosko'}, {'cast_id': 29, 'name': 'J.K. 시몬스'}, {'cast_id': 30, 'name': 'Finn Wittrock'}, {'cast_id': 31, 'name': '미즈노 소노야'}, {'cast_id': 32, 'name': '로즈메리 디윗'}, {'cast_id': 34, 'name': '존 레전드'}]
Director: {'id': 136495, 'name': '데이미언 셔젤'}


In [198]:
import asyncio
import pandas as pd
from odmantic import AIOEngine
from motor.motor_asyncio import AsyncIOMotorClient


# MongoDB connection, created at cell level; fetch_movies reads the global `engine`.
# NOTE(review): this cell targets "movies_db5", not the "movies_db" used by the
# other cells — presumably one database per user; confirm.
client = AsyncIOMotorClient("mongodb://root:team3@localhost:27017/")
engine = AIOEngine(client=client, database="movies_db5")


# Load the stored movies into a DataFrame
async def fetch_movies():
    """Return a DataFrame with one row per stored ``Movie``.

    Columns: Title, Genres, Cast Names, Director Name, Country. "Cast Names"
    and "Director Name" are ``None`` for movies without cast / director.
    """
    records = []

    for movie in await engine.find(Movie):
        # Reduce cast and director to their names only
        cast_names = None
        if movie.cast:
            cast_names = [member["name"] for member in movie.cast]

        director_name = None
        if movie.director:
            director_name = movie.director["name"]

        records.append(
            {
                "Title": movie.title,
                "Genres": movie.genres,
                "Cast Names": cast_names,
                "Director Name": director_name,
                "Country": movie.original_country,
            }
        )

    return pd.DataFrame(records)


# Run the coroutine and keep the resulting DataFrame as `movie5` for the analysis cells
movie5 = asyncio.run(fetch_movies())

In [199]:
# Flatten the per-movie genre lists into one list of genre names
all_genres5 = [name for per_movie in movie5["Genres"] for name in per_movie]

# Count occurrences and label the two resulting columns
genre_counts5 = pd.Series(all_genres5).value_counts().reset_index()
genre_counts5.columns = ["Genre", "Count"]

# Keep only genres that occur more than once, then display the table
genre_counts5 = genre_counts5.loc[genre_counts5["Count"] > 1]
genre_counts5

Unnamed: 0,Genre,Count
0,스릴러,5
1,드라마,5
2,범죄,4
3,코미디,3
4,미스터리,3
5,공포,2
6,판타지,2


In [204]:
# Flatten the per-movie genre lists into one list of genre names
all_genres5 = [name for per_movie in movie5["Genres"] for name in per_movie]

# Count occurrences and label the two resulting columns
genre_counts5 = pd.Series(all_genres5).value_counts().reset_index()
genre_counts5.columns = ["Genre", "Count"]

# Keep only genres that occur more than once, then display the table
genre_counts5 = genre_counts5.loc[genre_counts5["Count"] > 1]
genre_counts5

Unnamed: 0,Genre,Count
0,스릴러,5
1,드라마,5
2,범죄,4
3,코미디,3
4,미스터리,3
5,공포,2
6,판타지,2


In [206]:
genre_counts

Unnamed: 0,Genre,Count
0,드라마,7
1,로맨스,5
2,코미디,5
3,음악,4
4,판타지,2


In [29]:
# Flatten the per-movie cast-name lists into one list.
# "Cast Names" holds None for movies stored without cast, so anything that is
# not a list is skipped instead of raising TypeError during iteration.
all_casts = [
    name
    for names in df["Cast Names"]
    if isinstance(names, list)
    for name in names
]
# Count how many movies each cast member appears in
cast_counts = pd.Series(all_casts).value_counts().reset_index()
cast_counts.columns = ["Cast", "Count"]
cast_counts

Unnamed: 0,Cast,Count
0,라이언 고슬링,1
1,엠마 스톤,1
2,Sandra Rosko,1
3,J.K. 시몬스,1
4,Finn Wittrock,1
5,미즈노 소노야,1
6,로즈메리 디윗,1
7,존 레전드,1


In [310]:
# Flatten the per-movie country lists into one list of country codes
all_countries4 = [code for per_movie in df4["Country"] for code in per_movie]

# Count occurrences, label the columns, and display the table
country_counts4 = pd.Series(all_countries4).value_counts().reset_index()
country_counts4.columns = ["Country", "Count"]
country_counts4

Unnamed: 0,Country,Count
0,US,3
1,KR,3
2,JP,2
3,AU,1
4,HK,1
5,FR,1
6,IT,1
7,GB,1


In [207]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
from sklearn.metrics.pairwise import cosine_similarity

# Per-user genre count tables, one DataFrame per user
genre_counts_list = [genre_counts, genre_counts2, genre_counts3, genre_counts4, genre_counts5]
# Row/column labels for the result tables, in the same order as genre_counts_list
user_names = ["규석", "정은", "수진", "영인", "우현"]

# Collect every genre that appears for any user.
# The loop variable is deliberately not named `df`: at cell level that would
# overwrite the notebook-wide `df` that other cells read.
all_genres = set()
for counts in genre_counts_list:
    all_genres.update(counts['Genre'])

all_genres = sorted(all_genres)  # Sort genres for consistency

# One row per user, one column per genre, zero where a user has no count
user_genre_matrix = pd.DataFrame({genre: [0] * len(genre_counts_list) for genre in all_genres})

for i, counts in enumerate(genre_counts_list):
    for genre, count in zip(counts['Genre'], counts['Count']):
        user_genre_matrix.loc[i, genre] = count

# 1. Cosine similarity between the users' genre-count vectors
weighted_cosine_sim = cosine_similarity(user_genre_matrix)

# 2. Spearman rank correlation for every pair of users
n_users = len(genre_counts_list)
spearman_corr = np.zeros((n_users, n_users))

for i in range(n_users):
    for j in range(n_users):
        corr, _ = spearmanr(user_genre_matrix.iloc[i], user_genre_matrix.iloc[j])
        spearman_corr[i, j] = corr

# Convert results to labelled DataFrames for better visualization
weighted_cosine_df = pd.DataFrame(weighted_cosine_sim, columns=user_names, index=user_names)
spearman_corr_df = pd.DataFrame(spearman_corr, columns=user_names, index=user_names)

# Display results
print("Weighted Cosine Similarity:")
print(weighted_cosine_df)
print("\nSpearman Correlation:")
print(spearman_corr_df)

Weighted Cosine Similarity:
          규석        정은        수진        영인        우현
규석  1.000000  0.723138  0.762913  0.544151  0.516091
정은  0.723138  1.000000  0.703953  0.582792  0.461061
수진  0.762913  0.703953  1.000000  0.732882  0.734182
영인  0.544151  0.582792  0.732882  1.000000  0.839894
우현  0.516091  0.461061  0.734182  0.839894  1.000000

Spearman Correlation:
          규석        정은        수진        영인        우현
규석  1.000000  0.547719  0.433962  0.073627  0.103907
정은  0.547719  1.000000  0.357724  0.169700 -0.066835
수진  0.433962  0.357724  1.000000  0.386757  0.487502
영인  0.073627  0.169700  0.386757  1.000000  0.624050
우현  0.103907 -0.066835  0.487502  0.624050  1.000000


In [221]:
weighted_cosine_df

Unnamed: 0,규석,정은,수진,영인,우현
규석,1.0,0.723138,0.762913,0.544151,0.516091
정은,0.723138,1.0,0.703953,0.582792,0.461061
수진,0.762913,0.703953,1.0,0.732882,0.734182
영인,0.544151,0.582792,0.732882,1.0,0.839894
우현,0.516091,0.461061,0.734182,0.839894,1.0


In [222]:
spearman_corr_df

Unnamed: 0,규석,정은,수진,영인,우현
규석,1.0,0.547719,0.433962,0.073627,0.103907
정은,0.547719,1.0,0.357724,0.1697,-0.066835
수진,0.433962,0.357724,1.0,0.386757,0.487502
영인,0.073627,0.1697,0.386757,1.0,0.62405
우현,0.103907,-0.066835,0.487502,0.62405,1.0


In [208]:
# The five per-user movie tables, in user order
dfs = [movie, movie2, movie3, movie4, movie5]

# One set of titles per table, for the overlap computation
movies = [set(frame["Title"]) for frame in dfs]

In [215]:
import pandas as pd
from itertools import combinations

# Calculate Jaccard similarity between all pairs of title sets.
# Sets are numbered from 1 so the Set1/Set2 columns match the user order.
results = []
for (i, s1), (j, s2) in combinations(enumerate(movies, start=1), 2):
    union = s1 | s2
    # Two empty sets have an empty union; report 0.0 for that pair instead of
    # dividing by zero.
    jaccard_similarity = len(s1 & s2) / len(union) if union else 0.0
    results.append({"Set1": i, "Set2": j, "Jaccard Similarity": jaccard_similarity})

# Convert results to a DataFrame
jaccard_df = pd.DataFrame(results)

In [216]:
jaccard_df

Unnamed: 0,Set1,Set2,Jaccard Similarity
0,1,2,0.111111
1,1,3,0.0
2,1,4,0.052632
3,1,5,0.05
4,2,3,0.0
5,2,4,0.0
6,2,5,0.05
7,3,4,0.0
8,3,5,0.0
9,4,5,0.0


In [212]:
movie5

Unnamed: 0,Title,Genres,Cast Names,Director Name,Country
0,위대한 레보스키,"[코미디, 범죄]","[제프 브리지스, 존 굿맨, 줄리앤 무어, 스티브 부세미, David Huddles...",조엘 코엔,[US]
1,이레이저 헤드,[공포],"[Jack Nance, Charlotte Stewart, Allen Joseph, ...",데이비드 린치,[US]
2,멀홀랜드 드라이브,"[스릴러, 드라마, 미스터리]","[나오미 왓츠, Laura Harring, Ann Miller, Dan Hedaya...",데이비드 린치,[US]
3,블루 벨벳,"[미스터리, 스릴러, 범죄, 로맨스]","[이사벨라 로셀리니, 카일 맥라클란, 데니스 호퍼, 로라 던, 호프 랭, Dean ...",데이비드 린치,[US]
4,토니 타키타니,[드라마],"[Takahumi Shinohara, 니시지마 히데토시, イッセー尾形, 미야자와 리...",이치가와 준,[JP]
5,혐오스런 마츠코의 일생,"[코미디, 드라마, 음악]","[나카타니 미키, 나가야마 에이타, 이세야 유스케, 市川実日子, 黒沢あすか, 武田真...",나카시마 테츠야,[JP]
6,곡성,"[공포, 미스터리]","[곽도원, 황정민, 천우희, 장소연, 조한철, 쿠니무라 준, 김환희, 손강국]",나홍진,[KR]
7,굿타임,"[범죄, 스릴러]","[로버트 패틴슨, 제니퍼 제이슨 리, Barkhad Abdi, 베니 사프디, Mar...",조쉬 사프디,[US]
8,보 이즈 어프레이드,"[코미디, 모험, 판타지]","[호아킨 피닉스, 네이선 레인, 패티 루폰, 에이미 라이언, Kylie Rogers...",아리 애스터,[US]
9,언컷 젬스,"[드라마, 스릴러, 범죄]","[애덤 샌들러, Judd Hirsch, Eric Bogosian, 라키스 스탠필드,...",조쉬 사프디,[US]


In [180]:
df4

Unnamed: 0,Title,Genres,Cast Names,Director Name,Country
0,헤이트풀8,"[드라마, 미스터리, 서부]","[사무엘 L. 잭슨, 커트 러셀, 월튼 고긴스, 제니퍼 제이슨 리, 팀 로스, 조이...",쿠엔틴 타란티노,[US]
1,반지의 제왕: 왕의 귀환,"[모험, 판타지, 액션]","[일라이저 우드, 이안 맥켈런, 비고 모텐슨, 리브 타일러, 올랜도 블룸, 존 라이...",피터 잭슨,[US]
2,조커,"[범죄, 스릴러, 드라마]","[재지 비츠, 로버트 드 니로, 프랜시스 콘로이, Josh Pais, 마크 매런, ...",토드 필립스,[US]
3,캐리비안의 해적: 망자의 함,"[모험, 판타지, 액션]","[키이라 나이틀리, 올랜도 블룸, Jack Davenport, 빌 나이, 스텔란 스...",고어 버빈스키,[US]
4,서울의 봄,"[드라마, 역사, 스릴러, 범죄, 전쟁]","[황정민, 정우성, 박해준, 이성민, 김성균, Choe Min, 전진기, Yum D...",김성수,[KR]
5,기생충,"[코미디, 스릴러, 드라마]","[송강호, 이선균, 조여정, 최우식, 박소담, 장혜진, 이정은, 정지소]",봉준호,[KR]
6,보헤미안 랩소디,"[음악, 드라마]","[벤 하디, Gwilym Lee, Joseph Mazzello, 루시 보인턴, 마이...",브라이언 싱어,"[US, GB]"
7,건축학개론,"[코미디, 로맨스]","[한가인, 이제훈, 수지, 고준희, 엄태웅, 유연석, 조정석, 조현철]",이용주,[KR]
8,존 윅 4,"[액션, 스릴러, 범죄]","[키아누 리브스, 견자단, 빌 스카스가드, 이언 맥셰인, 로렌스 피시번, 랜스 레딕...",채드 스타헬스키,[US]
9,쇼생크 탈출,"[드라마, 범죄]","[팀 로빈스, 모건 프리먼, Bob Gunton, William Sadler, 클랜...",프랭크 다라본트,[US]
