# 📌 DataFrame에서 Dict 특정 Key-Value 추출
### ✏️ DB 연결

In [5]:
import dotenv
import os
from pymongo import MongoClient
import pandas as pd

# Load environment variables from the nearest .env file.
dotenv.load_dotenv(dotenv.find_dotenv())
USER = os.environ["MONGODB_USER"]  # MongoDB user
PASSWORD = os.environ["MONGODB_PW"]  # MongoDB password
PORT = int(os.environ["MONGODB_PORT"])  # MongoDB port

# Connect to the database.
# The credentials are handed over as keyword arguments instead of being
# concatenated into a "mongodb://" URI: inside a URI, reserved characters in
# the user name or password (":", "/", "@", "%") must be percent-escaped,
# otherwise the URI is mis-parsed or rejected.
# NOTE(review): this expects MONGODB_USER / MONGODB_PW to hold the raw,
# un-escaped values - confirm against the .env file.
client = MongoClient(
    host="j7e104.p.ssafy.io",
    port=PORT,
    username=USER,
    password=PASSWORD,
)

# NOTE(review): a later cell of this notebook uses `client.animations`
# (plural) - confirm that `animation` is the intended database here.
db = client.animation
dbcol_review = db.review

### ✏️ 사용할 Data 불러오기

In [6]:
# Materialise the first 10 documents of the review collection (no filter).
review_cursor = dbcol_review.find({}).limit(10)
profile = list(review_cursor)
profile

[{'_id': ObjectId('63229fd149994e87db0ad0ab'),
  'id': 34380175,
  'is_click_like': False,
  'count_like': 1,
  'profile': {'id': 4523846,
   'name': '노랑이용성(wns****)',
   'image': 'https://image.laftel.net/profiles/default/58888b41-8ecd-4f4e-a890-24b2023d7f29.png',
   'profile_rank': {'rank': 0, 'continued_membership_days': None}},
  'score': 5,
  'content': '5점.',
  'created': '2022-09-14T20:28:25.600188',
  'modified': '2022-09-14T20:28:25.603855',
  'is_spoiler': False,
  'animation': 39986},
 {'_id': ObjectId('63229fd149994e87db0ad0ac'),
  'id': 34378719,
  'is_click_like': False,
  'count_like': 0,
  'profile': {'id': 2976649,
   'name': '하낫둘(rla********)',
   'image': 'https://image.laftel.net/profiles/default/b700435b-3ad2-4a31-9b72-3e9ae631dc47.png',
   'profile_rank': {'rank': 2, 'continued_membership_days': None}},
  'score': 3.5,
  'content': '생각보다 재미가 없네 캐릭터들에 개성도 별로 없고 주변 인물들의 비중이 적음 개성 많은 캐릭터들이 많아야 볼만 할 것 같은데 작가의 역량이 부족해서 그런가 \n너무 등장인물들이 없고 등장 인물들의 스토리도 부족함',
  'created':

In [7]:
# Keep only the review id, the nested author profile and the review text.
wanted_columns = ["id", "profile", "content"]
df = pd.DataFrame(profile)[wanted_columns]
# Each "profile" cell is a dict; reduce it to the value stored under "id".
df["profile"] = [author["id"] for author in df["profile"]]
df

Unnamed: 0,id,profile,content
0,34380175,4523846,5점.
1,34378719,2976649,생각보다 재미가 없네 캐릭터들에 개성도 별로 없고 주변 인물들의 비중이 적음 개성 ...
2,34377222,4759828,재밌음
3,34375517,134827,재밌네?
4,34375366,5873764,주술회전 ㄴㄴ 고릴라회전 ㅇㅇ
5,34373921,5393838,이런 애니를 본 내가 참 자랑스럽다 주술회전 덕에 유타란 최애를\n만났어..
6,34362366,5842989,나나미
7,34368644,2420394,오락성을 위해 서사를 간추렸다고 보기엔\n그냥 서사 자체를 구성할 능력이 안되는데 ...
8,34368206,5745672,지금까지 이런 띵작은 없었다
9,34344213,5560570,"귀칼보다 고평가 받아야하는 작화, 스토리, 캐릭터 완벽한 애니"


# 📌 ani_feature에 있는 related를 ani_info에 저장

In [1]:
import dotenv
import os
from pymongo import MongoClient

# Load environment variables from the nearest .env file.
dotenv.load_dotenv(dotenv.find_dotenv())
USER = os.environ["MONGODB_USER"]  # MongoDB user
PASSWORD = os.environ["MONGODB_PW"]  # MongoDB password
PORT = int(os.environ["MONGODB_PORT"])  # MongoDB port

# Connect to the database.
# The credentials are handed over as keyword arguments instead of being
# concatenated into a "mongodb://" URI: inside a URI, reserved characters in
# the user name or password (":", "/", "@", "%") must be percent-escaped,
# otherwise the URI is mis-parsed or rejected.
# NOTE(review): this expects MONGODB_USER / MONGODB_PW to hold the raw,
# un-escaped values - confirm against the .env file.
client = MongoClient(
    host="j7e104.p.ssafy.io",
    port=PORT,
    username=USER,
    password=PASSWORD,
)

# NOTE(review): an earlier cell of this notebook uses `client.animation`
# (singular) - confirm that `animations` is the intended database here.
db = client.animations
dbcol_info = db.ani_info
dbcol_feat = db.ani_feature

In [3]:
import pandas as pd

# Query every ani_info document with a projection on "id", then keep just
# the "id" column of the resulting frame.
info_docs = dbcol_info.find({}, {"id": 1})
df_info = pd.DataFrame(info_docs).loc[:, ["id"]]
df_info.head()

Unnamed: 0,id
0,39986
1,39431
2,16075
3,38912
4,40562


In [6]:
# Query every ani_feature document with a projection on "id" and "related",
# then keep just those two columns of the resulting frame.
feature_docs = dbcol_feat.find({}, {"id": 1, "related": 1})
df_related = pd.DataFrame(feature_docs).loc[:, ["id", "related"]]
df_related.head()

Unnamed: 0,id,related
0,39986,"[18029, 14678, 40371, 15454, 40040, 39268, 403..."
1,39431,"[16709, 38588, 40664, 19356, 38981, 14464, 405..."
2,16075,"[38982, 39883, 20634, 23266, 19437, 37187, 409..."
3,38912,"[38798, 21211, 15184, 39881, 19809, 39587, 214..."
4,40562,"[16709, 38588, 40664, 38981, 40598, 40511, 371..."


In [24]:
# Look up the "related" list of a single animation (id 39986).
# find_one returns one document as a plain dict, so the list is read from it
# directly rather than being round-tripped through a DataFrame (which only
# worked through scalar/list broadcasting and produced a throw-away "id"
# column that was never used).
feature_doc = dbcol_feat.find_one({"id": 39986}, {"related": 1})
# find_one returns None when nothing matches; fall back to an empty list in
# that case and when the document carries no "related" value.
related = feature_doc.get("related") if feature_doc is not None else None
related = list(related) if related else []
related

[18029, 14678, 40371, 15454, 40040, 39268, 40375, 24255, 40195, 13706]

In [41]:
# Inner-join on the shared "id" column: keep only the ani_feature rows whose
# id is also present in df_info.
df_join = df_related.merge(df_info, on="id", how="inner")
df_join.head()

Unnamed: 0,id,related
0,39986,"[18029, 14678, 40371, 15454, 40040, 39268, 403..."
1,39431,"[16709, 38588, 40664, 19356, 38981, 14464, 405..."
2,16075,"[38982, 39883, 20634, 23266, 19437, 37187, 409..."
3,38912,"[38798, 21211, 15184, 39881, 19809, 39587, 214..."
4,40562,"[16709, 38588, 40664, 38981, 40598, 40511, 371..."


In [42]:
def save_related(data):
    """Store one row's ``related`` value on the matching ``ani_info`` document.

    ``data`` is indexed by the keys ``"id"`` and ``"related"``. The document
    whose ``id`` equals ``data["id"]`` gets its ``related`` field set; the
    update is issued with ``upsert=True``, so a document is inserted when
    none matches.
    """
    selector = {"id": data["id"]}
    change = {"$set": {"related": data["related"]}}
    dbcol_info.update_one(selector, change, upsert=True)

In [43]:
# Persist every joined row; each call issues one update against ani_info.
for _row_label, row in df_join.iterrows():
    save_related(row)
print("비슷한 작품 DB 삽입 완료")

비슷한 작품 DB 삽입 완료
