In [6]:
pip install fastapi


Collecting fastapi
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting starlette<0.38.0,>=0.37.2 (from fastapi)
  Downloading starlette-0.37.2-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting fastapi-cli>=0.0.2 (from fastapi)
  Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)
Collecting httpx>=0.23.0 (from fastapi)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting python-multipart>=0.0.7 (from fastapi)
  Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)
Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi)
  Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86

In [7]:
pip install "uvicorn[standard]"




In [12]:
!pip install scikit-surprise

from fastapi import FastAPI
from pydantic import BaseModel
import json

app = FastAPI()
import pandas as pd
from ast import literal_eval
from collections import defaultdict
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split

# Load the user-book data and the book catalogue.
# NOTE(review): the absolute /content/drive paths presumably rely on a mounted
# Google Drive in Colab — confirm before running elsewhere.
user_data = pd.read_csv("/content/drive/MyDrive/book/user_data.csv")
books = pd.read_csv("/content/drive/MyDrive/book/book_data_final.csv")

# Convert the tags column to Python objects by parsing each cell as a literal
# (presumably the CSV stores each tag list as a list-literal string — verify).
books['tags'] = books['tags'].apply(literal_eval)

# Keep only the columns that are needed
user_data = user_data[['user_id', 'isbn1', 'isbn2', 'isbn3']]

# Build the user-book rating data (every rating is set to 5).
# melt produces one row per (user_id, isbnN column); its 'value' column holds the
# ISBN and is renamed to 'isbn'. The 'variable' column (the source column name)
# is renamed to 'rating' only so it can be overwritten with the constant 5 below.
user_ratings = pd.melt(user_data, id_vars=['user_id'], value_vars=['isbn1', 'isbn2', 'isbn3'])
user_ratings = user_ratings.rename(columns={'value': 'isbn', 'variable': 'rating'})
user_ratings['rating'] = 5

# Convert into the format expected by the Surprise library
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(user_ratings[['user_id', 'isbn', 'rating']], reader)

# Train-test split (25% held out).
# NOTE(review): no random_state is passed, so the split is presumably different
# on every run — confirm whether reproducibility matters here.
trainset, testset = train_test_split(dataset, test_size=0.25)

# Create the SVD algorithm object
algo = SVD()

# Train the algorithm on the trainset
algo.fit(trainset)

# Predict ratings for the testset
predictions = algo.test(testset)

# Compute and print Root Mean Squared Error.
# NOTE(review): every rating is the constant 5, so a near-zero RMSE is
# presumably expected and says little about recommendation quality.
accuracy.rmse(predictions)

# Build the lookup tables used by the recommender:
#   keyword_to_books : tag     -> list of (book_id, isbn) pairs carrying that tag
#   isbn_to_book_id  : isbn    -> book_id
#   book_id_to_isbn  : book_id -> isbn
#   isbn_to_title    : isbn    -> title
keyword_to_books = defaultdict(list)
isbn_to_book_id, book_id_to_isbn, isbn_to_title = {}, {}, {}

# The DataFrame index doubles as the book id.
for book_id, book_row in books.iterrows():
    isbn_no = book_row['isbn_no']

    isbn_to_book_id[isbn_no] = book_id
    book_id_to_isbn[book_id] = isbn_no
    isbn_to_title[isbn_no] = book_row['title']

    for tag in book_row['tags']:
        keyword_to_books[tag].append((book_id, isbn_no))

def recommend_books_based_on_keywords(keywords, keyword_to_books, algo, isbn_to_book_id, isbn_to_title, top_n=10):
    """Recommend books whose tags match any of the given keywords.

    Candidate books are collected from ``keyword_to_books`` and counted once per
    matching keyword. Each candidate's rating is then predicted with ``algo``
    for a fixed anonymous user, and the ``top_n`` highest predictions are
    returned.

    Args:
        keywords: Iterable of keyword strings to look up.
        keyword_to_books: Mapping of keyword -> list of ``(book_id, isbn)`` pairs.
        algo: Fitted model exposing ``predict(uid, iid)`` whose result has an
            ``est`` attribute (e.g. a Surprise ``SVD``).
        isbn_to_book_id: Unused; kept so existing callers keep working.
        isbn_to_title: Mapping of isbn -> title for every candidate isbn.
        top_n: Maximum number of recommendations to return.

    Returns:
        List of ``(isbn, title)`` tuples ordered by predicted rating,
        highest first. Empty when no keyword matches any book.

    Raises:
        KeyError: If a candidate isbn is missing from ``isbn_to_title``.
    """
    book_scores = defaultdict(int)

    # Count how many of the requested keywords each book matches.
    for keyword in keywords:
        for book_id, isbn in keyword_to_books.get(keyword, []):
            book_scores[(book_id, isbn)] += 1

    # Order candidates by keyword-match count. Both sorts are stable, so this
    # order also decides ties between equal predicted ratings below.
    filtered_books = sorted(book_scores.items(), key=lambda x: x[1], reverse=True)

    # Predict a rating for every candidate with the trained model.
    # Fix: the original passed an undefined name `user_id` here (NameError);
    # the fixed anonymous id is what was intended.
    anonymous_user_id = 'anonymous_user'
    predictions = [
        (isbn, algo.predict(anonymous_user_id, isbn).est)
        for (_book_id, isbn), _match_count in filtered_books
    ]

    # Sort by predicted rating and keep the top_n books.
    predictions.sort(key=lambda x: x[1], reverse=True)
    return [(isbn, isbn_to_title[isbn]) for isbn, _score in predictions[:top_n]]


# Pydantic model definition: request body accepted by the POST endpoint.
class Item(BaseModel):
    # Keywords as a single comma-separated string; the endpoint splits it on ','.
    keyword: str

RMSE: 0.0075


In [13]:
@app.post("/keyword_to_isbn/")
async def create_item(item: Item):
  """Return recommended ISBNs for a comma-separated keyword string.

  Args:
    item: Request body whose ``keyword`` field holds comma-separated keywords.

  Returns:
    A string of ISBNs, each followed by a comma (e.g. ``"111,222,"``), or an
    empty string when nothing matches. The trailing comma is kept so existing
    clients that parse the response are unaffected.
  """
  # Trim whitespace around each keyword and drop empty fragments, so input such
  # as "a, b" or "a,,b" still matches the tag dictionary.
  selected_keywords = [kw.strip() for kw in item.keyword.split(',') if kw.strip()]
  recommended_books = recommend_books_based_on_keywords(selected_keywords, keyword_to_books, algo, isbn_to_book_id, isbn_to_title)

  # Format through an f-string so ISBNs that pandas parsed as integers do not
  # raise TypeError on string concatenation.
  return "".join(f"{isbn}," for isbn, _title in recommended_books)