In [None]:
import ast
import logging
import os
import sys
from datetime import datetime
from urllib.parse import quote

import pandas as pd

# Resolve the project root (the parent of the current working directory) and
# move into it. The guard keeps a re-run of this cell from climbing one more
# directory level, because root_dir is already defined by then.
if 'root_dir' not in globals():
    root_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    os.chdir(root_dir)

# Configure logging: INFO and above, "timestamp - level - message" lines
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

In [None]:
# Keywords to look for in the notes
search_words = ['ヴィーガン', 'ビーガン']
# search_words = '_'.join(search_words)

# Path to the Noto Sans CJK font file
font_path = '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc'

# Timestamp of this run. To convert it to a formatted string, or to pin the
# value of an earlier run, use one of the lines below instead:
# now = now.strftime("%Y%m%d%H%M%S")
# now = 20240821104549
now = datetime.now()

In [None]:
# Import the engine used to connect to the database
from my_codes.database_setting import Engine
from my_codes.database_setting import Base

# Import the ORM class mapped to the database table
from my_codes.notes_database import Notes

from sqlalchemy.orm import sessionmaker
from sqlalchemy import func
from sqlalchemy import or_, and_

# Create a session bound to the engine
Session = sessionmaker(bind=Engine)
session = Session()

In [None]:

# Look up the key and urlname of every note that mentions one of the keywords
def search_notes(session, search_words):
    """
    Searches for notes containing any of the specified keywords.

    Args:
    - session: SQLAlchemy session object. It is closed before this function
      returns, whether the query succeeds or raises.
    - search_words: Keyword, or iterable of keywords, to search for in
      Notes.tokenized_body. None and empty entries are ignored. Keywords are
      matched literally ('%' and '_' are not treated as wildcards).

    Returns:
    - DataFrame with the columns 'key' and 'urlname', one row per matching
      note. It is empty when no usable keyword is given.
    """
    # Resolve the logger here so the function does not depend on another cell
    log = logging.getLogger(__name__)
    columns = ['key', 'urlname']

    # A bare string would otherwise be iterated character by character
    if isinstance(search_words, str):
        search_words = [search_words]
    elif search_words is None:
        search_words = []
    # An empty keyword would turn into LIKE '%%' and match every note
    words = [word for word in search_words if word]

    try:
        if not words:
            # or_() without clauses would leave the query unfiltered
            log.warning("No search keywords given; returning an empty result")
            return pd.DataFrame(columns=columns)

        # Match any keyword as a literal substring of the tokenized body
        filter_conditions = or_(
            *(Notes.tokenized_body.contains(word, autoescape=True) for word in words)
        )

        # Run the query and materialize the rows while the session is open
        rows = session.query(Notes.key, Notes.urlname)\
                      .filter(filter_conditions)\
                      .all()

        # Convert the result to a DataFrame
        data = pd.DataFrame(rows, columns=columns)

        # Log the search results
        log.info("Notes containing '%s' found: %d records", ", ".join(words), len(data))

        return data
    finally:
        # Release the session even when the query fails
        session.close()

In [None]:
# Run the keyword search (search_notes closes the session it is given)
data = search_notes(session, search_words)

In [None]:
# Get the unique urlnames
urlnames = data['urlname'].unique()

In [None]:
# Show how many unique urlnames were found
urlnames.shape

In [None]:
# User information is available at `https://note.com/api/v2/creators/{urlname}`
# Access that URL for each user to fetch all of their information
import requests
import json
import time

def get_user_info(urlname, timeout=10.0, pause=0.1):
    """
    Gets the user information for the specified URL name.

    Network errors, non-200 responses and unparsable bodies are logged and
    reported as None, so one bad user does not abort a bulk fetch.

    Args:
    - urlname: URL name of the user
    - timeout: Seconds to wait for the server before giving up on the request
    - pause: Seconds to sleep after every request, successful or not, to avoid
      overloading the server

    Returns:
    - Dictionary mapping urlname to the parsed JSON response, or to None when
      the information could not be retrieved
    """
    log = logging.getLogger(__name__)

    # A missing or empty name cannot identify a creator, so skip the request
    if not isinstance(urlname, str) or not urlname:
        log.error("Failed to retrieve user information for '%s': invalid urlname", urlname)
        return {urlname: None}

    # Percent-encode the name so characters such as '/', '?' or '#' cannot
    # change the path that is requested
    url = f"https://note.com/api/v2/creators/{quote(urlname, safe='')}"

    try:
        # Without a timeout a stalled connection would block forever
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            log.error("Failed to retrieve user information for '%s': HTTP %s",
                      urlname, response.status_code)
            return {urlname: None}

        # Parse the response as JSON
        user_info = json.loads(response.text)
    except requests.RequestException as exc:
        log.error("Failed to retrieve user information for '%s': %s", urlname, exc)
        return {urlname: None}
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError: the body was not valid JSON
        log.error("Failed to retrieve user information for '%s': invalid JSON (%s)",
                  urlname, exc)
        return {urlname: None}
    finally:
        # Pause after failures too, so errors do not turn into a request burst
        time.sleep(pause)

    log.info("User information retrieved for '%s'", urlname)
    return {urlname: user_info}

# Fetch the user information for every entry in urlnames
user_info = list(map(get_user_info, urlnames))


In [None]:
# Inspect the first retrieved user record
user_info[0]

In [None]:
# Collect the urlnames whose user information is None
none_urlnames = [
    urlname
    for info in user_info
    for urlname, details in info.items()
    if details is None
]

In [None]:
none_urlnames
# Users that came back as None have probably had their accounts deleted

In [None]:
# Join search_words with `_` to build the file name
'_'.join(search_words)

In [None]:
# Save the user information
# The output directory may not exist yet
os.makedirs('data', exist_ok=True)
# ensure_ascii=False writes non-ASCII text as-is, so fix the file encoding
# explicitly instead of depending on the platform default
with open(f"data/user_info_{'_'.join(search_words)}.json", 'w', encoding='utf-8') as f:
    json.dump(user_info, f, ensure_ascii=False, indent=4)
