In [4]:
import requests
import pyperclip
import pandas as pd
import re
import time
from tqdm import tqdm
import json
import datetime

In [5]:
json_load = json.load(open('password.json', 'r'))
TOKEN = json_load["TOKEN"]
CLIENT_SECRET = json_load["CLIENT_SECRET"]
SIGNING_SECRET = json_load["SIGNING_SECRET"]

In [6]:
### ユーザーマスタの取得
def get_all_user_data():
    URL = "https://slack.com/api/users.list"
    cols = ["user_id", "user_name", "gen"]
    m_user = pd.DataFrame(index=[], columns=cols)
    
    header={
        "Authorization": "Bearer {}".format(TOKEN)
    }
    res = requests.get(URL, headers=header)
    res_json = res.json()
    res_data = res_json['members']
    dict_array = []
    print("---ユーザーリストを取得---")
    for single_data in tqdm(res_data):
        if not(single_data['deleted']):  # 削除フラグが立っていない人だけまわす
            # レギュレーション通りの名前をcorrect_nameに格納する
            correct_name = single_data['profile']['display_name']
            pattern = re.compile(r"(-[0-9])|(- [0-9])|(-ob)")
            if not(bool(pattern.search(correct_name))):  # display_nameが指定の命名規則じゃなかったら
                correct_name = single_data['profile']['real_name']  # real_nameの方を採用
            
            # correct_nameの末尾より期数を取得
            splited = correct_name.split('-')
            gen = 'NA'
            if len(splited)>1:
                gen = splited[-1]
                
            ins_row = {
                "user_id": single_data['id'],
                "user_name": correct_name,
                "gen": gen
            }
            dict_array.append(ins_row)
    m_user = pd.concat([m_user, pd.DataFrame.from_dict(dict_array)])
            
    return m_user

In [7]:
### チャネル一覧の取得
def get_all_channels():
    URL = "https://slack.com/api/conversations.list"
    cols = ["channel_id", "channel_name", "purpose"]
    m_channel = pd.DataFrame(index=[], columns=cols)
    
    header={
        "Authorization": "Bearer {}".format(TOKEN)
    }
    res = requests.get(URL, headers=header)
    res_json = res.json()
    res_data = res_json['channels']
    dict_array = []
    print("---チャンネル一覧を取得---")
    for single_data in tqdm(res_data):
        if single_data['is_channel']:  # チャンネルの場合だけまわす
            ins_row = {
                "channel_id": single_data['id'],
                "channel_name": single_data['name'],
                "purpose": single_data['purpose']['value']
            }
            dict_array.append(ins_row)
    m_channel = pd.concat([m_channel, pd.DataFrame.from_dict(dict_array)])
    return m_channel

In [8]:
### チャットデータの取得
def get_all_chat_data(channel_ids, oldest_time=0):
    URL_HISTORY = "https://slack.com/api/conversations.history"
    URL_REPLIES = "https://slack.com/api/conversations.replies"
    cols = ["msg_id", "user_id", "channel_id", "thread_ts", "text", "reaction_num", "top", "timestamp"]
    t_chat = pd.DataFrame(index=[], columns=cols)
    
    # チャンネルごとにスレッドトップを取得し、chatデータへ追加
    dict_array = []
    print("---チャンネルごとのチャットを取得---")
    for channel_id in tqdm(channel_ids):
        header = {"Authorization": "Bearer "+TOKEN}
        params = {
            "channel": channel_id,
            "oldest": oldest_time
            }
        res = requests.get(URL_HISTORY, headers=header, params=params)
        res_json = res.json()
        res_data = res_json['messages']
        for single_data in res_data:
            if "client_msg_id" in single_data:  # システムメッセージは無視する
                # リアクション数をカウント
                reaction_num = 0
                if "reactions" in single_data:
                    for reaction_type in single_data['reactions']:
                        reaction_num += reaction_type['count']
                    
                # 挿入行作成
                ins_row = {
                    "msg_id": single_data['client_msg_id'],
                    "user_id": single_data['user'],
                    "channel_id": channel_id,
                    "thread_ts": single_data['ts'],
                    "text": single_data['text'],
                    "reaction_num": reaction_num,
                    "top": True,
                    "timestamp": datetime.datetime.fromtimestamp(int(single_data['ts']))
                }
                dict_array.append(ins_row)
                
                # リプライメッセージも取得
                header_child = {"Authorization": "Bearer "+TOKEN}
                params_child = {
                    "channel": channel_id,
                    "ts": single_data['ts'],
                    "oldest": oldest_time
                    }
                res_child = requests.get(URL_REPLIES, headers=header_child, params=params_child)
                res_json_child = res_child.json()
                if not("messages" in res_json_child):
                    print(res_json_child)
                res_data_child = res_json_child['messages']
                if len(res_data_child)>1:  # 1番目はスレッドトップなので無視する
                    for single_data_child in res_data_child[1:]:
                        # リアクション数をカウント
                        reaction_num_child = 0
                        if "reactions" in single_data_child:
                            for reaction_type_child in single_data_child['reactions']:
                                reaction_num_child += reaction_type_child['count']
                        # リプライの挿入行作成
                        ins_row_child = {
                            "msg_id": single_data_child['client_msg_id'],
                            "user_id": single_data_child['user'],
                            "channel_id": channel_id,
                            "thread_ts": single_data['ts'],
                            "text": single_data_child['text'],
                            "reaction_num": reaction_num_child,
                            "top": False,
                            "timestamp": datetime.datetime.fromtimestamp(int(single_data_child['ts']))
                        }
                        dict_array.append(ins_row_child)
    t_chat = pd.concat([t_chat, pd.DataFrame.from_dict(dict_array)])
    
    return t_chat
    

In [9]:
oldest_time = int(time.time() - 60*60*24*10)  # 仮数として10日前まで

m_user = get_all_user_data()
m_channel = get_all_channels()
t_chat = get_all_chat_data(m_channel['channel_id'].to_list(), oldest_time)

---ユーザーリストを取得---


100%|██████████| 122/122 [00:00<00:00, 121747.58it/s]


---チャンネル一覧を取得---


100%|██████████| 73/73 [00:00<00:00, 72987.89it/s]


---チャンネルごとのチャットを取得---


  0%|          | 0/73 [00:00<?, ?it/s]


TypeError: an integer is required (got type str)

In [None]:
m_user.to_csv('data/m_user.csv', encoding='utf_8_sig')
m_channel.to_csv('data/m_channel.csv', encoding='utf_8_sig')
t_chat.to_csv('data/t_chat.csv', encoding='utf_8_sig')