# For Getting channels log

## Import modules

In [1]:
import os
from slack_sdk import WebClient
import pandas as pd
import numpy as np
import datetime
from collections import OrderedDict
import openpyxl
import urllib

## WebApp settings

In [2]:
proxy = urllib.request.getproxies().get('http')
print(proxy)
client = WebClient(token=os.environ['TEST_CH_BOT_TOKEN'], proxy=proxy)
uclient = WebClient(token=os.environ['TEST_CH_USER_TOKEN'], proxy=proxy)

None


## get messages (without replies) in specified channel 

### get channels (chs) ids and names in your workspace
#### retrive wholl info of chs (for backup raw data)

In [3]:
ws_info = uclient.conversations_list()
ws_info_ch = ws_info.get('channels')
df_ch_info = pd.json_normalize(ws_info_ch)
print(df_ch_info.columns)

Index(['id', 'name', 'is_channel', 'is_group', 'is_im', 'is_mpim',
       'is_private', 'created', 'is_archived', 'is_general', 'unlinked',
       'name_normalized', 'is_shared', 'is_org_shared',
       'is_pending_ext_shared', 'pending_shared', 'context_team_id',
       'parent_conversation', 'creator', 'is_ext_shared', 'shared_team_ids',
       'pending_connected_team_ids', 'is_member', 'previous_names',
       'num_members', 'topic.value', 'topic.creator', 'topic.last_set',
       'purpose.value', 'purpose.creator', 'purpose.last_set'],
      dtype='object')


#### extract chs ids and names

In [4]:
ch_id_name = []
for i in zip(df_ch_info['id'], df_ch_info['name']):
    ch_id_name.append(i)

df_ch_id_name = df_ch_info[['id', 'name']]
df_ch_id_name

Unnamed: 0,id,name
0,C03V9LGSRS7,general
1,C03V9LS0AN7,devslacksdk
2,C040KER6LBS,random


---
### specify channel for retriving log

In [5]:
# example: specify a channel named 'devslacksdk'
ch_idx = 1 # select number on the left of 'id' column above table

ch_id = df_ch_id_name['id'][ch_idx]
ch_name = df_ch_id_name['name'][ch_idx]
print('Specified channel: '+ch_name+', ch_id: '+ch_id)

Specified channel: devslacksdk, ch_id: C03V9LS0AN7


### get messages log for specified ch

#### retrive raw data and create table of messages log

In [6]:
ch_log = uclient.conversations_history(channel=ch_id) # it is raw data without replies
ch_ms_log = ch_log['messages']

df_ch_ms_log = pd.json_normalize(ch_ms_log)
list_log = []

# slack API can retrive only 100 messages at once, so following steps are for retriving over 100.
i = 0
if ch_log['has_more'] == True:
    while ch_log['has_more'] == True:
        ch_log = uclient.conversations_history(channel=ch_id, cursor=ch_log['response_metadata']['next_cursor'])
        ch_ms_log_n = ch_log['messages']
        list_log.append(ch_log['messages'])
        
        df_ch_ms_log_n = pd.json_normalize(ch_ms_log_n)
        df_ch_ms_log = df_ch_ms_log.append(df_ch_ms_log_n)
        i += 1
print(i)
flat = [x for row in list_log for x in row]
ch_ms_log.extend(flat) # ch_ms_log can be stored over 100 logs
print(len(ch_ms_log)) # num of messages
df_ch_ms_log.columns

0
15


Index(['type', 'text', 'files', 'upload', 'user', 'display_as_bot', 'ts',
       'blocks', 'client_msg_id', 'thread_ts', 'reply_count',
       'reply_users_count', 'latest_reply', 'reply_users', 'is_locked',
       'subscribed', 'last_read', 'team', 'subtype', 'bot_id', 'bot_link',
       'app_id', 'bot_profile.id', 'bot_profile.deleted', 'bot_profile.name',
       'bot_profile.updated', 'bot_profile.app_id',
       'bot_profile.icons.image_36', 'bot_profile.icons.image_48',
       'bot_profile.icons.image_72', 'bot_profile.team_id'],
      dtype='object')

#### select info what you want to retrive and format table

In [7]:
reindex_col = ['type', 'thread_ts', 'ts', 'user', 'text', 'reply_count', 'reply_users_count', 'topic']
df_ch_ms_log = df_ch_ms_log.reindex(columns=reindex_col)
df_ch_ms_log

Unnamed: 0,type,thread_ts,ts,user,text,reply_count,reply_users_count,topic
0,message,1662263292.995709,1662263292.995709,U03V9PRH45Q,attach,1.0,1.0,
1,message,1662258606.222179,1662258606.222179,U03V9PRH45Q,take me out to the ball game,2.0,2.0,
2,message,,1662018576.257109,U03V6QSJAHK,added an integration to this channel: <https:/...,,,
3,message,,1661521206.696809,U03V822QKT7,<@U03V822QKT7>さんがチャンネルに参加しました,,,
4,message,1661521178.753299,1661521178.753299,U03V822QKT7,this is a test what this app can do as non-adm...,1.0,1.0,
5,message,,1661510006.376009,U03V9NAJJV9,test,,,
6,message,,1661509993.000529,U03V9NAJJV9,test,,,
7,message,,1661509970.285659,U03V9NAJJV9,test,,,
8,message,1661509962.274849,1661509962.274849,U03V9NAJJV9,test,1.0,1.0,
9,message,,1661509831.231479,U03V9NAJJV9,test,,,


### (in MESSAGES) retrive attached files and create table contains file name and download url

In [8]:
files = []

for i in df_ch_ms_log.index:
    if 'files' in ch_ms_log[i].keys():
        files.append(ch_ms_log[i]['files'][0])
    else:
        files.append({'name': 'None', 'url_private_download': 'None'})  

df_ch_files_ms = pd.json_normalize(files)
df_ch_files_ms = df_ch_files_ms[['name', 'url_private_download']]
df_ch_files_ms = df_ch_files_ms.rename(columns={'name': 'FileName', 'url_private_download': 'FileURL'})
df_ch_files_ms

Unnamed: 0,FileName,FileURL
0,README.md,https://files.slack.com/files-pri/T040KER5GG0-...
1,,
2,,
3,,
4,,
5,,
6,,
7,,
8,,
9,,


### combine MESSAGE table with file rep table and sort by ts

In [9]:
df_ch_ms_log = pd.concat([df_ch_ms_log, df_ch_files_ms], axis=1).sort_values('ts').reset_index().drop(columns='index')
df_ch_ms_log

Unnamed: 0,type,thread_ts,ts,user,text,reply_count,reply_users_count,topic,FileName,FileURL
0,message,,1661503459.015449,U03V9PRH45Q,<@U03V9PRH45Q>さんがチャンネルに参加しました,,,,,
1,message,,1661503499.739869,U03V6QSJAHK,<@U03V6QSJAHK>さんがチャンネルに参加しました,,,,,
2,message,,1661505272.611409,U03V9NAJJV9,test,,,,,
3,message,,1661505334.528119,U03V9NAJJV9,<@U03V9NAJJV9>さんがチャンネルに参加しました,,,,,
4,message,1661505509.376439,1661505509.376439,U03V9PRH45Q,test,1.0,1.0,,,
5,message,,1661509831.231479,U03V9NAJJV9,test,,,,,
6,message,1661509962.274849,1661509962.274849,U03V9NAJJV9,test,1.0,1.0,,,
7,message,,1661509970.285659,U03V9NAJJV9,test,,,,,
8,message,,1661509993.000529,U03V9NAJJV9,test,,,,,
9,message,,1661510006.376009,U03V9NAJJV9,test,,,,,


---
## get replies to associate with each replies by parent messages

### search messages including replies and retrive reps with parent messages

In [10]:
threads = []
for i in df_ch_ms_log.index:
    if np.isnan(df_ch_ms_log['reply_count'][i]) == False:
        thr = uclient.conversations_replies(channel=ch_id, ts=df_ch_ms_log['thread_ts'][i])
        thr_ms = thr.get('messages')
        threads.append(thr_ms)

In [11]:
reps = []
for i in range(len(threads)):
    rep = threads[i]
    for j in range(len(rep)):
        reps.append(rep[j])
print(len(reps))

11


### following steps for create table contains messages and reps
#### create reply table
#### (in REPLIES) retrive attached files and create table contains file name and download url
#### combine REPLY table with FILE table and sort by timestamp

In [12]:
if len(reps) != 0:
    # create reply table
    reindex_col = ['type', 'thread_ts', 'ts', 'user', 'text', 'reply_count', 'reply_users_count', 'topic']
    df_ch_rep = pd.json_normalize(reps).reindex(columns=reindex_col)

    
    # (in REPLIES) retrive attached files and create table contains file name and download url
    files_rep = []
    for i in range(len(reps)):
        if 'files' in reps[i].keys():
            files_rep.append(reps[i]['files'][0])
        else:
            files_rep.append({'name': 'None', 'url_private_download': 'None'})

    df_files_rep = pd.json_normalize(files_rep)
    df_files_rep = df_files_rep[['name', 'url_private_download']]
    df_files_rep = df_files_rep.rename(columns={'name': 'FileName', 'url_private_download': 'FileURL'})
    print(df_files_rep)

    # combine REPLY table with FILE table and sort by thread_ts
    df_ch_rep_log = pd.concat([df_ch_rep, df_files_rep], axis=1).sort_values('thread_ts').reset_index().drop(columns='index')
    df_ch_rep_log['type']='thread'
    print(df_ch_rep_log)

    
    # combine MESSAGE with REPLY 
    df_ch_log = pd.concat([df_ch_ms_log, df_ch_rep_log])
    # remove duplicated parent messages by drop_duplicates
    df_ch_log = df_ch_log.drop_duplicates(subset = ['text', 'ts'], keep='last').reset_index().drop(columns='index')


else:
    df_ch_log = df_ch_ms_log
df_ch_log

                FileName                                            FileURL
0                   None                                               None
1      GetLog_logo_3.png  https://files.slack.com/files-pri/T040KER5GG0-...
2                   None                                               None
3      GetLog_logo_3.png  https://files.slack.com/files-pri/T040KER5GG0-...
4                   None                                               None
5                   None                                               None
6                   None                                               None
7                   None                                               None
8   baseball_stadium.png  https://files.slack.com/files-pri/T040KER5GG0-...
9              README.md  https://files.slack.com/files-pri/T040KER5GG0-...
10       GetLog_logo.png  https://files.slack.com/files-pri/T040KER5GG0-...
      type          thread_ts                 ts         user  \
0   thread  1661505509.

Unnamed: 0,type,thread_ts,ts,user,text,reply_count,reply_users_count,topic,FileName,FileURL
0,message,,1661503459.015449,U03V9PRH45Q,<@U03V9PRH45Q>さんがチャンネルに参加しました,,,,,
1,message,,1661503499.739869,U03V6QSJAHK,<@U03V6QSJAHK>さんがチャンネルに参加しました,,,,,
2,message,,1661505272.611409,U03V9NAJJV9,test,,,,,
3,message,,1661505334.528119,U03V9NAJJV9,<@U03V9NAJJV9>さんがチャンネルに参加しました,,,,,
4,message,,1661509831.231479,U03V9NAJJV9,test,,,,,
5,message,,1661509970.285659,U03V9NAJJV9,test,,,,,
6,message,,1661509993.000529,U03V9NAJJV9,test,,,,,
7,message,,1661510006.376009,U03V9NAJJV9,test,,,,,
8,message,,1661521206.696809,U03V822QKT7,<@U03V822QKT7>さんがチャンネルに参加しました,,,,,
9,message,,1662018576.257109,U03V6QSJAHK,added an integration to this channel: <https:/...,,,,,


## shape table

### convert dtypes of ts and thread_ts, from str to float, and replace NaN to 'None'

In [13]:
df_ch_log_astype = df_ch_log.astype({'ts': float, 'thread_ts': float}).fillna('None')
df_ch_log_astype

Unnamed: 0,type,thread_ts,ts,user,text,reply_count,reply_users_count,topic,FileName,FileURL
0,message,,1661503000.0,U03V9PRH45Q,<@U03V9PRH45Q>さんがチャンネルに参加しました,,,,,
1,message,,1661503000.0,U03V6QSJAHK,<@U03V6QSJAHK>さんがチャンネルに参加しました,,,,,
2,message,,1661505000.0,U03V9NAJJV9,test,,,,,
3,message,,1661505000.0,U03V9NAJJV9,<@U03V9NAJJV9>さんがチャンネルに参加しました,,,,,
4,message,,1661510000.0,U03V9NAJJV9,test,,,,,
5,message,,1661510000.0,U03V9NAJJV9,test,,,,,
6,message,,1661510000.0,U03V9NAJJV9,test,,,,,
7,message,,1661510000.0,U03V9NAJJV9,test,,,,,
8,message,,1661521000.0,U03V822QKT7,<@U03V822QKT7>さんがチャンネルに参加しました,,,,,
9,message,,1662019000.0,U03V6QSJAHK,added an integration to this channel: <https:/...,,,,,


### replace UNIX DATE to formatted one and separate ts to date and time (for multiindex)

In [14]:
date = []
time = []
for i in df_ch_log_astype.index:
    if (type(df_ch_log_astype['ts'][i]) == np.float64 or 
        type(df_ch_log_astype['ts'][i]) == float):
        # create list of date
        dt_raw = datetime.date.fromtimestamp(df_ch_log_astype['ts'][i])
        dt = dt_raw.strftime('%a, %b %d, %Y')
        date.append(dt)
        print('finished formatting ts to date')        
        
        # create list of time
        ti_raw = datetime.datetime.fromtimestamp(df_ch_log_astype['ts'][i])
        ti = ti_raw.strftime('%H:%M')
        time.append(ti)
        print('finished formatting ts to time') 
    
    # format thread_ts to datetime
    if (type(df_ch_log_astype['thread_ts'][i]) == np.float64 or 
        type(df_ch_log_astype['thread_ts'][i]) == float):
        dtime = datetime.datetime.fromtimestamp(df_ch_log_astype['thread_ts'][i])
        df_ch_log_astype.iloc[i, 1] = dtime.strftime('%H:%M, %a, %b %d, %Y')
        print ('finished formatting thread_ts')
    
    else:
        print('thread_ts is not found')

df_datetime = pd.DataFrame({'date': date, 
                            'time': time})
df_ch_log_frt = pd.concat([df_datetime, df_ch_log_astype],axis=1).drop(columns='ts')
df_ch_log_frt

finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
thread_ts is not found
finished formatting ts to date
finished formatting ts to time
finished formatting thread_ts
finished formatting ts to date
finished formatting ts to t

Unnamed: 0,date,time,type,thread_ts,user,text,reply_count,reply_users_count,topic,FileName,FileURL
0,"Fri, Aug 26, 2022",17:44,message,,U03V9PRH45Q,<@U03V9PRH45Q>さんがチャンネルに参加しました,,,,,
1,"Fri, Aug 26, 2022",17:44,message,,U03V6QSJAHK,<@U03V6QSJAHK>さんがチャンネルに参加しました,,,,,
2,"Fri, Aug 26, 2022",18:14,message,,U03V9NAJJV9,test,,,,,
3,"Fri, Aug 26, 2022",18:15,message,,U03V9NAJJV9,<@U03V9NAJJV9>さんがチャンネルに参加しました,,,,,
4,"Fri, Aug 26, 2022",19:30,message,,U03V9NAJJV9,test,,,,,
5,"Fri, Aug 26, 2022",19:32,message,,U03V9NAJJV9,test,,,,,
6,"Fri, Aug 26, 2022",19:33,message,,U03V9NAJJV9,test,,,,,
7,"Fri, Aug 26, 2022",19:33,message,,U03V9NAJJV9,test,,,,,
8,"Fri, Aug 26, 2022",22:40,message,,U03V822QKT7,<@U03V822QKT7>さんがチャンネルに参加しました,,,,,
9,"Thu, Sep 01, 2022",16:49,message,,U03V6QSJAHK,added an integration to this channel: <https:/...,,,,,


### get users info

In [15]:
users_info = uclient.users_list().get('members')
df_users_info = pd.json_normalize(users_info)

# create list of user_id and names
user_id_names = []
for i in zip(df_users_info['id'], df_users_info['real_name']):
    user_id_names.append(i)
df_user_id_names = pd.DataFrame(user_id_names,columns=['id', 'real_name']).fillna('deleted user').astype({'real_name': str})
df_user_id_names

Unnamed: 0,id,real_name
0,USLACKBOT,Slackbot
1,U03V6QSJAHK,hk
2,U03V822QKT7,deleted user
3,U03V9NAJJV9,deleted user
4,U03V9PRH45Q,test_user
5,U040Q1A745S,Slack Developer Tools
6,U040W9Z63BP,test_getlog


### replace user id in 'names' column to 'real_name'

In [16]:
for i in df_ch_log_frt.index:
    for j in range(len(user_id_names)):
        if df_ch_log_frt.user[i] == df_user_id_names['id'][j]:
            df_ch_log_frt.user[i] = df_user_id_names['real_name'][j]

### replace user id in 'text' column to 'real_name' (maybe able to merge above cell?)

In [17]:
for i in df_ch_log_frt.index:
    for j in df_user_id_names.index:
        if df_user_id_names['id'][j] in df_ch_log_frt['text'][i]:
            df_ch_log_frt['text'][i] = df_ch_log_frt['text'][i].replace(df_user_id_names['id'][j], df_user_id_names['real_name'][j])
        else:
            continue

### for only uploading file (None of text fields)

In [18]:
for i in df_ch_log_frt.index:
    if df_ch_log_frt.text[i] == '':
        df_ch_log_frt.text[i] = 'attached file(s) only'
    else:
        continue

### check result of formatting and replacing

In [19]:
df_ch_log_frt

Unnamed: 0,date,time,type,thread_ts,user,text,reply_count,reply_users_count,topic,FileName,FileURL
0,"Fri, Aug 26, 2022",17:44,message,,test_user,<@test_user>さんがチャンネルに参加しました,,,,,
1,"Fri, Aug 26, 2022",17:44,message,,hk,<@hk>さんがチャンネルに参加しました,,,,,
2,"Fri, Aug 26, 2022",18:14,message,,deleted user,test,,,,,
3,"Fri, Aug 26, 2022",18:15,message,,deleted user,<@deleted user>さんがチャンネルに参加しました,,,,,
4,"Fri, Aug 26, 2022",19:30,message,,deleted user,test,,,,,
5,"Fri, Aug 26, 2022",19:32,message,,deleted user,test,,,,,
6,"Fri, Aug 26, 2022",19:33,message,,deleted user,test,,,,,
7,"Fri, Aug 26, 2022",19:33,message,,deleted user,test,,,,,
8,"Fri, Aug 26, 2022",22:40,message,,deleted user,<@deleted user>さんがチャンネルに参加しました,,,,,
9,"Thu, Sep 01, 2022",16:49,message,,hk,added an integration to this channel: <https:/...,,,,,


### multiIndex

In [20]:
df_ch_log_frt_mi = df_ch_log_frt.set_index(['type', 'thread_ts', 'date', 'time'])
df_ch_log_frt_mi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,user,text,reply_count,reply_users_count,topic,FileName,FileURL
type,thread_ts,date,time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
message,,"Fri, Aug 26, 2022",17:44,test_user,<@test_user>さんがチャンネルに参加しました,,,,,
message,,"Fri, Aug 26, 2022",17:44,hk,<@hk>さんがチャンネルに参加しました,,,,,
message,,"Fri, Aug 26, 2022",18:14,deleted user,test,,,,,
message,,"Fri, Aug 26, 2022",18:15,deleted user,<@deleted user>さんがチャンネルに参加しました,,,,,
message,,"Fri, Aug 26, 2022",19:30,deleted user,test,,,,,
message,,"Fri, Aug 26, 2022",19:32,deleted user,test,,,,,
message,,"Fri, Aug 26, 2022",19:33,deleted user,test,,,,,
message,,"Fri, Aug 26, 2022",19:33,deleted user,test,,,,,
message,,"Fri, Aug 26, 2022",22:40,deleted user,<@deleted user>さんがチャンネルに参加しました,,,,,
message,,"Thu, Sep 01, 2022",16:49,hk,added an integration to this channel: <https:/...,,,,,


## export files

### setting *you can custom something (year, path, etc.)

In [21]:
backup_date = datetime.date.today()
backup_date = backup_date.strftime('%y%m%d')

# DO NOT FORGET MODIFY YEAR (year of current backup workspace) 
tgt_ws_year = 2022
print(backup_date)
print(ch_name)

220905
devslacksdk


In [22]:
path_ch = '../channel/'
if os.path.exists(path_ch) == False:
    %mkdir path_ch
else:
    print('directory already exists')
    
path_ch_full = '../ch_full_log/'
if os.path.exists(path_ch_full) == False:
    %mkdir path_ch_full
else:
    print('directory already exists')
    
path_YEAR = str(tgt_ws_year)
if os.path.exists(path_ch+path_YEAR) == False:
    %mkdir path_ch+path_YEAR
else:
    print('directory already exists')

if os.path.exists(path_ch_full+path_YEAR) == False:
    %mkdir path_ch_full+path_YEAR
else:
    print('directory already exists')

directory already exists
directory already exists
directory already exists
directory already exists


### export excel file (for remaining multiIndex and encoding automatically)

In [23]:
df_ch_log_frt_mi.to_excel(path_ch+path_YEAR+'/log_ch_'+path_YEAR+'_'+ch_name+'_'+backup_date+'.xlsx')

### export raw file (json format)

#### not included replies

In [24]:
with open(path_ch_full+path_YEAR+'/Raw_ch_log_NOrep_'+path_YEAR+'_'+ch_name+'_'+backup_date+'.dat', 'w') as f:
    print(ch_log, file=f) 

#### replies data

In [25]:
with open(path_ch_full+path_YEAR+'/Raw_ch_rep_log_'+path_YEAR+'_'+ch_name+'_'+backup_date+'.dat', 'w') as f:
    print(thr, file=f) 