In [None]:
import json
import itertools
import pandas as pd
import datetime
import re

In [None]:
# Execute the Config notebook inside this kernel so the names it defines become
# available here. Later cells read `ownChatId` and `topNEmojis`, neither of which
# is defined in this notebook -- presumably both are set in Config.ipynb (confirm).
%run Config.ipynb

In [None]:
# Parse the export file once. The `with` block closes the file handle as soon
# as parsing is done instead of leaving it open for the kernel's lifetime.
with open('result.json', encoding='utf8') as file:
    telegram = json.load(file)

# Top-level sections of the parsed document that the following cells work on.
about = telegram['about']
chats = telegram['chats']['list']

In [None]:
# Make duplicate chat names distinguishable: the k-th repeat of a name gets
# k trailing spaces appended. Note that this rewrites chat['name'] in place.
names = []
for chat in chats:
    isPrivateChat = chat['type'] in ['personal_chat', 'private_group', 'private_supergroup']
    if not isPrivateChat or str(chat['name']) == 'None':
        # Other chat types and unnamed chats are left untouched.
        continue
    baseName = chat['name']
    # How many earlier chats already carried exactly this name.
    earlierCount = names.count(baseName)
    names.append(baseName)
    chat['name'] = baseName + ' ' * earlierCount

In [None]:
# Per-chat message totals for named private chats and groups, largest first.
names = []  # reset here; not read again in this cell
chatRows = []
for chat in chats:
    if chat['type'] not in ['personal_chat', 'private_group', 'private_supergroup'] or str(chat['name']) == 'None':
        continue
    row = {'id': str(chat['id']), 'name': chat['name'], 'length': len(chat['messages'])}
    # A message counts as "sent" when its from_id equals ownChatId; everything
    # else (including messages without a from_id key) counts as "received".
    row['sent'] = sum(1 for msg in chat['messages'] if msg.get('from_id') == ownChatId)
    row['received'] = row['length'] - row['sent']
    chatRows.append(row)

# sorted() is stable, so chats with equal length keep their export order.
frequentChats = pd.DataFrame(sorted(chatRows, key=lambda row: row['length'], reverse=True))

In [None]:
# Chat types whose messages are analysed.
privateChatTypes = ('personal_chat', 'private_group', 'private_supergroup')

# Flatten all messages of the selected chats into one list, tagging each with
# its chat name and chat type. The type check is done once per chat (it does
# not depend on the message), so chats of other types are skipped without
# walking their message lists.
messages = [{'chat': chat['name'], 'type': chat['type'], 'message': message}
            for chat in chats if chat['type'] in privateChatTypes
            for message in chat['messages']]

# Split by author. `.get('from_id')` yields None when the key is absent, so
# messages without a from_id end up in the "received" lists.
sentMessages = [entry['message'] for entry in messages
                if entry['message'].get('from_id') == ownChatId]
receivedMessages = [entry['message'] for entry in messages
                    if entry['message'].get('from_id') != ownChatId]

# Same split, restricted to one-to-one chats.
sentPrivateMessages = [entry['message'] for entry in messages
                       if entry['type'] == 'personal_chat'
                       and entry['message'].get('from_id') == ownChatId]
receivedPrivateMessages = [entry['message'] for entry in messages
                           if entry['type'] == 'personal_chat'
                           and entry['message'].get('from_id') != ownChatId]

In [None]:
def _voiceDurationPerChat(records):
    """Sum the 'duration' of voice-message records per 'chat'.

    Parameters
    ----------
    records : list of dict
        Dicts with at least the keys 'chat' and 'duration'; other keys are ignored.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'chat' with a single 'duration' column. Empty (but with the
        same layout) when `records` is empty.
    """
    # Naming the columns explicitly keeps 'chat' available for groupby even
    # when there are no records, and leaves the string 'type' field out of the
    # sum (depending on the pandas version, .sum() would otherwise either drop
    # it or concatenate the strings).
    frame = pd.DataFrame(records, columns=['chat', 'duration'])
    return frame.groupby('chat')[['duration']].sum()


# One record per voice message; a missing duration_seconds counts as 0 seconds.
sentVoiceMessages = [{'chat': msg['chat'], 'duration': msg['message'].get('duration_seconds', 0), 'type': 'sent'}
                     for msg in messages
                     if msg['message'].get('media_type') == 'voice_message'
                     and msg['message'].get('from_id') == ownChatId]
receivedVoiceMessages = [{'chat': msg['chat'], 'duration': msg['message'].get('duration_seconds', 0), 'type': 'received'}
                         for msg in messages
                         if msg['message'].get('media_type') == 'voice_message'
                         and msg['message'].get('from_id') != ownChatId]

voiceMessageTotal = _voiceDurationPerChat(sentVoiceMessages + receivedVoiceMessages)
voiceMessageSent = _voiceDurationPerChat(sentVoiceMessages)
voiceMessageReceived = _voiceDurationPerChat(receivedVoiceMessages)

# Outer merge keeps chats that have voice messages in only one direction;
# the missing direction shows up as NaN.
voiceMessages = pd.merge(voiceMessageSent, voiceMessageReceived, on='chat', how='outer')
voiceMessages = pd.merge(voiceMessages, voiceMessageTotal, on='chat')

voiceMessages = (voiceMessages
                 .rename(columns={'duration_x': 'Sent', 'duration_y': 'Received', 'duration': 'Total'})
                 .sort_values(by=['Total'], ascending=False))
voiceMessageTotal

In [None]:
def _plainText(text):
    """Return a message's 'text' field as one plain string.

    A str is returned unchanged. A list is flattened by joining its parts:
    dict parts contribute their 'text' value, anything else its str() form.
    Any other value falls back to str(text).

    NOTE(review): list-valued 'text' is assumed to be how the export stores
    messages with formatting (links, bold, mentions) -- confirm against the
    export. Calling str() on such a list would put the Python repr (brackets,
    quotes, dict keys) into the analysed text.
    """
    if isinstance(text, list):
        pieces = []
        for part in text:
            if isinstance(part, dict):
                pieces.append(str(part.get('text', '')))
            else:
                pieces.append(str(part))
        return ''.join(pieces)
    return str(text)


# Lower-cased, non-empty texts of all sent messages, sorted so that equal
# texts are adjacent (later cells group them with itertools.groupby).
texts = sorted(plain.lower()
               for plain in (_plainText(msg['text']) for msg in sentMessages)
               if plain != '')

In [None]:
# Sent messages that are stickers.
stickerMessages = list(filter(lambda msg: msg.get('media_type') == 'sticker', sentMessages))

# Emoji attached to each sticker (stickers without one are skipped), sorted so
# that itertools.groupby sees equal emoji as one consecutive run.
stickerEmojis = sorted(msg['sticker_emoji'] for msg in stickerMessages if 'sticker_emoji' in msg)

stickerEmojiRows = []
for emoji, run in itertools.groupby(stickerEmojis):
    stickerEmojiRows.append({'emoji': emoji, 'count': sum(1 for _ in run)})

# Most used sticker emoji first.
frequentStickerEmojis = pd.DataFrame(
    sorted(stickerEmojiRows, key=lambda row: row['count'], reverse=True)
)

In [None]:
# Count how often each distinct text occurs. groupby only merges consecutive
# equal items, so this relies on `texts` being sorted.
frequencies = []
for text, run in itertools.groupby(texts):
    frequencies.append({'text': text, 'count': sum(1 for _ in run)})

# Keep only texts sent more than once, most frequent first.
repeatedTexts = [row for row in frequencies if row['count'] > 1]
frequentMessages = pd.DataFrame(
    sorted(repeatedTexts, key=lambda row: row['count'], reverse=True)
)

In [None]:
# Matches every run of characters that lie OUTSIDE the listed emoji ranges
# (note the leading "^" inside the character class).
regrex_pattern = re.compile(
    "[^"
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "]+",
    flags=re.UNICODE,
)


def deEmojify(text):
    """Return `text` reduced to the characters inside the emoji ranges above.

    Despite the name, the emoji are what is kept: every run of characters
    outside the listed ranges is replaced by the empty string.
    """
    emojiOnly = regrex_pattern.sub('', text)
    return emojiOnly

# Strip every text down to its emoji characters, then sort all of those
# characters so that equal ones are adjacent for itertools.groupby.
emojiOnlyTexts = (deEmojify(text) for text in texts)
emojis = sorted(''.join(emojiOnlyTexts))

emojiRows = []
for emoji, run in itertools.groupby(emojis):
    emojiRows.append({'emoji': emoji, 'count': sum(1 for _ in run)})

# Most used first, truncated to the first topNEmojis rows.
frequentEmojis = pd.DataFrame(
    sorted(emojiRows, key=lambda row: row['count'], reverse=True)
).head(topNEmojis)

In [None]:
# Chronological list of all messages that belong to a named chat.
datedMessages = sorted(
    ({'datetime': datetime.datetime.strptime(msg['message']['date'], '%Y-%m-%dT%H:%M:%S'),
      'chat': msg['chat']}
     for msg in messages if str(msg['chat']) != 'None'),
    key=lambda entry: entry['datetime'],
)

# Running message counts over time: `totalCount` across all chats and
# `chatCount` within the message's own chat. Both are 1-based, so at any point
# the total equals the sum of the latest per-chat counts (numbering the total
# from 0 would leave it one short of that sum).
chatCounter = {}
messageDatetimes = []
for totalCount, entry in enumerate(datedMessages, start=1):
    chatName = entry['chat']
    chatCounter[chatName] = chatCounter.get(chatName, 0) + 1
    messageDatetimes.append({'time': entry['datetime'],
                             'totalCount': totalCount,
                             'chat': chatName,
                             'chatCount': chatCounter[chatName]})

In [None]:
# Calendar day of every message (the first 10 characters of the date string),
# sorted so that itertools.groupby sees each day as one consecutive run.
messageDates = sorted(
    datetime.date.fromisoformat(entry['message']['date'][:10]) for entry in messages
)

dailyCounts = []
for day, run in itertools.groupby(messageDates):
    dailyCounts.append({'date': day, 'count': len(list(run))})

# Mean messages per weekday (0 = Monday). Only days that appear in the data
# contribute to the mean; days without any message are not counted as zero.
weekdayRows = [{'weekday': row['date'].weekday(), 'count': row['count']} for row in dailyCounts]
messagesPerWeekday = pd.DataFrame(weekdayRows).groupby('weekday').mean()

messageDateFrequencies = pd.DataFrame(dailyCounts)

In [None]:
# Map every message to the first day of its month ('YYYY-MM-' + '01'), sorted
# so that itertools.groupby sees each month as one consecutive run.
messageMonths = sorted(
    datetime.date.fromisoformat(entry['message']['date'][:8] + '01') for entry in messages
)

monthRows = []
for month, run in itertools.groupby(messageMonths):
    monthRows.append({'date': month, 'count': sum(1 for _ in run)})

messageMonthFrequencies = pd.DataFrame(monthRows)

In [None]:
def _messagesPerMinute(privateMessages):
    """Count messages per minute of the day.

    Parameters
    ----------
    privateMessages : list of dict
        Message dicts whose 'date' value is a 'YYYY-MM-DDTHH:MM:SS' string.

    Returns
    -------
    list of dict
        Exactly 1440 entries {'time': datetime.time, 'count': int}, one per
        minute from 00:00 to 23:59 in order; minutes without messages have
        count 0. This also holds for empty input or when the last populated
        minute is earlier than 23:59, where filling gaps by indexing into the
        sparse list would run past its end.
    """
    perMinute = {}
    for msg in privateMessages:
        # Characters 11.. of the date are 'HH:MM:SS'; the seconds are replaced
        # by '00' so that all messages of one minute share the same key.
        minute = datetime.time.fromisoformat(msg['date'][11:-2] + '00')
        perMinute[minute] = perMinute.get(minute, 0) + 1
    return [{'time': slot, 'count': perMinute.get(slot, 0)}
            for slot in (datetime.time(h, m) for h in range(24) for m in range(60))]


sentMessageTimes = _messagesPerMinute(sentPrivateMessages)
receivedMessageTimes = _messagesPerMinute(receivedPrivateMessages)

# Both lists cover the same 1440 minutes in the same order, so they can be
# combined position by position.
totalMessageTimes = pd.DataFrame([{'time': sent['time'],
                                   'Sent': sent['count'],
                                   'Received': received['count'],
                                   'Total': sent['count'] + received['count']}
                                  for sent, received in zip(sentMessageTimes, receivedMessageTimes)])