In [245]:
import sys

# Make the local `src` directory importable (the project imports below are
# presumably resolved from there). Guarded so that re-running this cell does
# not keep appending duplicate entries to sys.path.
if 'src' not in sys.path:
    sys.path.append('src')

In [246]:
import locale

# Dates are displayed in French, so LC_TIME must be set to a French locale.
# The accepted locale name depends on the platform ('fr_FR' on Windows,
# 'fr_FR.UTF-8' elsewhere), so try them in order and keep the first that works.
FRENCH_LOCALE_CANDIDATES = ('fr_FR', 'fr_FR.UTF-8')

for _locale_name in FRENCH_LOCALE_CANDIDATES:
    try:
        locale.setlocale(locale.LC_TIME, _locale_name)
        break
    except locale.Error:
        continue
else:
    # Same exception type as a direct setlocale() failure, with a clearer message.
    raise locale.Error(
        f"no French locale available (tried: {', '.join(FRENCH_LOCALE_CANDIDATES)})"
    )

import pandas as pd

from models.telegram import TelegramModel
from models.messenger import MessengerModel
from utils.formatting import process_for_latex
from utils.timing import add_timing_to_df
from utils.formatting import left_formating, right_formating, left_formating_with_bubbles
from utils.formatting import right_formating_with_bubbles, format_msg

In [247]:
# One parser object per chat platform; their parse_from_json() is used below
# to load each platform's export.
telegram_model, messenger_model = TelegramModel(), MessengerModel()

In [248]:
# Load the Telegram export (result.json) into a table of messages.
telegram_data_path = 'data/telegram/ChatExport_2020-12-06/result.json'
concatenated_table_telegram = telegram_model.parse_from_json(
    telegram_data_path
)
# Uncomment to inspect the parsed table:
# concatenated_table_telegram.info()
# concatenated_table_telegram

../data/telegram/ChatExport_2020-12-06/video_files/video_1@18-07-2020_15-57-31.mp4_thumb.jpg
../data/telegram/ChatExport_2020-12-06/video_files/video_5@19-07-2020_20-33-41.mp4_thumb.jpg


In [249]:
# Load the two Messenger export files and stack them into a single table.
messenger_data_path = 'data/messenger/message_1.json'
concatenated_table_messenger = messenger_model.parse_from_json(messenger_data_path)
messenger_data_path = 'data/messenger/message_2.json'
concatenated_table_messenger_bis = messenger_model.parse_from_json(messenger_data_path)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# (rows of the second table after the first, original index labels kept).
concatenated_table_messenger = pd.concat(
    [concatenated_table_messenger, concatenated_table_messenger_bis]
)
concatenated_table_messenger

Unnamed: 0,source,datetime,sender,message,path,gifs,reactions,videos
0,Messenger,2020-12-06 16:40:55.571000,B,Non je rigole Marc :),[],[],[],[]
1,Messenger,2020-12-06 16:37:57.799000,M,Gênant ?,[],[],[],[]
2,Messenger,2020-12-06 16:37:54.454000,M,\emoji[ios]{1F605},[],[],[],[]
3,Messenger,2020-12-06 16:01:15.932000,B,\emoji[ios]{1F928},[],[],[],[]
4,Messenger,2020-12-06 16:00:44.320000,M,Zib zib,[],[],[\emoji[ios]{1F606}],[]
...,...,...,...,...,...,...,...,...
95,Messenger,2020-06-14 00:44:16.059000,B,J'avais un doute,[],[],[],[]
96,Messenger,2020-06-14 00:44:07.461000,B,Aucun problème,[],[],[],[]
97,Messenger,2020-06-13 21:04:45.454000,M,(excuse moi je ne l'utilise pas du tout mais j...,[],[],[],[]
98,Messenger,2020-06-13 20:34:40.201000,B,C'est toi sur instagram ?,[],[],[],[]


In [250]:
# Merge the Messenger and Telegram tables (Messenger rows first).
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
concatenated_table = pd.concat([concatenated_table_messenger, concatenated_table_telegram])
concatenated_table.shape

(298, 8)

In [251]:
# Enrich the merged table with the timing/layout columns used for typesetting
# (per the displayed result: date, dateStr, timeStr, right, senderGroup).
concatenated_table = add_timing_to_df(concatenated_table)
concatenated_table

Unnamed: 0,source,datetime,sender,message,path,gifs,reactions,videos,date,dateStr,timeStr,right,senderGroup
0,Messenger,2020-06-13 12:48:03.130,M,\emoji[ios]{1F44C}\emoji[ios]{1F3FD}\emoji[ios...,[],[],[],[],2020-06-13,Samedi 13 juin,12:48,True,0
1,Messenger,2020-06-13 20:34:40.201,B,C'est toi sur instagram ?,[],[],[],[],2020-06-13,Samedi 13 juin,20:34,False,1
2,Messenger,2020-06-13 21:04:45.454,M,(excuse moi je ne l'utilise pas du tout mais j...,[],[],[],[],2020-06-13,Samedi 13 juin,21:04,True,2
3,Messenger,2020-06-14 00:44:07.461,B,Aucun problème,[],[],[],[],2020-06-14,Dimanche 14 juin,00:44,False,0
4,Messenger,2020-06-14 00:44:16.059,B,J'avais un doute,[],[],[],[],2020-06-14,Dimanche 14 juin,00:44,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,Messenger,2020-12-06 16:00:44.320,M,Zib zib,[],[],[\emoji[ios]{1F606}],[],2020-12-06,Dimanche 6 décembre,16:00,True,1
294,Messenger,2020-12-06 16:01:15.932,B,\emoji[ios]{1F928},[],[],[],[],2020-12-06,Dimanche 6 décembre,16:01,False,2
295,Messenger,2020-12-06 16:37:54.454,M,\emoji[ios]{1F605},[],[],[],[],2020-12-06,Dimanche 6 décembre,16:37,True,3
296,Messenger,2020-12-06 16:37:57.799,M,Gênant ?,[],[],[],[],2020-12-06,Dimanche 6 décembre,16:37,True,3


In [252]:
# Build the LaTeX markup of every message, then write one yyyy-mm-dd.tex
# section file per day
# -------------------------------------------------------------------

import numpy as np


# Work on a copy: the steps below rewrite the `message` column, so operating on
# `concatenated_table` itself would wrap the bubbles a second time on every
# re-run of this cell.
df = concatenated_table.copy()

# `right` selects right-hand vs left-hand layout. Force a real boolean dtype so
# that `~is_right` is a logical NOT.
is_right = df['right'].astype(bool)
flush_env = is_right.map({True: 'flushright', False: 'flushleft'})

# --- text bubbles ----------------------------------------------------------

# Remember which rows carry no text: they must not produce an (empty) bubble.
df['empty_message'] = df['message'] == ""

# Opening / closing of the bubble environment, depending on the side.
df['introtex'] = np.where(is_right, "\\begin{rightbubbles}", "\\begin{leftbubbles}")
df['conclutex'] = np.where(is_right, "\\end{rightbubbles}", "\\end{leftbubbles}")
df['conclutex'] = df['conclutex'] + "\\vspace*{-0.45cm}"

# Extra vertical space whenever the sender differs from the previous row.
# The first row has no predecessor and gets an empty prefix.
senders = df['sender'].to_numpy()
is_after_switch = senders[1:] != senders[:-1]
df['switchtex'] = np.append('', np.where(is_after_switch, "\\vspace*{0.2cm}", " "))

# Time of the message, pushed to the right of the bubble in small gray type.
df['datetex'] = (
    "\\hspace{0.5cm}\\hfill{\\textcolor{mygray}{{\\footnotesize " + df['timeStr'] + "}}}"
)

# Escape `&` for LaTeX. Series.replace() only matches cells that are exactly
# "&", so the substring replacement has to go through the .str accessor. The
# lookbehind skips ampersands that are already escaped.
df['message'] = df['message'].str.replace(r'(?<!\\)&', r'\\&', regex=True)

# Assemble the bubble, then blank it out again for rows that had no text.
bubble_parts = ['switchtex', 'introtex', 'message', 'datetex', 'conclutex']
df['message'] = df[bubble_parts].agg(' '.join, axis=1)
df['message'] = df['message'].mask(df['empty_message'], "")

# --- photos ----------------------------------------------------------------

# `path` holds a list of image paths per row; a non-empty list means "photo".
df['is_photo'] = df['path'].map(bool).astype(bool)

# Consecutive photos share one figure. Two neighbouring rows only belong to the
# same figure when both are photos on the same side and on the same day;
# otherwise the figure would open with one flush environment and close with
# another, or be split across two section files.
same_run_as_next = (
    is_right.eq(is_right.shift(-1, fill_value=False))
    & df['date'].eq(df['date'].shift(-1))
)
same_run_as_prev = (
    is_right.eq(is_right.shift(1, fill_value=False))
    & df['date'].eq(df['date'].shift(1))
)
# Naming caveat: `is_after_photo` is True when the NEXT row continues the photo
# run, `is_before_photo` when the PREVIOUS row does. shift(fill_value=False)
# keeps the columns boolean, so no fillna is needed.
df['is_after_photo'] = (
    df['is_photo'] & df['is_photo'].shift(-1, fill_value=False) & same_run_as_next
)
df['is_before_photo'] = (
    df['is_photo'] & df['is_photo'].shift(1, fill_value=False) & same_run_as_prev
)

opens_figure = df['is_photo'] & ~df['is_before_photo']
closes_figure = df['is_photo'] & ~df['is_after_photo']

# Only the first path of each row is typeset.
photo_tex = (
    '\\includegraphics[width=5cm,height=5cm,keepaspectratio]{' + df['path'].str.get(0) + '}'
).where(df['is_photo'], "")
# Horizontal gap before the next photo of the same figure.
photo_tex = photo_tex.mask(df['is_after_photo'], photo_tex + " \\hspace{0.2cm}")
# The first photo of a run opens the figure and its flush environment ...
photo_tex = photo_tex.mask(
    opens_figure,
    '\\begin{figure}[H] \n \\begin{' + flush_env + '} \n ' + photo_tex,
)
# ... and the last one closes them.
photo_tex = photo_tex.mask(
    closes_figure,
    photo_tex + '\\end{' + flush_env + '}\n\\end{figure}',
)
df['tex_for_photo'] = photo_tex

# --- gifs ------------------------------------------------------------------

df['is_gif'] = df['gifs'].map(bool).astype(bool)
first_gif = df['gifs'].str.get(0)
gif_right = (
    '\\begin{figure}[H] \n \\begin{flushright} \\emoji[ios]{1F3A5}  '
    '\\includegraphics[width=0.3\\textwidth]{' + first_gif + '}\\end{flushright}\n\\end{figure}'
)
gif_left = (
    '\\begin{figure}[H] \n \\begin{flushleft} \\emoji[ios]{1F3A5}  '
    '\\includegraphics[width=0.3\\textwidth]{' + first_gif + '} \\end{flushleft}\n\\end{figure}'
)
df['tex_for_gif'] = np.where(df['is_gif'], np.where(is_right, gif_right, gif_left), "")

# --- videos ----------------------------------------------------------------

df['is_videos'] = df['videos'].map(bool).astype(bool)
first_video = df['videos'].str.get(0)
video_right = (
    '\\begin{figure}[H] \n \\begin{flushright} \\emoji[ios]{1F3A5} '
    '\\includegraphics[width=0.3\\textwidth]{' + first_video + '} \\end{flushright}\n\\end{figure}'
)
# The closing brace of \includegraphics{...} was missing here.
video_left = (
    '\\begin{figure}[H] \n \\begin{flushleft} \\emoji[ios]{1F3A5}  '
    '\\includegraphics[width=0.3\\textwidth]{' + first_video + '} \\end{flushleft}\n\\end{figure}'
)
df['tex_for_videos'] = np.where(df['is_videos'], np.where(is_right, video_right, video_left), "")
# NOTE(review): `tex_for_videos` is computed but not part of the final join
# below, so videos never reach the .tex files - confirm whether that is intended.

# --- reactions -------------------------------------------------------------

df['is_reactions'] = df['reactions'].map(bool).astype(bool)
first_reaction = df['reactions'].str.get(0)
reaction_left = (
    '\\vspace*{-0.6cm}\\begin{flushleft}' + first_reaction + '\\end{flushleft}\\vspace*{-0.3cm}'
)
reaction_right = (
    '\\vspace*{-0.6cm}\\begin{flushright} ' + first_reaction + '\\end{flushright}\\vspace*{-0.3cm}'
)
df['tex_for_reactions'] = np.where(
    df['is_reactions'], np.where(is_right, reaction_right, reaction_left), " "
)

# --- final markup per row: gif, photo, text bubble, reaction ---------------

row_parts = ['tex_for_gif', 'tex_for_photo', 'message', 'tex_for_reactions']
df['message'] = df[row_parts].agg(' '.join, axis=1)


# --- one section file per day ----------------------------------------------

for date, df_t in df.groupby('date'):
    # All rows of a day share the same display string.
    date_str = df_t['dateStr'].iloc[0]
    title = '\\section*{' + date_str + '\\markboth{\\MakeLowercase{' + date_str + '}}{}}'
    discussion = '\n'.join(df_t['message'])
    text = f'{title}\n{discussion}'

    with open(f'./output/sections/{date}.tex', 'w', encoding='utf-8') as ft:
        ft.write(text)


In [253]:
# Scratch notes - ways to take the first item of each `path` list:
#   np.where((df.is_photo) & (df.right), df.path.str.get(0), "toto")
#   df['path'].apply(lambda x: "toto" if len(x) == 0 else x[0])

# Inspect the flag on rows 150-159 (explicit positional slice).
df['is_before_photo'].iloc[150:160]

150    False
151    False
152    False
153     True
154    False
155    False
156    False
157    False
158    False
159    False
Name: is_before_photo, dtype: bool

In [184]:
# Same rows as above: which of them carry a photo (explicit positional slice).
df['is_photo'].iloc[150:160]

150    False
151    False
152     True
153     True
154    False
155    False
156    False
157    False
158    False
159    False
Name: is_photo, dtype: bool

In [187]:
# Same rows again: which photos are followed by another photo (explicit positional slice).
df['is_after_photo'].iloc[150:160]

150    False
151    False
152     True
153    False
154    False
155    False
156    False
157    False
158    False
159    False
Name: is_after_photo, dtype: bool

In [None]:
# # Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# # -------------------------------------------------------------------
# df = concatenated_table
# separate_messages = True

# for date, df_t in df.groupby('date'):
#     date_str = df_t['dateStr'].unique()[0]
#     title = '\\section*{' + date_str + '\markboth{\MakeLowercase{'+ date_str +'}}{}}'
#     if separate_messages:
#         discussion = '\n'.join(df_t.groupby('message').apply(format_msg))
#     else:
#         discussion = '\n'.join(df_t.groupby('senderGroup').apply(format_msg))
#     text = f'{title}\n{discussion}'
    
#     with open(f'./output/sections/{date}.tex', 'w', encoding='utf-8') as ft:
#         ft.write(text)

In [None]:
import re

# Scratch experiment: derive a local file name from a Facebook CDN image URL.
test = "https://interncache-atn.fbcdn.net/v/t1.15752-9/125222477_1727067340807430_6855689147600839821_n_1727067334140764.png?ccb=2&_nc_sid=73a6a0&efg=eyJ1cmxnZW4iOiJwaHBfdXJsZ2VuX2NsaWVudC9pbW9nZW46RFlJTWVkaWFVdGlscyJ9&_nc_ad=z-m&_nc_cid=0&_nc_ht=interncache-atn&oh=54adebd7bb72b88abfa5976648859ea3&oe=5FD625B5"

# Three underscore-separated digit runs followed by a one-character marker.
id_pattern = re.compile("([0-9]*)_([0-9]*)_([0-9]*)_(.).")
res = id_pattern.search(test)

if 'png' in test:
    print(True)
print(res)

first_id, second_id, third_id, marker = res.groups()
# The second id is deliberately repeated at the end of the name.
file_name = "_".join([first_id, second_id, third_id, marker, second_id]) + ".png"
file_name

In [None]:
concatenated_table