In [1]:
import sys

# Make the project's `src` directory importable (models.*, utils.*).
# Guarded so that re-running this cell does not pile up duplicate entries in sys.path.
if 'src' not in sys.path:
    sys.path.append('src')

In [2]:
import locale

# Use French for LC_TIME so that dates are displayed in French.
# The accepted locale name depends on the platform: the original notebook used
# 'fr_FR' and noted 'fr_FR.UTF-8' as the likely spelling on non-Windows machines,
# so both are tried, in that order.
for _locale_name in ('fr_FR', 'fr_FR.UTF-8'):
    try:
        locale.setlocale(locale.LC_TIME, _locale_name)
    except locale.Error:
        continue
    break
else:
    # Neither spelling is installed: fail loudly rather than silently
    # producing dates in another language.
    raise locale.Error("no French locale available (tried 'fr_FR' and 'fr_FR.UTF-8')")

from pathlib import Path

import numpy as np
import pandas as pd

from models.telegram import TelegramModel
from models.messenger import MessengerModel
from utils.formatting import process_for_latex
from utils.timing import add_timing_to_df
from utils.formatting import left_formating, right_formating, left_formating_with_bubbles
from utils.formatting import right_formating_with_bubbles, format_msg

In [3]:
# One parser object per chat source; each exposes `parse_from_json`, used below.
telegram_model, messenger_model = TelegramModel(), MessengerModel()

In [4]:
# Parse the Telegram chat export into a table and display it.
# Per the recorded output, the result has the columns
# source, datetime, sender, message, path, reactions, and parsing also prints
# the list of photo paths attached to each message.
# NOTE(review): the recorded output shows paths starting with '../data/...' while
# this cell reads from 'data/...' — the stored output may come from an earlier
# run with a different working directory; confirm.
telegram_data_path = 'data/telegram/ChatExport_2020-11-15/result.json'
concatenated_table_telegram = telegram_model.parse_from_json(telegram_data_path)
#concatenated_table_telegram.info()
concatenated_table_telegram

['../data/telegram/ChatExport_2020-11-15/photos/photo_1@18-07-2020_00-16-11.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_2@18-07-2020_08-35-07.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_3@18-07-2020_10-49-50.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_4@18-07-2020_10-49-50.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_5@18-07-2020_12-53-01.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_6@18-07-2020_12-53-01.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_7@18-07-2020_12-53-01.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_8@18-07-2020_17-48-47.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_9@18-07-2020_19-58-38.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_10@19-07-2020_04-21-44.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_11@19-07-2020_04-21-44.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_12@19-07-2020_20-30-47.jpg']
['../data/tel

['../data/telegram/ChatExport_2020-11-15/photos/photo_107@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_108@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_109@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_110@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_111@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_112@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_113@13-08-2020_18-04-04.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_114@13-08-2020_20-00-03.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_115@13-08-2020_20-00-03.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_116@13-08-2020_20-00-03.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_117@13-08-2020_20-00-03.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_118@13-08-2020_20-00-0

Unnamed: 0,source,datetime,sender,message,path,reactions
0,Telegram,2020-06-16T18:01:09,B,La pandémie du covid-19 est une maladie nouvel...,[],[]
1,Telegram,2020-06-16T18:06:17,M,Lois mathématiques --,[],[]
2,Telegram,2020-06-17T09:19:55,M,"J'ai lu le teasing d'intro, super bien posé, h...",[],[]
3,Telegram,2020-07-07T15:54:05,B,Du coup si on résume : \ * \emoji[ios]{1F682} ...,[],[]
4,Telegram,2020-07-18T00:16:11,B,j'ai meme fait le graphique de la médecin,[../data/telegram/ChatExport_2020-11-15/photos...,[]
...,...,...,...,...,...,...
4994,Telegram,2020-08-20T07:06:52,B,Moi aussi j'ai hâte de te voir :)\ Bientôt !!,[],[]
4995,Telegram,2020-08-20T07:15:22,B,,[../data/telegram/ChatExport_2020-11-15/photos...,[]
4996,Telegram,2020-08-20T07:15:33,B,La maison de Johan à SF \emoji[ios]{1F92F},[],[]
4997,Telegram,2020-08-20T07:15:56,B,Si tu vas dans la baie pour kraaft prochaineme...,[],[]


In [5]:
# Parse the Messenger export into a table and display it.
# Per the recorded output, the columns match the Telegram table
# (source, datetime, sender, message, path, reactions).
# NOTE(review): the displayed datetime values are formatted differently from the
# Telegram ones ('2020-11-13 20:26:38.528000' vs '2020-06-16T18:01:09') —
# presumably harmonised later by add_timing_to_df; confirm.
messenger_data_path = 'data/messenger/message_1.json'
concatenated_table_messenger = messenger_model.parse_from_json(messenger_data_path)
concatenated_table_messenger

Unnamed: 0,source,datetime,sender,message,path,reactions
0,Messenger,2020-11-13 20:26:38.528000,B,La publie de microbiologie est finale !! On va...,[],[]
1,Messenger,2020-11-13 20:26:21.514000,B,ET,[],[]


In [6]:
# Merge the two sources into a single table: Messenger rows first, then Telegram.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` is its replacement and, with default arguments, behaves the same
# way (original row labels are kept, columns are not sorted).
concatenated_table = pd.concat([concatenated_table_messenger, concatenated_table_telegram])
concatenated_table.shape

(5001, 6)

In [7]:
# Enrich the merged table with timing information.
# The export cell below reads the columns `right`, `timeStr`, `date` and `dateStr`,
# none of which appear in the parsed tables — presumably they are added here; TODO confirm.
concatenated_table = add_timing_to_df(concatenated_table)

In [8]:
# Generate one LaTeX section file per day: output/sections/<date>.tex
# -------------------------------------------------------------------
# Each message becomes a left or right "bubble" (depending on `right`) with its
# time appended; an attached photo is emitted as a figure just before the bubble.
# Expects the columns `message`, `path`, `right`, `timeStr`, `date` and `dateStr`.

SECTIONS_DIR = Path('./output/sections')

# Work on a copy: this cell rewrites `message`, so mutating `concatenated_table`
# in place would nest bubbles inside bubbles every time the cell is re-run.
df = concatenated_table.copy()

# Remember which rows carry no text (checked before any LaTeX is added).
df['empty_message'] = df['message'] == ""

# Opening and closing environment of the bubble, chosen by side.
df['introtex'] = np.where(df['right'], "\\begin{rightbubbles}", "\\begin{leftbubbles}")
df['conclutex'] = np.where(df['right'], "\\end{rightbubbles}", "\\end{leftbubbles}")

# Time of the message, pushed to the right in small gray type.
df['datetex'] = "\\hspace{0.5cm}\\hfill{\\textcolor{mygray}{{\\footnotesize " + df['timeStr'] + "}}}"

# Escape `&` for LaTeX. `Series.replace('&', ...)` only matches cells that are
# exactly '&', so the substring-level `.str.replace` is required here.
# The look-behind leaves an already escaped `\&` untouched.
df['message'] = df['message'].str.replace(r'(?<!\\)&', r'\\&', regex=True)

# Wrap the text in its bubble; rows without text get no bubble at all.
bubble_tex = df['introtex'] + ' ' + df['message'] + ' ' + df['datetex'] + ' ' + df['conclutex']
df['message'] = np.where(df['empty_message'], "", bubble_tex)

# Photos: only the first path of each list is rendered for now.
df['is_photo'] = np.where(df['path'], True, False)
first_photo = df['path'].str.get(0)


def _photo_figure(alignment):
    """Return, per row, the LaTeX figure showing the first photo inside `alignment`.

    `alignment` is the name of the LaTeX environment ('flushright' or 'flushleft').
    Rows whose path list is empty yield a missing value; they are filtered out
    by the `is_photo` mask below.
    """
    return (
        '\\begin{figure}[H] \n \\begin{' + alignment + '} \n '
        + '\\includegraphics[width=0.5\\textwidth]{' + first_photo + '} \n '
        + '\\end{' + alignment + '}\n\\end{figure}'
    )


df['tex_for_photo'] = np.where(
    df['is_photo'],
    np.where(df['right'], _photo_figure('flushright'), _photo_figure('flushleft')),
    "",
)

# Join photo and message, with the photo first.
df['message'] = df['tex_for_photo'] + ' ' + df['message']

# Write one file per day; create the target directory if it does not exist yet.
SECTIONS_DIR.mkdir(parents=True, exist_ok=True)

for date, df_t in df.groupby('date'):
    date_str = df_t['dateStr'].iloc[0]
    # Unnumbered section whose running header is the lower-cased date.
    title = '\\section*{' + date_str + '\\markboth{\\MakeLowercase{' + date_str + '}}{}}'
    discussion = '\n'.join(df_t['message'])
    text = f'{title}\n{discussion}'

    (SECTIONS_DIR / f'{date}.tex').write_text(text, encoding='utf-8')


In [9]:
# Inspect the generated photo snippets (empty string when a message has no photo).
# The first path of each list is taken with `df.path.str.get(0)`; a per-row
# alternative that was tried is
# `df['path'].apply(lambda x: "toto" if len(x) == 0 else x[0])`.
df['tex_for_photo']

0                                                        
1                                                        
2                                                        
3                                                        
4       \begin{figure}[H] \n \begin{flushright} \n \in...
                              ...                        
4996                                                     
4997                                                     
4998                                                     
4999                                                     
5000                                                     
Name: tex_for_photo, Length: 5001, dtype: object

In [10]:
# NOTE: disabled alternative to the per-day export loop, kept for reference only.
# Instead of building the bubbles column by column, it delegates the formatting
# of each group of rows to `format_msg`, grouping either by `message` or by
# `senderGroup` depending on `separate_messages`.
#
# # Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# # -------------------------------------------------------------------
# df = concatenated_table
# separate_messages = True

# for date, df_t in df.groupby('date'):
#     date_str = df_t['dateStr'].unique()[0]
#     title = '\\section*{' + date_str + '\markboth{\MakeLowercase{'+ date_str +'}}{}}'
#     if separate_messages:
#         discussion = '\n'.join(df_t.groupby('message').apply(format_msg))
#     else:
#         discussion = '\n'.join(df_t.groupby('senderGroup').apply(format_msg))
#     text = f'{title}\n{discussion}'
    
#     with open(f'./output/sections/{date}.tex', 'w', encoding='utf-8') as ft:
#         ft.write(text)

In [11]:
import re

# Scratch experiment: rebuild a file name from a Messenger media URL.
test = "https://interncache-atn.fbcdn.net/v/t1.15752-9/125222477_1727067340807430_6855689147600839821_n_1727067334140764.png?ccb=2&_nc_sid=73a6a0&efg=eyJ1cmxnZW4iOiJwaHBfdXJsZ2VuX2NsaWVudC9pbW9nZW46RFlJTWVkaWFVdGlscyJ9&_nc_ad=z-m&_nc_cid=0&_nc_ht=interncache-atn&oh=54adebd7bb72b88abfa5976648859ea3&oe=5FD625B5"

# Three underscore-separated digit runs followed by a single character.
res = re.search("([0-9]*)_([0-9]*)_([0-9]*)_(.).", test)

if 'png' in test:
    print(True)
print(res)

# The rebuilt name is the four captured groups followed by the second group
# again (not the number that ends the URL's own file name), with a .png suffix.
# NOTE(review): presumably this mirrors how the exported files are named — confirm.
name_parts = [*res.groups(), res.group(2)]
file_name = "_".join(name_parts) + ".png"
file_name

True
<re.Match object; span=(47, 96), match='125222477_1727067340807430_6855689147600839821_n_>


'125222477_1727067340807430_6855689147600839821_n_1727067340807430.png'

In [12]:
# Scratch check: escaping `&` for LaTeX on a plain Python string.
test = "jedhfy&djef"
# Map each `&` to the two characters backslash + ampersand.
line = test.translate(str.maketrans({'&': '\\&'}))
line

'jedhfy\\&djef'