In [17]:
import sys

# Make the modules under `src` importable (models.*, utils.* are imported below).
# Guarded so that re-running this cell does not add duplicate entries to sys.path.
if 'src' not in sys.path:
    sys.path.append('src')

In [18]:
import locale

# Set French as the locale for date/time formatting (useful for date display).
# Locale identifiers are platform dependent: plain 'fr_FR' is tried first (the
# original setting), then the UTF-8 spellings commonly needed on non-Windows hosts.
for _french_locale in ('fr_FR', 'fr_FR.UTF-8', 'fr_FR.utf8'):
    try:
        locale.setlocale(locale.LC_TIME, _french_locale)
        break
    except locale.Error:
        continue
else:
    # Same exception type the single setlocale call would have raised.
    raise locale.Error(
        "no French locale available for LC_TIME (tried 'fr_FR', 'fr_FR.UTF-8', 'fr_FR.utf8')"
    )

import pandas as pd

from models.telegram import TelegramModel
from models.messenger import MessengerModel
from utils.formatting import process_for_latex
from utils.timing import add_timing_to_df
from utils.formatting import left_formating, right_formating, left_formating_with_bubbles
from utils.formatting import right_formating_with_bubbles, format_msg

In [19]:
# One parser object per chat source; both are used by the loading cells below.
telegram_model, messenger_model = TelegramModel(), MessengerModel()

In [None]:
# Parse the Telegram chat export (result.json) into a table.
# NOTE(review): the output captured below lists thumbnail paths starting with '../',
# presumably printed by parse_from_json — confirm the working directory this
# relative path is meant to be resolved from.
telegram_data_path = 'data/telegram/ChatExport_2020-12-06/result.json'
concatenated_table_telegram = telegram_model.parse_from_json(telegram_data_path)
# Uncomment either line to inspect the parsed table:
#concatenated_table_telegram.info()
#concatenated_table_telegram

../data/telegram/ChatExport_2020-12-06/video_files/video_1@18-07-2020_15-57-31.mp4_thumb.jpg
../data/telegram/ChatExport_2020-12-06/video_files/video_5@19-07-2020_20-33-41.mp4_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker.webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/video_files/mp4.mp4_thumb.jpg
../data/telegram/ChatExport_2020-12-06/video_files/giphy.mp4_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (1).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (2).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (3).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (4).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (5).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (6).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (7).webp_thumb.jpg
../data/telegram/ChatExport_2020-12-06/stickers/sticker (8).webp_thumb.jpg
../

In [None]:
# Parse the two Messenger export files and stack them into a single table
# (message_1 rows first, then message_2 rows; original row indexes are kept).
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
messenger_data_path = 'data/messenger/message_1.json'
concatenated_table_messenger = messenger_model.parse_from_json(messenger_data_path)
messenger_data_path = 'data/messenger/message_2.json'
concatenated_table_messenger_bis = messenger_model.parse_from_json(messenger_data_path)
concatenated_table_messenger = pd.concat(
    [concatenated_table_messenger, concatenated_table_messenger_bis]
)
concatenated_table_messenger

In [None]:
# Merge the two dataframes: Messenger rows first, then Telegram rows.
# pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
concatenated_table = pd.concat([concatenated_table_messenger, concatenated_table_telegram])
concatenated_table.shape

In [None]:
# Pass the merged table through add_timing_to_df and keep the result under the same name.
# NOTE(review): because the name is rebound to its own output, re-running this cell
# feeds the already-processed table back into add_timing_to_df — confirm that is safe.
concatenated_table = add_timing_to_df(concatenated_table)
concatenated_table

In [None]:
# Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# -------------------------------------------------------------------
# Reads `concatenated_table` and uses its columns: message, right, timeStr, path,
# gifs, videos, reactions, date and dateStr. Builds the LaTeX for every message and
# writes one `<date>.tex` file per distinct `date` value into ./output/sections.

from pathlib import Path

import numpy as np


def _figure_tex(first_file, has_media, on_right, width, caption=''):
    """Build the LaTeX figure snippet for each row that carries a media file.

    Parameters
    ----------
    first_file : Series with the first file path of each row (NaN when there is none).
    has_media : boolean Series, True where the row has at least one file.
    on_right : the `right` column; True rows are flushed right, False rows left.
    width : fraction of \\textwidth given to \\includegraphics, as a string.
    caption : optional LaTeX inserted just before the closing flush environment.

    Returns
    -------
    numpy array with one string per row; "" for rows without media.
    """
    def figure(alignment):
        return ('\\begin{figure}[H]' + ' \n ' + '\\begin{' + alignment + '}' + ' \n '
                + '\\includegraphics[width=' + width + '\\textwidth]{' + first_file + '}'
                + ' \n ' + caption + '\\end{' + alignment + '}' + '\n' + '\\end{figure}')

    tex = np.where(has_media & on_right, figure('flushright'), "")
    # Element-wise comparison with False is kept on purpose (same test as before).
    return np.where(has_media & (on_right == False), figure('flushleft'), tex)  # noqa: E712


# Work on a copy: the steps below rewrite `message`, so operating directly on
# `concatenated_table` would wrap every message a second time when the cell is re-run.
df = concatenated_table.copy()

# mark empty bubbles
df['empty_message'] = df['message'] == ""

# prepare intro and ccl for the latex bubbles
df['introtex'] = np.where(df['right'], "\\begin{rightbubbles}", "\\begin{leftbubbles}")
df['conclutex'] = np.where(df['right'], "\\end{rightbubbles}", "\\end{leftbubbles}")

# add hour
df['datetex'] = "\\hspace{0.5cm}\\hfill{\\textcolor{mygray}{{\\footnotesize " + df['timeStr'] + "}}}"

# deal with & in latex
# Series.replace only matches cells that are exactly '&'; a substring replacement is
# needed here. The lookbehind leaves an already escaped '\&' untouched.
df['message'] = df['message'].str.replace(r'(?<!\\)&', r'\\&', regex=True)

# concatenate
df['message'] = df[['introtex', 'message', 'datetex', 'conclutex']].apply(' '.join, axis=1)

# delete empty bubbles
df['message'] = np.where(df['empty_message'], "", df['message'])

# deal with photo (only the first file of each row is rendered)
df['is_photo'] = np.where(df['path'], True, False)
df['tex_for_photo'] = _figure_tex(df['path'].str.get(0), df['is_photo'], df['right'], '0.4')

# deal with gifs
df['is_gif'] = np.where(df['gifs'], True, False)
df['tex_for_gif'] = _figure_tex(df['gifs'].str.get(0), df['is_gif'], df['right'], '0.3')

# deal with videos
df['is_videos'] = np.where(df['videos'], True, False)
df['tex_for_videos'] = _figure_tex(df['videos'].str.get(0), df['is_videos'], df['right'], '0.3',
                                   caption='\\caption{Video} \n ')

# deal with reactions (placed on the side opposite to the message)
df['is_reactions'] = np.where(df['reactions'], True, False)
first_reaction = df['reactions'].str.get(0)
df['tex_for_reactions'] = np.where(df['is_reactions'] & df['right'],
                                   '\\begin{flushleft}' + first_reaction + '\\end{flushleft}', "")
df['tex_for_reactions'] = np.where(df['is_reactions'] & (df['right'] == False),  # noqa: E712
                                   '\\begin{flushright} ' + first_reaction + '\\end{flushright}',
                                   df['tex_for_reactions'])

# join media and message, with gif and photo first and reactions last
# NOTE(review): `tex_for_videos` is computed above but is not part of this join, so
# videos never reach the generated files — confirm whether that is intended.
df['message'] = df[['tex_for_gif', 'tex_for_photo', 'message', 'tex_for_reactions']].apply(' '.join, axis=1)

# make sure the output folder exists before writing the per-day files
sections_dir = Path('./output/sections')
sections_dir.mkdir(parents=True, exist_ok=True)

for date, df_t in df.groupby('date'):
    # the first dateStr of the group is used as the section title
    date_str = df_t['dateStr'].iloc[0]
    title = '\\section*{' + date_str + '\\markboth{\\MakeLowercase{' + date_str + '}}{}}'
    discussion = '\n'.join(df_t['message'])
    text = f'{title}\n{discussion}'

    with open(sections_dir / f'{date}.tex', 'w', encoding='utf-8') as ft:
        ft.write(text)


In [None]:
# Solutions to get the first item of the list stored in `path`

#df['tex_for_photo'] = np.where((df.is_photo) & (df.right), df.path.str.get(0), "toto")
#test = df['path'].apply(lambda x: "toto" if len(x) == 0 else x[0])
#test
df['tex_for_photo']

In [None]:
# # Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# # -------------------------------------------------------------------
# df = concatenated_table
# separate_messages = True

# for date, df_t in df.groupby('date'):
#     date_str = df_t['dateStr'].unique()[0]
#     title = '\\section*{' + date_str + '\markboth{\MakeLowercase{'+ date_str +'}}{}}'
#     if separate_messages:
#         discussion = '\n'.join(df_t.groupby('message').apply(format_msg))
#     else:
#         discussion = '\n'.join(df_t.groupby('senderGroup').apply(format_msg))
#     text = f'{title}\n{discussion}'
    
#     with open(f'./output/sections/{date}.tex', 'w', encoding='utf-8') as ft:
#         ft.write(text)

In [None]:
import re

# Scratch check: derive a file name from a Messenger CDN URL.
test = "https://interncache-atn.fbcdn.net/v/t1.15752-9/125222477_1727067340807430_6855689147600839821_n_1727067334140764.png?ccb=2&_nc_sid=73a6a0&efg=eyJ1cmxnZW4iOiJwaHBfdXJsZ2VuX2NsaWVudC9pbW9nZW46RFlJTWVkaWFVdGlscyJ9&_nc_ad=z-m&_nc_cid=0&_nc_ht=interncache-atn&oh=54adebd7bb72b88abfa5976648859ea3&oe=5FD625B5"

# Three underscore-separated digit runs followed by one more character.
res = re.search("([0-9]*)_([0-9]*)_([0-9]*)_(.).", test)
if 'png' in test:
    print(True)
print(res)

# The second digit run is reused as the last component of the name.
# NOTE(review): presumably this mirrors how the exported file is named on disk — confirm.
first_id, second_id, third_id, suffix = res.groups()
file_name = f"{first_id}_{second_id}_{third_id}_{suffix}_{second_id}.png"
file_name

In [None]:
# Display the merged table as the cell output (the whole frame, not a preview).
concatenated_table