In [1]:
import sys

# Make the `src` directory importable -- presumably where the `models` and
# `utils` packages imported in the next cell live (TODO confirm).
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if 'src' not in sys.path:
    sys.path.append('src')

In [2]:
import locale

# Use French for date display (month and day names).
# Locale names are platform-dependent: 'fr_FR' is the name this notebook was
# written with, while other platforms usually need the explicit
# 'fr_FR.UTF-8' form. Try each candidate in turn and keep the first that works.
for _french_locale in ('fr_FR', 'fr_FR.UTF-8'):
    try:
        locale.setlocale(locale.LC_TIME, _french_locale)
        break
    except locale.Error:
        continue
else:
    # Same exception type as a direct setlocale() failure, with a clearer message.
    raise locale.Error("no French locale available (tried 'fr_FR' and 'fr_FR.UTF-8')")

import pandas as pd

from models.telegram import TelegramModel
from models.messenger import MessengerModel
from utils.formatting import process_for_latex
from utils.timing import add_timing_to_df
from utils.formatting import left_formating, right_formating, left_formating_with_bubbles
from utils.formatting import right_formating_with_bubbles, format_msg

In [3]:
# One parser object per chat platform; both are used below through
# their parse_from_json() method.
telegram_model, messenger_model = TelegramModel(), MessengerModel()

In [4]:
# Telegram chat export to load (path relative to the notebook's working directory).
telegram_data_path = 'data/telegram/ChatExport_2020-11-15/result.json'
# Parse the export into a table with one row per message; the displayed result
# has the columns source, datetime, sender, message, path and reactions.
# NOTE(review): the cell output also lists photo paths, apparently printed by
# the parser while it runs -- confirm in models.telegram.
concatenated_table_telegram = telegram_model.parse_from_json(telegram_data_path)
# Optional: uncomment to inspect the table's structure instead of its content.
#concatenated_table_telegram.info()
# Last expression of the cell: display the parsed table.
concatenated_table_telegram

['../data/telegram/ChatExport_2020-11-15/photos/photo_1@18-07-2020_00-16-11.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_2@18-07-2020_08-35-07.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_3@18-07-2020_10-49-50.jpg']
['../data/telegram/ChatExport_2020-11-15/photos/photo_4@18-07-2020_10-49-50.jpg']


Unnamed: 0,source,datetime,sender,message,path,reactions
0,Telegram,2020-06-16T18:01:09,B,La pandémie du covid-19 est une maladie nouvel...,[],[]
1,Telegram,2020-06-16T18:06:17,M,Lois mathématiques --,[],[]
2,Telegram,2020-06-17T09:19:55,M,"J'ai lu le teasing d'intro, super bien posé, h...",[],[]
3,Telegram,2020-07-07T15:54:05,B,Du coup si on résume : \ * \emoji[ios]{1F682} ...,[],[]
4,Telegram,2020-07-18T00:16:11,B,j'ai meme fait le graphique de la médecin,[../data/telegram/ChatExport_2020-11-15/photos...,[]
5,Telegram,2020-07-18T00:16:18,B,\#sage,[],[]
6,Telegram,2020-07-18T00:16:26,B,En vrai j'ai rarement autant bossé depuis long...,[],[]
7,Telegram,2020-07-18T08:35:06,B,Petit dej in the train !!,[],[]
8,Telegram,2020-07-18T08:35:07,B,,[../data/telegram/ChatExport_2020-11-15/photos...,[]
9,Telegram,2020-07-18T08:39:39,B,Avec mes petits loups \emoji[ios]{2764}️,[],[]


In [5]:
# Messenger chat export to load (path relative to the notebook's working directory).
messenger_data_path = 'data/messenger/message_1.json'
# Parse the export into a table with the same columns as the Telegram one
# (source, datetime, sender, message, path, reactions).
# NOTE(review): in the displayed output the rows are ordered newest first,
# unlike the Telegram table -- presumably sorted later; verify.
concatenated_table_messenger = messenger_model.parse_from_json(messenger_data_path)
# Last expression of the cell: display the parsed table.
concatenated_table_messenger

Unnamed: 0,source,datetime,sender,message,path,reactions
0,Messenger,2020-11-13 20:26:38.528000,B,La publie de microbiologie est finale !! On va...,[],[]
1,Messenger,2020-11-13 20:26:21.514000,B,ET,[],[]
2,Messenger,2020-11-13 20:26:20.306000,B,Mais tout est prêt.,[],[]
3,Messenger,2020-11-13 20:26:17.204000,B,Juste savoir quelle affiliation,[],[]
4,Messenger,2020-11-13 20:26:08.027000,B,\emoji[ios]{1F624},[],[]
5,Messenger,2020-11-13 20:26:04.444000,B,Il manque un truc de un auteur,[],[]
6,Messenger,2020-11-13 20:25:57.474000,B,Noooooon,[],[]
7,Messenger,2020-11-13 19:29:15.187000,M,Oh yeaaaah !!! Ca y est ça y est ???,[],[]
8,Messenger,2020-11-13 13:54:49.405000,B,*est,[],[]
9,Messenger,2020-11-13 13:54:45.589000,B,\emoji[ios]{1F929},[],[]


In [6]:
# Merge the two per-platform tables into a single one (Messenger rows first,
# then Telegram rows).
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent and, like append, keeps each frame's
# original row labels.
concatenated_table = pd.concat([concatenated_table_messenger, concatenated_table_telegram])
concatenated_table.shape

(26, 6)

In [7]:
# Enrich the merged table with time-derived columns.
# NOTE(review): presumably this is where the `date`, `dateStr` and `right`
# columns used by the section-generation cell come from -- confirm against
# utils.timing.add_timing_to_df.
concatenated_table = add_timing_to_df(concatenated_table)

In [8]:
# Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# -------------------------------------------------------------------

import os

import numpy as np

# Directory receiving one <date>.tex file per day of conversation.
SECTIONS_DIR = './output/sections'

# Work on a copy: `message` is rewritten below, so operating directly on
# `concatenated_table` would wrap every message in a bubble environment a
# second time whenever this cell is re-run.
df = concatenated_table.copy()

# mark empty bubbles (rows whose text is the empty string)
df['empty_message'] = df['message'] == ""

# opening / closing LaTeX environment of each bubble, depending on its side
df['introtex'] = np.where(df['right'], "\\begin{rightbubbles}", "\\begin{leftbubbles}")
df['conclutex'] = np.where(df['right'], "\\end{rightbubbles}", "\\end{leftbubbles}")

# wrap each message in its bubble environment; empty messages stay empty
# so that no empty bubble is rendered
df['message'] = np.where(
    df['empty_message'],
    "",
    df['introtex'] + ' ' + df['message'] + ' ' + df['conclutex'],
)

# deal with photos
# for now only the first photo of each message is included
# (check with df['path'].apply(len).value_counts())
df['is_photo'] = df['path'].map(bool)   # a non-empty list of paths means "has a photo"
first_photo = df['path'].str.get(0)     # NaN where the list is empty (masked out below)


def _figure_tex(alignment):
    """Return, per row, the LaTeX figure including its first photo inside `alignment`.

    `alignment` is the name of the LaTeX alignment environment
    ('flushright' or 'flushleft').
    """
    return (
        '\\begin{figure}[H] \n \\begin{' + alignment + '} \n '
        + '\\includegraphics[width=0.5\\textwidth]{' + first_photo + '} \n '
        + '\\end{' + alignment + '}\n\\end{figure}'
    )


# photos follow the side of their bubble; rows without a photo get ""
df['tex_for_photo'] = np.where(
    df['is_photo'],
    np.where(df['right'], _figure_tex('flushright'), _figure_tex('flushleft')),
    "",
)

# join photo and message, with photo first
df['message'] = df['tex_for_photo'] + ' ' + df['message']

# make sure the target directory exists before writing into it
os.makedirs(SECTIONS_DIR, exist_ok=True)

for date, df_t in df.groupby('date'):
    # all rows of a given day share the same display string
    date_str = df_t['dateStr'].iloc[0]
    # backslashes are doubled: '\m' and '\M' are invalid escape sequences
    title = '\\section*{' + date_str + '\\markboth{\\MakeLowercase{' + date_str + '}}{}}'
    discussion = '\n'.join(df_t['message'])
    text = f'{title}\n{discussion}'

    with open(f'{SECTIONS_DIR}/{date}.tex', 'w', encoding='utf-8') as ft:
        ft.write(text)


In [9]:
# Inspect the LaTeX figure snippet generated for each row.
#
# Alternative solutions to get the first item of each `path` list,
# kept for reference:
#   df['tex_for_photo'] = np.where((df.is_photo) & (df.right), df.path.str.get(0), "toto")
#   test = df['path'].apply(lambda x: "toto" if len(x) == 0 else x[0])
df['tex_for_photo']

0                                                      
1                                                      
2                                                      
3                                                      
4     \begin{figure}[H] \n \begin{flushright} \n \in...
5                                                      
6                                                      
7                                                      
8     \begin{figure}[H] \n \begin{flushright} \n \in...
9                                                      
10                                                     
11    \begin{figure}[H] \n \begin{flushleft} \n \inc...
12    \begin{figure}[H] \n \begin{flushleft} \n \inc...
13    \begin{figure}[H] \n \begin{flushright} \n \in...
14                                                     
15                                                     
16                                                     
17                                              

In [10]:
# # Loop over dates, generate yyyy-mm-dd.tex section files, one per day
# # -------------------------------------------------------------------
# df = concatenated_table
# separate_messages = True

# for date, df_t in df.groupby('date'):
#     date_str = df_t['dateStr'].unique()[0]
#     title = '\\section*{' + date_str + '\markboth{\MakeLowercase{'+ date_str +'}}{}}'
#     if separate_messages:
#         discussion = '\n'.join(df_t.groupby('message').apply(format_msg))
#     else:
#         discussion = '\n'.join(df_t.groupby('senderGroup').apply(format_msg))
#     text = f'{title}\n{discussion}'
    
#     with open(f'./output/sections/{date}.tex', 'w', encoding='utf-8') as ft:
#         ft.write(text)