In [25]:
import re
import pandas as pd

# Load the chat file
with open("_chat.txt", "r", encoding="utf-8") as f:
    data = f.read()

# Regex for message blocks: [dd/mm/yy, hh:mm:ss AM/PM] ~ Name: Message
pattern = r'\[(\d{2}/\d{2}/\d{2}), (\d{1,2}:\d{2}:\d{2} [APM]{2})\] (.*?): (.*?)(?=\n\[|\Z)'

matches = re.findall(pattern, data, re.DOTALL)

# Build a clean DataFrame
df = pd.DataFrame(matches, columns=["date", "time", "sender", "message"])

# Combine date and time
df['datetime'] = pd.to_datetime(df['date'] + " " + df['time'], format="%d/%m/%y %I:%M:%S %p")

df


Unnamed: 0,date,time,sender,message,datetime
0,14/12/24,11:15:50 PM,Tiss Tuljapur In Campus,‎Messages and calls are end-to-end encrypted. ...,2024-12-14 23:15:50
1,15/12/24,9:52:12 PM,~ Subham Ghosh,Guys there is a leopard spotted near Lake it s...,2024-12-15 21:52:12
2,15/12/24,9:52:51 PM,~ SREE HARSH S,‎This message was deleted.,2024-12-15 21:52:51
3,15/12/24,10:00:25 PM,Aditya,"Those who are in DH or Library stay there, Cam...",2024-12-15 22:00:25
4,15/12/24,10:02:47 PM,~ Romeo.,@919366022757 ngao ngaoo🧟,2024-12-15 22:02:47
...,...,...,...,...,...
1432,13/05/25,9:46:24 PM,Aditya,It is to bring to your notice that world's big...,2025-05-13 21:46:24
1433,13/05/25,9:47:55 PM,Zeeshan,"Okay, Mr President 🤭",2025-05-13 21:47:55
1434,13/05/25,10:06:46 PM,~ D,"Anybody has nurse mausi's number\n‎[13/05/25, ...",2025-05-13 22:06:46
1435,13/05/25,10:26:23 PM,~ Rutuja Kshirsagar.💜,‎This message was deleted.,2025-05-13 22:26:23


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1437 entries, 0 to 1436
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1437 non-null   object        
 1   time      1437 non-null   object        
 2   sender    1437 non-null   object        
 3   message   1437 non-null   object        
 4   datetime  1437 non-null   datetime64[ns]
dtypes: datetime64[ns](1), object(4)
memory usage: 56.3+ KB


In [29]:
# Convert to datetime first
df['date'] = pd.to_datetime(df['date'], errors='coerce')  # coerce makes invalid entries NaT

# Now safely extract components
df['year'] = df['date'].dt.year
df['month_num'] = df['date'].dt.month
df['month'] = df['date'].dt.month_name()
df['day'] = df['date'].dt.day
df['day_name'] = df['date'].dt.day_name()
df['hour'] = df['date'].dt.hour
df['minute'] = df['date'].dt.minute
df['only_date'] = df['date'].dt.date


  df['date'] = pd.to_datetime(df['date'], errors='coerce')  # coerce makes invalid entries NaT


In [30]:
df

Unnamed: 0,date,time,sender,message,datetime,year,month_num,month,day,day_name,hour,minute,only_date
0,2024-12-14,11:15:50 PM,Tiss Tuljapur In Campus,‎Messages and calls are end-to-end encrypted. ...,2024-12-14 23:15:50,2024,12,December,14,Saturday,0,0,2024-12-14
1,2024-12-15,9:52:12 PM,~ Subham Ghosh,Guys there is a leopard spotted near Lake it s...,2024-12-15 21:52:12,2024,12,December,15,Sunday,0,0,2024-12-15
2,2024-12-15,9:52:51 PM,~ SREE HARSH S,‎This message was deleted.,2024-12-15 21:52:51,2024,12,December,15,Sunday,0,0,2024-12-15
3,2024-12-15,10:00:25 PM,Aditya,"Those who are in DH or Library stay there, Cam...",2024-12-15 22:00:25,2024,12,December,15,Sunday,0,0,2024-12-15
4,2024-12-15,10:02:47 PM,~ Romeo.,@919366022757 ngao ngaoo🧟,2024-12-15 22:02:47,2024,12,December,15,Sunday,0,0,2024-12-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1432,2025-05-13,9:46:24 PM,Aditya,It is to bring to your notice that world's big...,2025-05-13 21:46:24,2025,5,May,13,Tuesday,0,0,2025-05-13
1433,2025-05-13,9:47:55 PM,Zeeshan,"Okay, Mr President 🤭",2025-05-13 21:47:55,2025,5,May,13,Tuesday,0,0,2025-05-13
1434,2025-05-13,10:06:46 PM,~ D,"Anybody has nurse mausi's number\n‎[13/05/25, ...",2025-05-13 22:06:46,2025,5,May,13,Tuesday,0,0,2025-05-13
1435,2025-05-13,10:26:23 PM,~ Rutuja Kshirsagar.💜,‎This message was deleted.,2025-05-13 22:26:23,2025,5,May,13,Tuesday,0,0,2025-05-13


In [32]:
df['hour'] = df['datetime'].dt.hour
df['minute'] = df['datetime'].dt.minute

df

Unnamed: 0,date,time,sender,message,datetime,year,month_num,month,day,day_name,hour,minute,only_date
0,2024-12-14,11:15:50 PM,Tiss Tuljapur In Campus,‎Messages and calls are end-to-end encrypted. ...,2024-12-14 23:15:50,2024,12,December,14,Saturday,23,15,2024-12-14
1,2024-12-15,9:52:12 PM,~ Subham Ghosh,Guys there is a leopard spotted near Lake it s...,2024-12-15 21:52:12,2024,12,December,15,Sunday,21,52,2024-12-15
2,2024-12-15,9:52:51 PM,~ SREE HARSH S,‎This message was deleted.,2024-12-15 21:52:51,2024,12,December,15,Sunday,21,52,2024-12-15
3,2024-12-15,10:00:25 PM,Aditya,"Those who are in DH or Library stay there, Cam...",2024-12-15 22:00:25,2024,12,December,15,Sunday,22,0,2024-12-15
4,2024-12-15,10:02:47 PM,~ Romeo.,@919366022757 ngao ngaoo🧟,2024-12-15 22:02:47,2024,12,December,15,Sunday,22,2,2024-12-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1432,2025-05-13,9:46:24 PM,Aditya,It is to bring to your notice that world's big...,2025-05-13 21:46:24,2025,5,May,13,Tuesday,21,46,2025-05-13
1433,2025-05-13,9:47:55 PM,Zeeshan,"Okay, Mr President 🤭",2025-05-13 21:47:55,2025,5,May,13,Tuesday,21,47,2025-05-13
1434,2025-05-13,10:06:46 PM,~ D,"Anybody has nurse mausi's number\n‎[13/05/25, ...",2025-05-13 22:06:46,2025,5,May,13,Tuesday,22,6,2025-05-13
1435,2025-05-13,10:26:23 PM,~ Rutuja Kshirsagar.💜,‎This message was deleted.,2025-05-13 22:26:23,2025,5,May,13,Tuesday,22,26,2025-05-13
