In [3]:
import xml.etree.ElementTree as ET
import pandas as pd
from datetime import datetime

# SMS backup XML file to load; passed to load_sms_data() below.
file_path = "Rishabh.xml"

def load_sms_data(file_path):
    """Load an SMS backup XML file into a pandas DataFrame.

    Every ``<sms>`` element that is a descendant of the document root becomes
    one row.

    Parameters
    ----------
    file_path : str or path-like
        Path to the XML file. It is read as UTF-8 text; undecodable bytes are
        replaced instead of raising.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (``YYYY-MM-DD``), ``time`` (``HH:MM:SS``), ``sender``,
        ``message_type``, ``message_body`` and ``spam`` (always 0). Date and
        time are derived from the millisecond ``date`` attribute in the local
        timezone and are empty strings when that attribute cannot be
        converted. The columns are present even when no ``<sms>`` element is
        found, so downstream column selection keeps working.

    Raises
    ------
    OSError
        If the file cannot be opened.
    xml.etree.ElementTree.ParseError
        If the content is not well-formed XML.
    """
    columns = ["date", "time", "sender", "message_type", "message_body", "spam"]

    # Map the numeric "type" attribute to readable text
    type_map = {
        "1": "Inbox",
        "2": "Sent",
        "3": "Draft",
        "4": "Outbox",
        "5": "Failed",
        "6": "Queued"
    }

    def parse_sms_backup_and_restore(root):
        """Return one record dict per ``<sms>`` element below ``root``."""
        records = []
        for sms in root.findall(".//sms"):
            sender = sms.attrib.get("address", "").strip()
            date_ms = sms.attrib.get("date", "")
            try:
                dt_obj = datetime.fromtimestamp(int(date_ms) / 1000)
            except (ValueError, OverflowError, OSError):
                # Missing, non-numeric or out-of-range timestamp: keep the
                # message but leave its date/time blank.
                date_str = ""
                time_str = ""
            else:
                date_str = dt_obj.strftime('%Y-%m-%d')
                time_str = dt_obj.strftime('%H:%M:%S')

            # Flatten multi-line bodies so each message stays on one line.
            body = sms.attrib.get("body", "").replace("\n", " ").strip()

            # Unrecognised or missing type codes are reported as "Unknown".
            msg_type = type_map.get(sms.attrib.get("type", ""), "Unknown")

            records.append({
                "date": date_str,
                "time": time_str,
                "sender": sender,
                "message_type": msg_type,
                "message_body": body,
                "spam": 0
            })
        return records

    with open(file_path, 'r', encoding="utf-8", errors="replace") as f:
        xml_content = f.read()

    # Parse after the handle is closed; nothing below needs the open file.
    root = ET.fromstring(xml_content)
    messages = parse_sms_backup_and_restore(root)

    # Explicit columns keep the schema stable for an empty backup.
    return pd.DataFrame(messages, columns=columns)

# Parse the backup file, then print a ten-row preview and the total row count.
df = load_sms_data(file_path)
print(df.head(n=10))
print(f"✅ Loaded {len(df)} messages")

         date      time       sender message_type  \
0  2025-07-29  13:36:08  BV-INPOST-G        Inbox   
1  2025-07-29  19:04:14  AD-SBIUPI-S        Inbox   
2  2025-07-30  11:28:28  VM-SBIUPI-S        Inbox   
3  2025-07-30  11:31:11  AX-SBIINB-S        Inbox   
4  2025-07-30  15:41:49  VA-CANBNK-S        Inbox   
5  2025-07-31  00:30:49  JM-JioSvc-S        Inbox   
6  2025-07-31  09:17:28  AE-AIRMCA-S        Inbox   
7  2025-07-31  09:45:45  VM-CANBNK-S        Inbox   
8  2025-07-31  16:40:19  AE-AIRINF-T        Inbox   
9  2025-07-31  16:40:49  AD-AIRINF-S        Inbox   

                                        message_body  spam  
0  Article PT071224430IN delivered on 29/07/2025 ...     0  
1  Dear SBI User, your A/c X7447-credited by Rs.4...     0  
2  Dear UPI user A/C X7447 debited by 48000.0 on ...     0  
3  Dear Customer, SBCollect Txn DUO4034076 dtd 30...     0  
4  An amount of INR 16,300.00 has been DEBITED to...     0  
5  For seamless data experience across the countr.

In [4]:
# Reduce the frame to the message text by dropping every other named column.
messages = df.drop(columns=["date", "time", "sender", "message_type", "spam"])

In [5]:
# Show the message_body-only frame as this cell's output.
messages

Unnamed: 0,message_body
0,Article PT071224430IN delivered on 29/07/2025 ...
1,"Dear SBI User, your A/c X7447-credited by Rs.4..."
2,Dear UPI user A/C X7447 debited by 48000.0 on ...
3,"Dear Customer, SBCollect Txn DUO4034076 dtd 30..."
4,"An amount of INR 16,300.00 has been DEBITED to..."
...,...
119,"An amount of INR 3,368.00 has been DEBITED to ..."
120,"Dear Customer,An advance amount of Rs.INR 3,36..."
121,रिमाइंडर: आपके पास 1GB डाटा लोन का भुगतान करना...
122,हम हर दिन बेहतर हो रहे हैं! BAKTAURI PURWA में...


In [6]:
# Show the full parsed frame (all six columns) as this cell's output.
df

Unnamed: 0,date,time,sender,message_type,message_body,spam
0,2025-07-29,13:36:08,BV-INPOST-G,Inbox,Article PT071224430IN delivered on 29/07/2025 ...,0
1,2025-07-29,19:04:14,AD-SBIUPI-S,Inbox,"Dear SBI User, your A/c X7447-credited by Rs.4...",0
2,2025-07-30,11:28:28,VM-SBIUPI-S,Inbox,Dear UPI user A/C X7447 debited by 48000.0 on ...,0
3,2025-07-30,11:31:11,AX-SBIINB-S,Inbox,"Dear Customer, SBCollect Txn DUO4034076 dtd 30...",0
4,2025-07-30,15:41:49,VA-CANBNK-S,Inbox,"An amount of INR 16,300.00 has been DEBITED to...",0
...,...,...,...,...,...,...
119,2025-09-01,16:43:50,AX-CANBNK-S,Inbox,"An amount of INR 3,368.00 has been DEBITED to ...",0
120,2025-09-01,16:44:13,AX-CANBNK-S,Inbox,"Dear Customer,An advance amount of Rs.INR 3,36...",0
121,2025-09-01,18:38:58,AE-AIRTEL-S,Inbox,रिमाइंडर: आपके पास 1GB डाटा लोन का भुगतान करना...,0
122,2025-09-01,20:08:47,AE-AIRTEL-S,Inbox,हम हर दिन बेहतर हो रहे हैं! BAKTAURI PURWA में...,0


In [16]:
# Export the full table to CSV without the index column.
# NOTE(review): the "Aman" prefix does not match the "Rishabh.xml" input or the
# "Rishabh_mess.csv" export, and this cell's execution count is out of order --
# confirm the destination before re-running so another export is not overwritten.
df.to_csv("Aman_all.csv", index=False)

In [7]:
# Export the message-text-only table to CSV without the index column.
messages.to_csv("Rishabh_mess.csv", index=False)