# EML格式文件的处理

In [2]:
import email
from email import policy
from email.parser import BytesParser
from bs4 import BeautifulSoup
import re

# Patterns are compiled once at import time; the expressions themselves are
# deliberately permissive (they locate candidates, they do not validate them).
_EMAIL_ADDRESS_RE = re.compile(r'[\w\.-]+@[\w\.-]+')
_IPV4_RE = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')


def _first_ipv4(headers):
    """Return the first dotted-quad with all octets <= 255 found in *headers*, else None."""
    for header in headers:
        for match in _IPV4_RE.finditer(str(header)):
            candidate = match.group(0)
            # The regex also matches strings such as "300.1.1.1"; reject those
            # and keep scanning instead of returning a bogus address.
            if all(int(octet) <= 255 for octet in candidate.split('.')):
                return candidate
    return None


def _iter_leaf_parts(msg):
    """Yield the non-container parts of *msg* depth-first, skipping attachment subparts."""
    if not msg.is_multipart():
        yield msg
        return
    for sub in msg.iter_parts():
        # A subpart explicitly marked as an attachment is never the body,
        # even when its content type is text/plain or text/html.
        if sub.get_content_disposition() == 'attachment':
            continue
        yield from _iter_leaf_parts(sub)


def _decode_text_part(part):
    """Return the decoded text of a leaf *part*, or None if it carries no payload."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return None
    # Many messages omit the charset parameter; passing None to bytes.decode()
    # raises TypeError, so fall back to UTF-8.
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The declared charset is not one Python knows about.
        return raw.decode('utf-8', errors='replace')


def extract_eml_details(eml_file_path):
    """Extract sender, relay IP, body text and attachment names from an .eml file.

    Args:
        eml_file_path: Path of the RFC 5322 message file; it is opened in
            binary mode and parsed with ``policy.default``.

    Returns:
        A dict with four keys:

        * ``'sender_email'`` - first address-like token in the ``From``
          header, or ``None`` if the header is missing or has no such token.
        * ``'ip_address'`` - first valid dotted-quad IPv4 found while
          scanning the ``Received`` headers in file order, or ``None``.
        * ``'body_content'`` - text of the first ``text/plain`` or
          ``text/html`` part that is not an attachment (HTML is reduced to
          its text with BeautifulSoup), or ``None`` if there is no such part.
        * ``'attachment_filenames'`` - filenames of the parts returned by
          ``msg.iter_attachments()`` that declare a filename.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(eml_file_path, 'rb') as f:
        msg = BytesParser(policy=policy.default).parse(f)

    # Sender address: tolerate a missing From header or one without an address.
    sender = msg['From']
    sender_match = None if sender is None else _EMAIL_ADDRESS_RE.search(str(sender))
    sender_email = sender_match.group(0) if sender_match else None

    # IP address, taken from the Received headers.
    ip_address = _first_ipv4(msg.get_all('Received', []))

    # Body text: search nested containers too, because the text parts usually
    # sit inside a multipart/alternative that is itself a child of
    # multipart/mixed.
    body_content = None
    for part in _iter_leaf_parts(msg):
        content_type = part.get_content_type()
        if content_type not in ('text/plain', 'text/html'):
            continue
        text = _decode_text_part(part)
        if text is None:
            continue
        if content_type == 'text/html':
            text = BeautifulSoup(text, 'html.parser').get_text()
        body_content = text
        break

    # Attachment filenames (parts without a filename are ignored).
    attachment_filenames = [
        filename
        for filename in (part.get_filename() for part in msg.iter_attachments())
        if filename
    ]

    return {
        'sender_email': sender_email,
        'ip_address': ip_address,
        'body_content': body_content,
        'attachment_filenames': attachment_filenames
    }

# Example usage: parse a sample .eml file (path is relative to the current
# working directory) and print the extracted fields.
eml_file_path = 'AMD Password Changed.eml'
details = extract_eml_details(eml_file_path)
print(details)



	
	
		

		
			
				
					
						
							
						
					
					
						
							
								
								    
									 Hello
									  Jiaqi,
								    
								
								
								    
									A password was changed for your AMD account 
									jiaqicui666@qq.com.
								    
								
								
								   
									Details
								   
								
								
									
										Friday, August 25, 2023										
										
										Performed by: 
										Jiaqi Cui
									
								
								
									
										Don't recognize this activity?
									
								
								
									
										Your account may have been compromised; please contact account.help@amd.com
									
								
							
						
					
					
						&nbsp;
					
				
			
		
	
