In [6]:
import os
import re
import pandas as pd
import codecs
from getpass import getpass
import imaplib
import email
from bs4 import BeautifulSoup
from nltk import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

"""qmepjfnurwnxhvup"""


'qmepjfnurwnxhvup'

In [7]:
class EmailClassifier:
    CATEGORIES = {
        'education': ['course', 'university', 'lecture', 'assignment', 'professor', 'school', 'college'],
        'business': ['meeting', 'report', 'client', 'project', 'deadline', 'work', 'office'],
        'personal': ['family', 'friend', 'dinner', 'weekend', 'photos', 'birthday', 'holiday'],
        'promotions': ['deal', 'offer', 'discount', 'sale', 'limited', 'coupon', 'promo'],
        'other': []
    }

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.model = MultinomialNB()
        self._train_model()

    def _train_model(self):
        """Train a simple text classifier"""
        texts = []
        labels = []
        for category, keywords in self.CATEGORIES.items():
            texts.extend(keywords)
            labels.extend([category]*len(keywords))
        
        X = self.vectorizer.fit_transform(texts)
        self.model.fit(X, labels)

    def predict(self, text):
        """Predict email category"""
        if not text.strip():
            return 'other'
        vec = self.vectorizer.transform([text])
        return self.model.predict(vec)[0]

In [8]:
class EmailProcessor:
    def __init__(self, username, password):
        self.mail = imaplib.IMAP4_SSL('imap.gmail.com')
        self.mail.socket().settimeout(300)
        self.classifier = EmailClassifier()
        self.mail.login(username, password)

    def get_emails(self, folder='INBOX', limit=100):
        """Fetch emails from specified folder"""
        self.mail.select(folder, readonly=True)
        _, data = self.mail.uid('search', None, 'ALL')
        uids = data[0].decode('utf-8').split()[:limit]
        return [self._parse_email(uid) for uid in uids]

    def _parse_email(self, uid):
        """Parse individual email with category detection"""
        _, data = self.mail.uid('fetch', uid, '(RFC822)')
        msg = email.message_from_bytes(data[0][1])
        
        email_data = {
            'From': msg['From'],
            'Subject': msg['Subject'],
            'Body': self._extract_body(msg),
            'Folder': 'INBOX'  # Default, updated later
        }
        
        # Add category prediction
        content = f"{email_data['Subject']} {email_data['Body']}"
        email_data['Category'] = self.classifier.predict(content)
        
        return email_data

    def _extract_body(self, msg):
        """Extract and clean email body"""
        body = ""
        for part in msg.walk():
            if part.get_content_type() in ['text/plain', 'text/html']:
                try:
                    payload = part.get_payload(decode=True)
                    text = payload.decode('utf-8', 'ignore')
                    if part.get_content_type() == 'text/html':
                        text = BeautifulSoup(text, 'html5lib').get_text()
                    body += text
                except Exception:
                    continue
        return self._clean_text(body)

    def _clean_text(self, text):
        """Clean email text for analysis"""
        text = re.sub(r'<[^>]+>', '', text)
        text = re.sub(r'\s+', ' ', text)
        words = [w for w in word_tokenize(text) 
                if len(w) <= 15 and re.match('[A-Za-z0-9]+', w)]
        return ' '.join(words)

In [9]:
class DataOrganizer:
    @staticmethod
    def categorize_emails(emails):
        """Sort emails into category buckets"""
        categories = {cat: [] for cat in EmailClassifier.CATEGORIES}
        for email in emails:
            categories[email['Category']].append(email)
        return categories

    @staticmethod
    def save_to_excel(categorized_emails, output_dir='Downloads'):
        """Save each category to separate Excel file"""
        os.makedirs(output_dir, exist_ok=True)
        for category, emails in categorized_emails.items():
            df = pd.DataFrame(emails)
            df.to_excel(f'{output_dir}/{category}_emails.xlsx', index=False)
            print(f"Saved {len(emails)} {category} emails")

In [12]:
def main():
    try:
        # Authentication
        email_id = input('Enter Gmail: ')
        password = getpass('Enter Password: ')
        
        # Initialize components
        processor = EmailProcessor(email_id, password)
        organizer = DataOrganizer()

        # Process emails
        print("\nFetching emails...")
        emails = processor.get_emails(limit=10)  # Process first 10 for demo
        
        # Categorize and save
        categorized = organizer.categorize_emails(emails)
        organizer.save_to_excel(categorized)
        
        # Print summary
        print("\nEmail Categories Summary:")
        for cat, items in categorized.items():
            print(f"{cat.title():<10}: {len(items)} emails")

    except Exception as e:
        print(f"\nError: {str(e)}")
        print("Check: 1) IMAP access 2) App Password if 2FA enabled")


if __name__ == '__main__':
    main()


Fetching emails...

Error: command: UID => socket error: EOF
Check: 1) IMAP access 2) App Password if 2FA enabled


In [None]:
import os
import imaplib
import pandas as pd
from getpass import getpass
import email
from email.header import decode_header

class RawEmailBackup:
    def __init__(self, username, password):
        self.mail = imaplib.IMAP4_SSL('imap.gmail.com')
        self.mail.login(username, password)
        self.base_dir = os.path.join(os.getcwd(), 'Downloads')
        
    def backup_raw_emails(self):
        """Backup raw emails by searching for matching subjects"""
        try:
            self.mail.select('INBOX', readonly=True)
            
            for category in ['business', 'education', 'personal', 'promotions']:
                excel_path = os.path.join(self.base_dir, f'{category}_emails.xlsx')
                
                if os.path.exists(excel_path):
                    print(f"\n📂 Backing up {category} emails...")
                    self._backup_by_subject(category, excel_path)
                    
        except Exception as e:
            print(f"⚠️ Error: {str(e)}")
        finally:
            self.mail.logout()

    def _backup_by_subject(self, category, excel_path):
        """Backup emails by matching subjects"""
        raw_dir = os.path.join(self.base_dir, f'{category}_raw')
        os.makedirs(raw_dir, exist_ok=True)
        
        try:
            df = pd.read_excel(excel_path)
            if 'Subject' not in df.columns:
                print(f"❌ No Subject column in {excel_path}")
                return
                
            success = 0
            
            for subject in df['Subject'].unique()[:10]:  # Limit to 10 unique subjects
                try:
                    # Search for emails with this subject
                    status, messages = self.mail.search(None, f'SUBJECT "{subject}"')
                    if status != 'OK':
                        continue
                        
                    for mail_id in messages[0].split()[:3]:  # Max 3 emails per subject
                        status, data = self.mail.fetch(mail_id, '(RFC822)')
                        if status == 'OK':
                            filename = f"{category}_{mail_id.decode()}.eml"
                            with open(os.path.join(raw_dir, filename), 'wb') as f:
                                f.write(data[0][1])
                            success += 1
                            
                except Exception as e:
                    print(f"✗ Subject '{subject[:20]}...': {str(e)[:50]}")
                    
            print(f"✅ Saved {success} emails to {raw_dir}")
            
        except Exception as e:
            print(f"❌ Failed to process {category}: {str(e)}")

def main():
    print("\n📧 SUBJECT-BASED EMAIL BACKUP")
    print("---------------------------")
    print("Note: Will match emails by subject from Excel files")
    
    email = input("Your Gmail: ")
    password = getpass("Password: ")
    
    backup = RawEmailBackup(email, password)
    backup.backup_raw_emails()

if __name__ == '__main__':
    main()


📧 SUBJECT-BASED EMAIL BACKUP
---------------------------
Note: Will match emails by subject from Excel files

📂 Backing up business emails...
✗ Subject 'NTA - JEE(Main)-2022...': SEARCH command error: BAD [b'Could not parse comma
✗ Subject 'NTA - JEE(Main)-2022...': SEARCH command error: BAD [b'Could not parse comma
✗ Subject 'NTA - JEE(Main)-2022...': SEARCH command error: BAD [b'Could not parse comma
✅ Saved 10 emails to c:\Users\aspk1\OneDrive\Desktop\Gmail-Classification\Downloads\business_raw

📂 Backing up education emails...
✗ Subject 'ANUMALASETTY POORNA ...': SEARCH command error: BAD [b'Could not parse comma
✅ Saved 8 emails to c:\Users\aspk1\OneDrive\Desktop\Gmail-Classification\Downloads\education_raw

📂 Backing up personal emails...
✅ Saved 16 emails to c:\Users\aspk1\OneDrive\Desktop\Gmail-Classification\Downloads\personal_raw

📂 Backing up promotions emails...
✗ Subject 'IRCTC' s Online Rail...': SEARCH command error: BAD [b'Could not parse comma
✅ Saved 9 emails to c:\U

In [None]:
import os
import imaplib
import email
from getpass import getpass
import pandas as pd
from email.header import decode_header

class CompleteRawBackup:
    def __init__(self, username, password):
        self.mail = imaplib.IMAP4_SSL('imap.gmail.com')
        self.mail.login(username, password)
        self.base_dir = os.path.join(os.getcwd(), 'Raw_Emails_TXT')
        os.makedirs(self.base_dir, exist_ok=True)

    def backup_all_emails(self):
        """Backup ALL emails from all categories"""
        try:
            # Get all available folders
            status, folders = self.mail.list()
            if status != 'OK':
                raise Exception("Failed to list folders")
            
            for folder_info in folders:
                folder_name = self._parse_folder_name(folder_info.decode())
                if folder_name:
                    self._process_folder(folder_name)
                    
        except Exception as e:
            print(f"⚠️ Error: {str(e)}")
        finally:
            self.mail.logout()

    def _parse_folder_name(self, folder_info):
        """Extract folder name from IMAP response"""
        parts = folder_info.split(' "/" ')
        return parts[-1].strip('"') if len(parts) > 1 else None

    def _process_folder(self, folder_name):
        """Process all emails in one folder"""
        try:
            print(f"\n📂 Processing folder: {folder_name}")
            status = self.mail.select(folder_name, readonly=True)
            if status[0] != 'OK':
                return
                
            # Create category folder
            clean_name = folder_name.replace('[Gmail]/', '').replace('/', '_')
            category_dir = os.path.join(self.base_dir, clean_name)
            os.makedirs(category_dir, exist_ok=True)
            
            # Search ALL emails
            status, messages = self.mail.search(None, 'ALL')
            if status != 'OK':
                return
                
            for mail_id in messages[0].split():
                self._save_email(mail_id, category_dir)
                
        except Exception as e:
            print(f"✗ Error in {folder_name}: {str(e)[:50]}...")

    def _save_email(self, mail_id, category_dir):
        """Save individual email as TXT"""
        try:
            status, data = self.mail.fetch(mail_id, '(RFC822)')
            if status != 'OK':
                return
                
            msg = email.message_from_bytes(data[0][1])
            
            # Decode subject
            subject, encoding = decode_header(msg['Subject'])[0]
            if isinstance(subject, bytes):
                subject = subject.decode(encoding or 'utf-8', errors='replace')
            
            # Clean filename
            filename = f"{subject[:50]}_{mail_id.decode()}.txt".replace('/', '_')
            filepath = os.path.join(category_dir, filename)
            
            # Save complete raw email
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(f"From: {msg['From']}\n")
                f.write(f"To: {msg['To']}\n")
                f.write(f"Subject: {subject}\n")
                f.write(f"Date: {msg['Date']}\n\n")
                
                # Save body content
                if msg.is_multipart():
                    for part in msg.walk():
                        content_type = part.get_content_type()
                        if content_type in ['text/plain', 'text/html']:
                            try:
                                body = part.get_payload(decode=True).decode('utf-8', errors='replace')
                                f.write(body + "\n\n")
                            except:
                                continue
                else:
                    body = msg.get_payload(decode=True).decode('utf-8', errors='replace')
                    f.write(body)
            
            print(f"✓ Saved: {filename}")
            
        except Exception as e:
            print(f"✗ Failed email {mail_id}: {str(e)[:50]}...")

def main():
    print("\n📧 COMPLETE RAW EMAIL BACKUP")
    print("--------------------------")
    print("This will save ALL emails in TXT format")
    print("organized by Gmail folders\n")
    
    email = input("Your Gmail: ")
    password = getpass("Password: ")
    
    backup = CompleteRawBackup(email, password)
    backup.backup_all_emails()
    print(f"\n✅ All emails saved to: {os.path.abspath(backup.base_dir)}")

if __name__ == '__main__':
    main()


📧 COMPLETE RAW EMAIL BACKUP
--------------------------
This will save ALL emails in TXT format
organized by Gmail folders


📂 Processing folder: INBOX
✓ Saved: Poorna Kumar, finish setting up your new Google Ac_1.txt
✓ Saved: Your Google Account was recovered successfully_2.txt
✓ Saved: Security alert_3.txt
✓ Saved: NTA - JEE(Main)-2022 Session 1  0648976 Applicatio_4.txt
✓ Saved: NTA - JEE(Main)-2022 Session 1 E-Mail Verification_5.txt
✓ Saved: NTA - JEE(Main)-2022  Session 1  0648976 Applicati_6.txt
✓ Saved: NTA - JEE(Main)-2022  Session 1  0648976 Confirmat_7.txt
✓ Saved: Fwd:_8.txt
✓ Saved: AP EAPCET-2022 Payment Details_9.txt
✓ Saved: AP EAPCET - 2022 Application Details_10.txt
✓ Saved: TS EAMCET-2022 Payment Details_11.txt
✓ Saved: TS EAMCET-2022 Application Details_12.txt
✓ Saved: TS EAMCET-2022 Application Details_13.txt
✓ Saved: TS EAMCET-2022 _14.txt
✓ Saved: B.Tech. Admissions 2022 - Login Crendentials_15.txt
✓ Saved: Thank You ANUMALA SETTY PURNA KUMAR for Registerin_16.tx