In [1]:
import sqlite3
import pandas as pd

def return_labelled_data(data) -> pd.DataFrame:
    '''Return every labelled email stored in the SQLite database.

    Parameters
    ----------
    data
        Path of the SQLite database file; handed to ``sqlite3.connect``.

    Returns
    -------
    pandas.DataFrame
        One row per (email, category) pair holding the email columns, an
        ``attachments`` column (attachment file names joined by commas, NULL
        when the email has no attachment), ``category_id`` and
        ``category_slug``.
    '''

    # Attachments are LEFT JOINed so that a labelled email without any
    # attachment is still returned (an INNER JOIN would drop it). The category
    # joins stay INNER because only emails that carry a category are wanted.
    # GROUP_CONCAT ignores NULLs, so no CASE guard is needed around the name.
    q = '''
    SELECT e.id, e.subject, e.internal_date, e.from_address, e.'to', e.bcc, e.cc, e.reply_to, e.html_body, e.plain_text_body,
           GROUP_CONCAT(a.original_filename) AS attachments,
           c.id AS category_id, c.slug AS category_slug
    FROM emails_email e
    LEFT JOIN emails_emailattachment a ON e.id = a.email_id
    JOIN emails_emailcategory ec ON e.id = ec.email_id
    JOIN emails_category c ON c.id = ec.category_id
    WHERE ec.category_id IS NOT NULL
    GROUP BY e.id, e.subject, e.internal_date, e.from_address, e.'to', e.bcc, e.cc, e.reply_to, c.id, c.slug
    '''

    con = sqlite3.connect(data)
    try:
        # Execute the query and hand the result back as a DataFrame
        return pd.read_sql_query(q, con)
    finally:
        # Always release the database handle, even when the query raises
        con.close()

In [2]:
import sqlite3
import pandas as pd

def return_unlabelled_data(data, start_value, limit_value) -> pd.DataFrame:
    '''Return one page of the emails that have no category assigned.

    Parameters
    ----------
    data
        Path of the SQLite database file; handed to ``sqlite3.connect``.
    start_value
        Number of unlabelled emails to skip (SQL ``OFFSET``). Must be
        convertible with ``int()``.
    limit_value
        Maximum number of rows to return (SQL ``LIMIT``). Must be
        convertible with ``int()``.

    Returns
    -------
    pandas.DataFrame
        One row per unlabelled email with the email columns and an
        ``attachments`` column (attachment file names joined by commas, NULL
        when there is none). ``category_id`` and ``category_slug`` are kept
        for column parity with the labelled query and are always NULL here.

    Raises
    ------
    ValueError
        If ``start_value`` or ``limit_value`` is not an integer-like value.
    '''

    # Coerce up front: rejects anything that is not a plain integer before it
    # can reach the SQL statement, and keeps numeric strings working.
    offset = int(start_value)
    limit = int(limit_value)

    # LIMIT/OFFSET are bound as parameters rather than formatted into the SQL.
    # The WHERE clause keeps only emails without a category (no link row, or a
    # link row whose category_id is NULL). ORDER BY makes consecutive pages
    # stable so that paging neither repeats nor skips emails.
    q = '''
    SELECT e.id, e.subject, e.internal_date, e.from_address, e.'to', e.bcc, e.cc, e.reply_to, e.html_body, e.plain_text_body,
           GROUP_CONCAT(a.original_filename) AS attachments,
           c.id AS category_id, c.slug AS category_slug
    FROM emails_email e
    LEFT JOIN emails_emailattachment a ON e.id = a.email_id
    LEFT JOIN emails_emailcategory ec ON e.id = ec.email_id
    LEFT JOIN emails_category c ON c.id = ec.category_id
    WHERE ec.category_id IS NULL
    GROUP BY e.id, e.subject, e.internal_date, e.from_address, e.'to', e.bcc, e.cc, e.reply_to, c.id, c.slug
    ORDER BY e.id
    LIMIT ? OFFSET ?
    '''

    con = sqlite3.connect(data)
    try:
        # Execute the query and hand the result back as a DataFrame
        return pd.read_sql_query(q, con, params=(limit, offset))
    finally:
        # Always release the database handle, even when the query raises
        con.close()