In [79]:
#EL COPIES EXTRACTOR

import logging
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import smtplib
from email.mime.text import MIMEText
import csv

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    filename='EL_copy_logging.log',filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
    
)

processed_records=0
search_str_list = set()

def update_lookup_file(search_str_list, receiver_email):
    """
    Updates the lookup file with search strings and associated emails.
    If a search string already exists, it appends new emails to the existing line.
    """
    lookup_file_path = "C:\\Users\\kadivya\\Downloads\\lookup.csv"
    
    # Read existing lines from the file
    try:
        with open(lookup_file_path, 'r') as lookup:
            lines = [line.strip() for line in lookup.readlines()]
            header = lines[0] if lines else "Supplier_Number,Company_Name,Emails"  # Preserve the header or create a default one
            existing_lines = lines[1:] if len(lines) > 1 else []  # Existing data without header
    except FileNotFoundError:
        # If the file doesn't exist, initialize an empty list
        logging.info("Lookup file not found")
        
    
    # Convert existing lines to a dictionary for easier updates
    lookup_dict = {}
    lookup_dict1 = {}
    for line in existing_lines:
        parts = line.split(",")
        search_string = parts[0].strip()
        emails = [email.strip() for email in parts[1:] if email.strip()]
        lookup_dict[search_string] = set(emails)
    
    # Update the dictionary with new search strings and emails
    for search_str in search_str_list:
        if search_str in lookup_dict:
            # Append new emails to the existing set
            lookup_dict[search_str].update(receiver_email.get(search_str, "").split(","))
        else:
            # Add a new entry for the search string
            lookup_dict1[search_str] = set(receiver_email[search_str].split(","))   
    print("lookup_dict",lookup_dict)
    print("lookup_dict1",lookup_dict1)
    result={**lookup_dict,**lookup_dict1}
    # Prepare updated lines
    updated_lines = [header] if header else []
    for search_str, emails in result.items():
        updated_lines.append(f"{search_str}, ,{','.join(emails)}")

        

    # Write the updated content back to the file
    try:
        with open(lookup_file_path, 'w') as lookup:
            lookup.write("\n".join(updated_lines) + "\n")
        logging.info(f"Updated lookup file with search strings: {search_str_list}")
    except Exception as e:
        logging.error(f"Error writing to lookup file: {e}")

def send_batch_emails(missing_search_strings, receiver_email):
    """
    Sends a batch email notification for all missing search strings.
    Validates email addresses and notifies valid recipients about invalid emails.
    """
    email_set = set()
  
    if not missing_search_strings:
        return
    
    for missin_ in missing_search_strings:
        if missin_ in list(receiver_email.keys()):
            email_set.update(receiver_email[missin_].split(","))
        
        
        sender_email = "20981a0583@raghuenggcollege.in"
        password = "aofq drmq vdeb xbpb"
        subject = "Missing Search Strings Notification"
        body=f"The following search strings were not found in any file:\n\n" + "\n".join(missing_search_strings)
           
        
        msg = MIMEText(body)
        msg['Subject'] = subject
        msg['From'] = sender_email
        msg['To'] = ",".join(email_set)

        try:
            server = smtplib.SMTP('smtp.gmail.com', 587)
            server.starttls()
            server.login(sender_email, password)
            server.sendmail(sender_email, email_set, msg.as_string())
            logging.warning(f"Batch email sent for missing search strings to valid emails {email_set}.")
        except Exception as e:
            logging.error(f"Failed to send email to recipients. Error: {email_set}")
        email_set.clear()

def process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email):
    """
    Processes all files in the folder to search for multiple strings.
    """
    try:
        
        files = os.listdir(EL_folderpath)
        file_paths = [os.path.join(EL_folderpath, f) for f in files]  # Full paths
        missing_search_strings = []
        global processed_records 

        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            futures = {executor.submit(extracting_ELcopies, file_paths, search_str): search_str for search_str in search_strings}
            with open(output_file, 'a') as out_file:
                for future in as_completed(futures):
                    search_str = futures[future]
                    matched_lines = future.result()
                    if matched_lines:
                        out_file.writelines(matched_lines)
                        processed_records += 1
                        logging.info(f"Processed record for search string: {search_str}")
                    else:
                        missing_search_strings.append(search_str[2:])
        print("missing_search_strings",missing_search_strings)
        print("search_str_list",search_str_list)
        update_lookup_file(search_str_list,receiver_email)
        # Send batch email for missing search strings
        send_batch_emails(missing_search_strings, receiver_email)

    except FileNotFoundError:
        logging.error(f"Error: The folder '{EL_folderpath}' was not found.")


def extracting_ELcopies(EL_files, search_str):
    """
    Function to extract EL copies from the CSV file and append them to a new file.
    """
    matched_lines = []
    search_str="0"+ str(search_str)
    for file_path in EL_files:
        if not os.path.isfile(file_path):
            logging.error(f"Error: The file '{file_path}' was not found.")
            continue
        try:
            with open(file_path, 'r') as file:
                EL_f=file.readlines()
                for line in EL_f:

                    stripped_line = line.strip()
                    if search_str in stripped_line[:10]:
                        search_str_list.add(search_str[3:10])
                        matched_lines.append(stripped_line + "\n")
                        
                        return matched_lines
                        
                    
                logging.info(f"EL copies extracted and appended to the file EL_lines_output.txt")
        except FileNotFoundError:
            logging.error(f"Error: The file '{file_path}' was not found.")
    

    
def Read_csv(csvfile_ELcopies):
    """
    Function to read a CSV file and return a list of search strings.
    """
    try:
        with open(csvfile_ELcopies, 'r') as csvfile:
            lines=[line.strip() for line in csvfile.readlines() if line.strip()]
            existing_lines=lines[1:] if len(lines)>1 else []
            for line in existing_lines:
                search_strings=[line.split(',')[0].strip() for line in existing_lines if line.strip()]
        logging.info(f"CSV file {csvfile} read successfully.")
        return search_strings
    except FileNotFoundError:
        logging.error(f"CSV file {csvfile} not found.")
        return []
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return []

def create_supplier_email_dict(csv_file_path):
    """
    Reads a CSV file and creates a dictionary with Supplier_Number as the key and Email as the value.

    :param csv_file_path: Path to the CSV file
    :return: Dictionary with Supplier_Number as keys and Email as values
    """
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file_path)

        # Ensure the required columns exist
        if 'String' not in df.columns or 'Email' not in df.columns:
            raise KeyError("The required columns 'Supplier_Number' and 'Email' are missing in the CSV file.")

        # Create the dictionary
        df['String'] = df['String'].str[2:]
        supplier_email_dict = dict(zip(df['String'], df['Email']))
        logging.info(f"Supplier email dictionary created successfully from {csv_file_path}.")
        return supplier_email_dict
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
    except KeyError as e:
        logging.error(f"Error: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")

def validate_and_notify(csv_file_path):
    """
    Validates each row in the CSV file and sends an email if the number of columns
    in a row exceeds the number of columns in the header or if there are unbalanced quotations.
    """
    invalid_emails_sent = 0
    try:
        with open(csv_file_path, newline='') as file:
            reader = csv.reader(file)
            rows = list(reader)  # Read all rows into a list

            # Extract the header and determine the number of columns
            header = [line.strip() for line in rows[0] if line.strip()]
            num_columns = len(header)
            logging.info(f"Header: {header} (Columns: {num_columns})")

            # Iterate through each row after the header
            for row_index, row in enumerate(rows[1:], start=1):  # Skip the header row
                row = [field.strip() for field in row if field.strip()]  # Remove any leading/trailing spaces
                logging.debug(f"Row {row_index + 1}: {row}")

                

                # Check if the number of columns exceeds the header
                if len(row) > num_columns:
                    # Extract the first email from the row
                    first_email = row[num_columns - 1].strip()
                    if ',' in first_email:
                        first_email = first_email.split(',')[0].strip()
                    """
                     Sends an email to the first email address in the row.
                      """
                    sender_email = "20981a0583@raghuenggcollege.in"
                    password = "aofq drmq vdeb xbpb"
                    subject = "Invalid emails without proper formatting"
                    body = (
                        f"Row {row_index + 1} in the CSV file has more columns than the header or unbalanced quotations.\n"
                        f"Please check and correct the data."
                    )
                    msg = MIMEText(body)
                    msg['Subject'] = subject
                    msg['From'] = sender_email
                    msg['To'] = first_email

                    try:
                        server = smtplib.SMTP('smtp.gmail.com', 587)
                        server.starttls()
                        server.login(sender_email, password)
                        server.sendmail(sender_email, first_email, msg.as_string())
                        server.quit()
                        logging.info(f"Email sent to {first_email} for row {row_index + 1}.")
                        invalid_emails_sent += 1
                    except Exception as e:
                        logging.error(f"Failed to send email to {first_email}. Error: {e}")
        if invalid_emails_sent > 0:
            return True
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
    return False
def main():
    try:
      csvfile_ELcopies = "C:\\Users\\kadivya\\Documents\\ELcopies.csv"

      if validate_and_notify(csvfile_ELcopies):
        logging.info("Emails are not formatted properly.")
        return
      else:
        logging.info("Emails are formatted properly.")
      search_strings=Read_csv(csvfile_ELcopies)
      if create_supplier_email_dict(csvfile_ELcopies):
         receiver_email = create_supplier_email_dict(csvfile_ELcopies)

      else:
         logging.error("No proper formatting of email addresses in the CSV file.")
         return
    except FileNotFoundError:
      logging.error(f"The filepath {csvfile_ELcopies} not found")
    print("search_str",search_strings)
    print("receiver_email",receiver_email)
    if not search_strings:
         logging.error("No search strings found in the CSV file.")
         return
    try:
      EL_folderpath= "C:\\Users\\kadivya\\Documents\\EL_folder"
      output_file = "C:\\Users\\kadivya\\Documents\\EL_lines_output.txt"
      
      process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email)
      
        
    except FileNotFoundError:
      logging.error(f"The filepath {EL_folderpath} not found")
      return
    except Exception as e:
      logging.error(f"An error occurred: {e}")
      return


if __name__== '__main__':
    main()

search_str ['EL234555', 'EL234556', 'EL234557', 'EL234558', 'EL234559', 'EL456788', 'EL567888', 'EL897098']
receiver_email {'234555': 'divya@gmail.com,sunny@gmail.com,rakesh@gmail.com', '234556': 'ram@gmail.com', '234557': 'divya@gmail.com', '234558': 'jaya@gmail.com', '234559': 'ram@gmail.com', '456788': 'evan@gmail.com', '567888': 'ashley@gmail.com,gregory@gmail.com', '897098': 'federick@gmail.com,maya@gmail.com'}
missing_search_strings ['567888', '897098']
search_str_list {'234555', '234558', '234556', '456788', '234559', '234557'}
lookup_dict {'64649': {'sridivya@gmail.com', 'divyasrikarr@gmail.com'}, '52395': {'koushik', 'koll', '20981a0583@raghuenggcollege.in'}, '234555': {'divya@gmail.com', 'rakesh@gmail.com', 'sunny@gmail.com'}, '234558': {'jaya@gmail.com'}, '234556': {'ram@gmail.com'}, '234559': {'ram@gmail.com'}, '456788': {'evan@gmail.com'}, '234557': {'divya@gmail.com'}}
lookup_dict1 {}


In [None]:
try:
        current_lines=[]
        with open(EL_file, 'r') as EL_f:   
            with open('C:\\Users\\kadivya\\Documents\\EL_lines_output.txt', 'a') as output_file:
                for row in EL_f:
                    row=row.strip()
                    if row.startswith('0EL'):
                        if current_lines:
                            output_file.write("\n".join(current_lines) + "\n")
                            current_lines.clear()
                        current_lines.append(row)
                        
                    else:
                       current_lines.append(row)
                output_file.write("\n".join(current_lines) + "\n")
                current_lines.clear()
                logging.info(f"EL copies extracted and appended to the file EL_lines_output.txt")
    except Exception as e:
        logging.error(f"An error occurred while writing to the file EL_lines_output.txt: {e}")


In [118]:
#EL COPIES EXTRACTOR

import logging
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import smtplib
from email.mime.text import MIMEText
import csv

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    filename='EL_copy_logging.log',filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
    
)

processed_records=0
search_str_list = set()

def update_lookup_file(search_str_list, receiver_email):
    """
    Updates the lookup file with search strings and associated emails.
    If a search string already exists, it appends new emails to the existing line.
    """
    lookup_file_path = "C:\\Users\\kadivya\\Downloads\\lookup.csv"
    
    # Read existing lines from the file
    try:
        with open(lookup_file_path, 'r') as lookup:
            lines = [line.strip() for line in lookup.readlines()]
            header = lines[0] if lines else "Supplier_Number,Company_Name,Emails"  # Preserve the header or create a default one
            existing_lines = lines[1:] if len(lines) > 1 else []  # Existing data without header
    except FileNotFoundError:
        # If the file doesn't exist, initialize an empty list
        logging.info("Lookup file not found")
        
    
    # Convert existing lines to a dictionary for easier updates
    lookup_dict = {}
    lookup_dict1 = {}
    for line in existing_lines:
        parts = line.split(",")
        search_string = parts[0].strip()
        emails = [email.strip() for email in parts[1:] if email.strip()]
        lookup_dict[search_string] = set(emails)
    
    # Update the dictionary with new search strings and emails
    for search_str in search_str_list:
        if search_str in lookup_dict:
            # Append new emails to the existing set
            lookup_dict[search_str].update(receiver_email.get(search_str, "").split(","))
        else:
            # Add a new entry for the search string
            lookup_dict1[search_str] = set(receiver_email[search_str].split(","))   
    print("lookup_dict",lookup_dict)
    print("lookup_dict1",lookup_dict1)
    result={**lookup_dict,**lookup_dict1}
    # Prepare updated lines
    updated_lines = [header] if header else []
    for search_str, emails in result.items():
        updated_lines.append(f"{search_str}, ,{','.join(emails)}")

        

    # Write the updated content back to the file
    try:
        with open(lookup_file_path, 'w') as lookup:
            lookup.write("\n".join(updated_lines) + "\n")
        logging.info(f"Updated lookup file with search strings: {search_str_list}")
    except Exception as e:
        logging.error(f"Error writing to lookup file: {e}")

def send_batch_emails(missing_search_strings, receiver_email):
    """
    Sends a batch email notification for all missing search strings.
    Validates email addresses and notifies valid recipients about invalid emails.
    """
    email_set = set()
  
    if not missing_search_strings:
        return
    
    for missin_ in missing_search_strings:
        if missin_ in list(receiver_email.keys()):
            email_set.update(receiver_email[missin_].split(","))
        
        
        sender_email = "20981a0583@raghuenggcollege.in"
        password = "aofq drmq vdeb xbpb"
        subject = "Missing Search Strings Notification"
        body=f"The following search strings were not found in any file:\n\n" + "\n".join(missing_search_strings)
           
        
        msg = MIMEText(body)
        msg['Subject'] = subject
        msg['From'] = sender_email
        msg['To'] = ",".join(email_set)

        try:
            server = smtplib.SMTP('smtp.gmail.com', 587)
            server.starttls()
            server.login(sender_email, password)
            server.sendmail(sender_email, email_set, msg.as_string())
            logging.warning(f"Batch email sent for missing search strings to valid emails {email_set}.")
        except Exception as e:
            logging.error(f"Failed to send email to recipients. Error: {email_set}")
        email_set.clear()

def process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email):
    """
    Processes all files in the folder to search for multiple strings.
    """
    try:
        
        files = os.listdir(EL_folderpath)
        file_paths = [os.path.join(EL_folderpath, f) for f in files]  # Full paths
        missing_search_strings = []
        global processed_records 

        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            futures = {executor.submit(extracting_ELcopies, file_paths, search_str): search_str for search_str in search_strings}
            with open(output_file, 'a') as out_file:
                for future in as_completed(futures):
                    search_str = futures[future]
                    matched_lines = future.result()
                    if matched_lines:
                        out_file.writelines(matched_lines)
                        processed_records += 1
                        logging.info(f"Processed record for search string: {search_str}")
                    else:
                        missing_search_strings.append(search_str[2:])
        print("missing_search_strings",missing_search_strings)
        print("search_str_list",search_str_list)
        update_lookup_file(search_str_list,receiver_email)
        # Send batch email for missing search strings
        send_batch_emails(missing_search_strings, receiver_email)

    except FileNotFoundError:
        logging.error(f"Error: The folder '{EL_folderpath}' was not found.")


def extracting_ELcopies(EL_files, search_str):
    """
    Function to extract EL copies from the CSV file and append them to a new file.
    """
    matched_lines = []
    search_str="0"+ str(search_str)
    for file_path in EL_files:
        if not os.path.isfile(file_path):
            logging.error(f"Error: The file '{file_path}' was not found.")
            continue
        try:
            with open(file_path, 'r') as file:
                EL_f=file.readlines()
                for line in EL_f:

                    stripped_line = line.strip()
                    print("stripped_line",stripped_line)
                    if search_str in stripped_line[:10]:
                        search_str_list.add(search_str[3:10])
                        matched_lines.append(stripped_line + "\n")
                        print("matched_lines1",matched_lines)
                        continue
                    if matched_lines and not stripped_line.startswith('0EL'):
                        matched_lines.append(stripped_line + "\n")
                        print("matched_lines2",matched_lines)
                        continue
                    if stripped_line.startswith('0EL')  and matched_lines:
                        return matched_lines
                        
                        
                        
                    
                logging.info(f"EL copies extracted and appended to the file EL_lines_output.txt")
        except FileNotFoundError:
            logging.error(f"Error: The file '{file_path}' was not found.")
    

    
def Read_csv(csvfile_ELcopies):
    """
    Function to read a CSV file and return a list of search strings.
    """
    try:
        with open(csvfile_ELcopies, 'r') as csvfile:
            lines=[line.strip() for line in csvfile.readlines() if line.strip()]
            existing_lines=lines[1:] if len(lines)>1 else []
            for line in existing_lines:
                search_strings=[line.split(',')[0].strip() for line in existing_lines if line.strip()]
        logging.info(f"CSV file {csvfile} read successfully.")
        return search_strings
    except FileNotFoundError:
        logging.error(f"CSV file {csvfile} not found.")
        return []
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return []

def create_supplier_email_dict(csv_file_path):
    """
    Reads a CSV file and creates a dictionary with Supplier_Number as the key and Email as the value.

    :param csv_file_path: Path to the CSV file
    :return: Dictionary with Supplier_Number as keys and Email as values
    """
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file_path)

        # Ensure the required columns exist
        if 'String' not in df.columns or 'Email' not in df.columns:
            raise KeyError("The required columns 'Supplier_Number' and 'Email' are missing in the CSV file.")

        # Create the dictionary
        df['String'] = df['String'].str[2:]
        supplier_email_dict = dict(zip(df['String'], df['Email']))
        logging.info(f"Supplier email dictionary created successfully from {csv_file_path}.")
        return supplier_email_dict
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
        return {}
    except KeyError as e:
        logging.error(f"Error: {e}")
        return {}
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        return {}

def validate_and_notify(csv_file_path):
    """
    Validates each row in the CSV file and sends an email if the number of columns
    in a row exceeds the number of columns in the header or if there are unbalanced quotations.
    """
    invalid_emails_sent = 0
    try:
        with open(csv_file_path, newline='') as file:
            reader = csv.reader(file)
            rows = list(reader)  # Read all rows into a list

            # Extract the header and determine the number of columns
            header = [line.strip() for line in rows[0] if line.strip()]
            num_columns = len(header)
            logging.info(f"Header: {header} (Columns: {num_columns})")

            # Iterate through each row after the header
            for row_index, row in enumerate(rows[1:], start=1):  # Skip the header row
                row = [field.strip() for field in row if field.strip()]  # Remove any leading/trailing spaces
                logging.debug(f"Row {row_index + 1}: {row}")

                

                # Check if the number of columns exceeds the header
                if len(row) > num_columns:
                    # Extract the first email from the row
                    first_email = row[num_columns - 1].strip()
                    if ',' in first_email:
                        first_email = first_email.split(',')[0].strip()
                    """
                     Sends an email to the first email address in the row.
                      """
                    sender_email = "20981a0583@raghuenggcollege.in"
                    password = "aofq drmq vdeb xbpb"
                    subject = "Invalid emails without proper formatting"
                    body = (
                        f"Row {row_index + 1} in the CSV file has more columns than the header or unbalanced quotations.\n"
                        f"Please check and correct the data."
                    )
                    msg = MIMEText(body)
                    msg['Subject'] = subject
                    msg['From'] = sender_email
                    msg['To'] = first_email

                    try:
                        server = smtplib.SMTP('smtp.gmail.com', 587)
                        server.starttls()
                        server.login(sender_email, password)
                        server.sendmail(sender_email, first_email, msg.as_string())
                        server.quit()
                        logging.info(f"Email sent to {first_email} for row {row_index + 1}.")
                        invalid_emails_sent += 1
                    except Exception as e:
                        logging.error(f"Failed to send email to {first_email}. Error: {e}")
        if invalid_emails_sent > 0:
            return True
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
        
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        
    return False
def main():
    try:
      csvfile_ELcopies = "C:\\Users\\kadivya\\Documents\\ELcopies.csv"

      if validate_and_notify(csvfile_ELcopies):
        logging.info("Emails are not formatted properly.")
        return
      else:
        logging.info("Emails are formatted properly.")

      search_strings=Read_csv(csvfile_ELcopies)
      if  create_supplier_email_dict(csvfile_ELcopies):
         receiver_email = create_supplier_email_dict(csvfile_ELcopies)

      else:
         logging.error("No proper formatting of email addresses in the CSV file.")
         return
    except FileNotFoundError:
      logging.error(f"The filepath {csvfile_ELcopies} not found")
    print("search_str",search_strings)
    print("receiver_email",receiver_email)
    if not search_strings:
         logging.error("No search strings found in the CSV file.")
         return
    try:
      EL_folderpath= "C:\\Users\\kadivya\\Documents\\EL_folder"
      output_file = "C:\\Users\\kadivya\\Documents\\EL_lines_output.txt"
      
      process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email)
      
        
    except FileNotFoundError:
      logging.error(f"The filepath {EL_folderpath} not found")
      return
    except Exception as e:
      logging.error(f"An error occurred: {e}")
      return


if __name__== '__main__':
    main()

search_str ['EL234555', 'EL234556', 'EL234557', 'EL234558', 'EL234559', 'EL456788', 'EL567888', 'EL897098']
receiver_email {'234555': 'divya@gmail.com,sunny@gmail.com,rakesh@gmail.com', '234556': 'ram@gmail.com', '234557': 'divya@gmail.com"', '234558': 'jaya@gmail.com', '234559': 'ram@gmail.com', '456788': 'evan@gmail.com', '567888': 'ashley@gmail.com,gregory@gmail.com', '897098': 'federick@gmail.com,maya@gmail.com'}
stripped_line 0EL523545, Data1, More Data
stripped_line 0EL532445, Data2, Additional Data
stripped_line 0EL523455, Example Data
stripped_line here and then
stripped_line 0EL234555, Random Information
matched_lines1 ['0EL234555, Random Information\n']
stripped_line Example information noted here
matched_lines2 ['0EL234555, Random Information\n', 'Example information noted here\n']
stripped_line 0EL523545, Data1, More Data
stripped_line 0EL532445, Data2, Additional Data
stripped_line 0EL523455, Example Data
stripped_line here and then
stripped_line 0EL234555, Random Informat

In [None]:
#EL COPIES EXTRACTOR DUPLICATE

import logging
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import smtplib
from email.mime.text import MIMEText
import csv

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    filename='EL_copy_logging.log',filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
    
)

processed_records=0
search_str_list = set()

def update_lookup_file(search_str_list, receiver_email):
    """
    Updates the lookup file with search strings and associated emails.
    If a search string already exists, it appends new emails to the existing line.
    """
    lookup_file_path = "C:\\Users\\kadivya\\Downloads\\lookup.csv"
    
    # Read existing lines from the file
    try:
        with open(lookup_file_path, 'r') as lookup:
            lines = [line.strip() for line in lookup.readlines()]
            header = lines[0] if lines else "Supplier_Number,Company_Name,Emails"  # Preserve the header or create a default one
            existing_lines = lines[1:] if len(lines) > 1 else []  # Existing data without header
    except FileNotFoundError:
        # If the file doesn't exist, initialize an empty list
        logging.info("Lookup file not found")
        
    
    # Convert existing lines to a dictionary for easier updates
    lookup_dict = {}
    lookup_dict1 = {}
    for line in existing_lines:
        parts = line.split(",")
        search_string = parts[0].strip()
        emails = [email.strip() for email in parts[1:] if email.strip()]
        lookup_dict[search_string] = set(emails)
    
    # Update the dictionary with new search strings and emails
    for search_str in search_str_list:
        if search_str in lookup_dict:
            # Append new emails to the existing set
            lookup_dict[search_str].update(receiver_email.get(search_str, "").split(","))
        else:
            # Add a new entry for the search string
            lookup_dict1[search_str] = set(receiver_email[search_str].split(","))   
    print("lookup_dict",lookup_dict)
    print("lookup_dict1",lookup_dict1)
    result={**lookup_dict,**lookup_dict1}
    # Prepare updated lines
    updated_lines = [header] if header else []
    for search_str, emails in result.items():
        updated_lines.append(f"{search_str}, ,{','.join(emails)}")

        

    # Write the updated content back to the file
    try:
        with open(lookup_file_path, 'w') as lookup:
            lookup.write("\n".join(updated_lines) + "\n")
        logging.info(f"Updated lookup file with search strings: {search_str_list}")
    except Exception as e:
        logging.error(f"Error writing to lookup file: {e}")

def send_batch_emails(missing_search_strings, receiver_email):
    """
    Sends a batch email notification for all missing search strings.
    Validates email addresses and notifies valid recipients about invalid emails.
    """
    email_set = set()
  
    if not missing_search_strings:
        return
    
    for missin_ in missing_search_strings:
        if missin_ in list(receiver_email.keys()):
            email_set.update(receiver_email[missin_].split(","))
        
        
        sender_email = "20981a0583@raghuenggcollege.in"
        password = "aofq drmq vdeb xbpb"
        subject = "Missing Search Strings Notification"
        body=f"The following search strings were not found in any file:\n\n" + "\n".join(missing_search_strings)
           
        
        msg = MIMEText(body)
        msg['Subject'] = subject
        msg['From'] = sender_email
        msg['To'] = ",".join(email_set)

        try:
            server = smtplib.SMTP('smtp.gmail.com', 587)
            server.starttls()
            server.login(sender_email, password)
            server.sendmail(sender_email, email_set, msg.as_string())
            logging.warning(f"Batch email sent for missing search strings to valid emails {email_set}.")
        except Exception as e:
            logging.error(f"Failed to send email to recipients. Error: {email_set}")
        email_set.clear()

def process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email):
    """
    Processes all files in the folder to search for multiple strings.
    """
    try:
        
        files = os.listdir(EL_folderpath)
        file_paths = [os.path.join(EL_folderpath, f) for f in files]  # Full paths
        missing_search_strings = []
        global processed_records 

        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            futures = {executor.submit(extracting_ELcopies, file_paths, search_str): search_str for search_str in search_strings}
            with open(output_file, 'a') as out_file:
                for future in as_completed(futures):
                    search_str = futures[future]
                    matched_lines = future.result()
                    if matched_lines:
                        out_file.writelines(matched_lines)
                        processed_records += 1
                        logging.info(f"Processed record for search string: {search_str}")
                    else:
                        missing_search_strings.append(search_str[2:])
        print("missing_search_strings",missing_search_strings)
        print("search_str_list",search_str_list)
        update_lookup_file(search_str_list,receiver_email)
        # Send batch email for missing search strings
        send_batch_emails(missing_search_strings, receiver_email)

    except FileNotFoundError:
        logging.error(f"Error: The folder '{EL_folderpath}' was not found.")


def extracting_ELcopies(EL_files, search_str):
    """
    Function to extract EL copies from the CSV file and append them to a new file.
    """
    matched_lines = []
    search_str="0"+ str(search_str)
    for file_path in EL_files:
        if not os.path.isfile(file_path):
            logging.error(f"Error: The file '{file_path}' was not found.")
            continue
        try:
            with open(file_path, 'r') as file:
                EL_f=file.readlines()
                for line in EL_f:

                    stripped_line = line.strip()
                    print("stripped_line",stripped_line)
                    if search_str in stripped_line[:10]:
                        search_str_list.add(search_str[3:10])
                        matched_lines.append(stripped_line + "\n")
                        print("matched_lines1",matched_lines)
                        continue
                    if matched_lines and not stripped_line.startswith('0EL'):
                        matched_lines.append(stripped_line + "\n")
                        print("matched_lines2",matched_lines)
                        continue
                    if stripped_line.startswith('0EL')  and matched_lines:
                        return matched_lines
                        
                        
                        
                    
                logging.info(f"EL copies extracted and appended to the file EL_lines_output.txt")
        except FileNotFoundError:
            logging.error(f"Error: The file '{file_path}' was not found.")
    

    
def Read_csv(csvfile_ELcopies):
    """
    Function to read a CSV file and return a list of search strings.
    """
    try:
        with open(csvfile_ELcopies, 'r') as csvfile:
            lines=[line.strip() for line in csvfile.readlines() if line.strip()]
            existing_lines=lines[1:] if len(lines)>1 else []
            for line in existing_lines:
                search_strings=[line.split(',')[0].strip() for line in existing_lines if line.strip()]
        logging.info(f"CSV file {csvfile} read successfully.")
        return search_strings
    except FileNotFoundError:
        logging.error(f"CSV file {csvfile} not found.")
        return []
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return []

def create_supplier_email_dict(csv_file_path):
    """
    Reads a CSV file and creates a dictionary with Supplier_Number as the key and Email as the value.

    :param csv_file_path: Path to the CSV file
    :return: Dictionary with Supplier_Number as keys and Email as values
    """
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file_path)

        # Ensure the required columns exist
        if 'String' not in df.columns or 'Email' not in df.columns:
            raise KeyError("The required columns 'Supplier_Number' and 'Email' are missing in the CSV file.")

        # Create the dictionary
        df['String'] = df['String'].str[2:]
        supplier_email_dict = dict(zip(df['String'], df['Email']))
        logging.info(f"Supplier email dictionary created successfully from {csv_file_path}.")
        return supplier_email_dict
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
        return {}
    except KeyError as e:
        logging.error(f"Error: {e}")
        return {}
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        return {}

def validate_and_notify(csv_file_path):
    """
    Validates each row in the CSV file and sends an email if the number of columns
    in a row exceeds the number of columns in the header or if there are unbalanced quotations.
    """
    invalid_emails_sent = 0
    try:
        with open(csv_file_path, newline='') as file:
            reader = csv.reader(file)
            rows = list(reader)  # Read all rows into a list

            # Extract the header and determine the number of columns
            header = [line.strip() for line in rows[0] if line.strip()]
            num_columns = len(header)
            logging.info(f"Header: {header} (Columns: {num_columns})")

            # Iterate through each row after the header
            for row_index, row in enumerate(rows[1:], start=1):  # Skip the header row
                row = [field.strip() for field in row if field.strip()]  # Remove any leading/trailing spaces
                logging.debug(f"Row {row_index + 1}: {row}")

                

                # Check if the number of columns exceeds the header
                if len(row) > num_columns:
                    # Extract the first email from the row
                    first_email = row[num_columns - 1].strip()
                    if ',' in first_email:
                        first_email = first_email.split(',')[0].strip()
                    """
                     Sends an email to the first email address in the row.
                      """
                    sender_email = "20981a0583@raghuenggcollege.in"
                    password = "aofq drmq vdeb xbpb"
                    subject = "Invalid emails without proper formatting"
                    body = (
                        f"Row {row_index + 1} in the CSV file has more columns than the header or unbalanced quotations.\n"
                        f"Please check and correct the data."
                    )
                    msg = MIMEText(body)
                    msg['Subject'] = subject
                    msg['From'] = sender_email
                    msg['To'] = first_email

                    try:
                        server = smtplib.SMTP('smtp.gmail.com', 587)
                        server.starttls()
                        server.login(sender_email, password)
                        server.sendmail(sender_email, first_email, msg.as_string())
                        server.quit()
                        logging.info(f"Email sent to {first_email} for row {row_index + 1}.")
                        invalid_emails_sent += 1
                    except Exception as e:
                        logging.error(f"Failed to send email to {first_email}. Error: {e}")
        if invalid_emails_sent > 0:
            return True
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_file_path}' was not found.")
        
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        
    return False
def main():
    try:
      csvfile_ELcopies = "C:\\Users\\kadivya\\Documents\\ELcopies.csv"

      if validate_and_notify(csvfile_ELcopies):
        logging.info("Emails are not formatted properly.")
        return
      else:
        logging.info("Emails are formatted properly.")

      search_strings=Read_csv(csvfile_ELcopies)
      if  create_supplier_email_dict(csvfile_ELcopies):
         receiver_email = create_supplier_email_dict(csvfile_ELcopies)

      else:
         logging.error("No proper formatting of email addresses in the CSV file.")
         return
    except FileNotFoundError:
      logging.error(f"The filepath {csvfile_ELcopies} not found")
    print("search_str",search_strings)
    print("receiver_email",receiver_email)
    if not search_strings:
         logging.error("No search strings found in the CSV file.")
         return
    try:
      EL_folderpath= "C:\\Users\\kadivya\\Documents\\EL_folder"
      output_file = "C:\\Users\\kadivya\\Documents\\EL_lines_output.txt"
      
      process_all_files_in_folder(EL_folderpath, search_strings, output_file,receiver_email)
      
        
    except FileNotFoundError:
      logging.error(f"The filepath {EL_folderpath} not found")
      return
    except Exception as e:
      logging.error(f"An error occurred: {e}")
      return


if __name__== '__main__':
    main()