# In [7]:
import re
import csv

def preprocess_text(text):
    """Return *text* stripped of noise characters and with whitespace normalized.

    Every digit is removed, along with every character that is not a word
    character, whitespace, or a period. Runs of whitespace are then
    collapsed to a single space and the ends of the result are trimmed.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string.
    """
    # Drop digits and anything outside word characters / whitespace / '.'.
    without_noise = re.sub(r'[^\w\s.]|\d', '', text)

    # Collapse whitespace runs, then trim both ends in one chained step.
    return re.sub(r'\s+', ' ', without_noise).strip()

def is_meaningful_sentence(sentence):
    """Return True when *sentence* is worth keeping.

    A sentence is kept only if it has at least three whitespace-separated
    tokens and at least one of those tokens is something other than a run
    of periods.

    Args:
        sentence: Candidate sentence text.

    Returns:
        bool: False for sentences with fewer than three tokens or made up
        entirely of period-only tokens; True otherwise.
    """
    words = sentence.split()
    if len(words) < 3:
        return False

    # A token consisting solely of periods ('.', '..', '...') strips down to
    # the empty string. Comparing against '.' alone would let multi-dot
    # tokens such as '...' count as real content.
    return any(word.strip('.') for word in words)

def remove_trailing_fullstop(sentence):
    """Return *sentence* with every trailing period removed.

    All consecutive periods at the very end of the string are dropped;
    periods elsewhere in the string are left untouched.

    Args:
        sentence: Sentence text.

    Returns:
        The sentence without its trailing period(s).
    """
    # Walk back from the end until a non-period character is found.
    end = len(sentence)
    while end > 0 and sentence[end - 1] == '.':
        end -= 1
    return sentence[:end]

def convert_to_csv(text, output_csv):
    """Write the meaningful sentences of *text* to a one-column CSV file.

    The text is split wherever a period is followed by a single space.
    Each piece that passes ``is_meaningful_sentence`` has its trailing
    period(s) removed by ``remove_trailing_fullstop`` and is written as its
    own row.

    Args:
        text: Text to split into sentences.
        output_csv: Path of the CSV file to write; it is opened in 'w'
            mode with UTF-8 encoding.
    """
    # The lookbehind keeps each period attached to the sentence it ends;
    # only the separating space is consumed by the split.
    candidates = re.split(r'(?<=\.) ', text)

    with open(output_csv, 'w', newline='', encoding='utf-8') as out_file:
        # Rows are produced lazily, one single-cell row per kept sentence.
        csv.writer(out_file).writerows(
            [remove_trailing_fullstop(candidate)]
            for candidate in candidates
            if is_meaningful_sentence(candidate)
        )

if __name__ == "__main__":
    # Script entry point: read the book text, clean it, and export one
    # sentence per CSV row.
    source_path, destination_csv = "dbms_book.txt", "output_file3.csv"

    with open(source_path, 'r', encoding='utf-8') as book:
        raw_text = book.read()

    convert_to_csv(preprocess_text(raw_text), destination_csv)
