In [None]:
def extract_sentences_from_conll_file(input_file):
    """Read a CoNLL-style file and return its sentences as plain strings.

    Every non-blank line is split on whitespace and only its first column
    (the token) is kept. A blank line ends the current sentence; the tokens
    gathered so far are joined with single spaces. Lines whose first column
    starts with ``-DOCSTART-`` are ignored, as are tokens that are exactly
    ``-``.

    Args:
        input_file: Path of the file to read. The file is decoded as UTF-8,
            and a leading byte-order mark, if present, is discarded.

    Returns:
        A list of sentence strings in file order. The list is empty when the
        file contains no tokens, and also when the file cannot be found or
        read (an error message is printed in that case instead of raising).
    """
    sentences = []
    current_sentence = []

    try:
        # 'utf-8-sig' drops a leading BOM and reads BOM-less files unchanged.
        # With plain 'utf-8' the BOM stays attached to the first token as
        # U+FEFF (strip() does not remove it), which also prevents a leading
        # -DOCSTART- line from being recognised.
        with open(input_file, 'r', encoding='utf-8-sig') as f:
            for line in f:
                parts = line.split()

                # Blank (or whitespace-only) line: sentence boundary.
                if not parts:
                    if current_sentence:
                        sentences.append(' '.join(current_sentence))
                        current_sentence = []
                    continue

                token = parts[0]

                # Skip document start markers.
                if token.startswith('-DOCSTART-'):
                    continue

                # Keep punctuation, but drop tokens that are a lone hyphen.
                if token != '-':
                    current_sentence.append(token)
    except FileNotFoundError:
        print(f"Error: Could not find the file {input_file}")
        return []
    except Exception as e:
        # Deliberate best-effort: report any other read/decode problem and
        # hand back an empty result rather than propagating.
        print(f"Error reading file: {str(e)}")
        return []

    # The file may end without a trailing blank line; flush what is left.
    if current_sentence:
        sentences.append(' '.join(current_sentence))

    return sentences

def write_sentences_to_text_file(sentences, output_file):
    """Write the given sentences to a UTF-8 text file, one per line.

    Args:
        sentences: Collection of sentences to write; each one is followed by
            a newline in the output.
        output_file: Path of the file to create or overwrite.

    Returns:
        None. A confirmation with the sentence count is printed on success;
        on any failure an error message is printed instead of raising.
    """
    try:
        with open(output_file, 'w', encoding='utf-8') as out:
            out.writelines(f"{text}\n" for text in sentences)
        print(f"Successfully wrote {len(sentences)} sentences to {output_file}")
    except Exception as err:
        print(f"Error writing to file: {str(err)}")

# Example usage

In [None]:
# Source CoNLL file and destination text file.
# Replace input_file with the path to your own data.
input_file = r"c:\Users\Sakib Ahmed\Desktop\yours.conll"
output_file = 'sentences.txt'

sentences = extract_sentences_from_conll_file(input_file)

# Nothing is written or previewed when no sentences were extracted.
if sentences:
    write_sentences_to_text_file(sentences, output_file)

    # Preview up to three sentences as a quick sanity check.
    print("\nFirst few sentences extracted:")
    for position, text in enumerate(sentences[:3], start=1):
        print(f"Sentence {position}: {text}")