In [2]:
# Install pandas
!python3 -m pip install pandas

import os
import pandas as pd
from collections import Counter

def extract_places(file_content):
    """Collect location names tagged with B-LOC / I-LOC in token-per-line text.

    Each non-blank line is split on whitespace; the first field is taken as
    the token and the last field as its tag. A ``B-LOC`` tag opens a new
    location, following ``I-LOC`` tags extend it, and any other tag closes it.

    Args:
        file_content: Full text of one annotated file.

    Returns:
        A list of location strings (tokens joined by single spaces) in order
        of appearance, with repeated mentions kept as separate entries.
    """
    found = []
    pending = []  # tokens of the location currently being assembled

    for raw_line in file_content.splitlines():
        fields = raw_line.split()
        # Blank lines and single-column lines carry no tag; they are skipped
        # without closing a location that is still being assembled.
        if len(fields) < 2:
            continue

        token, label = fields[0], fields[-1]

        # A continuation tag only counts when a location is already open.
        if label == 'I-LOC' and pending:
            pending.append(token)
            continue

        # Every other case (new B-LOC, orphan I-LOC, non-location tag)
        # closes whatever location was open.
        if pending:
            found.append(" ".join(pending))
        pending = [token] if label == 'B-LOC' else []

    # Flush a location that runs up to the end of the input.
    if pending:
        found.append(" ".join(pending))
    return found

def process_files(directory):
    """Count tagged places in every ``.txt`` file directly inside *directory*.

    Each matching file is read as UTF-8, passed to ``extract_places``, and
    the resulting place names are tallied per file.

    Args:
        directory: Path of the folder to scan (not searched recursively).

    Returns:
        A list of ``[filename, place, count]`` rows, one per distinct place
        per file. Files are visited in sorted name order so the output is
        reproducible.
    """
    data = []

    # os.listdir yields entries in arbitrary order; sorting keeps the rows
    # (and therefore the saved CSV) stable across runs and machines.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        filepath = os.path.join(directory, filename)
        # A sub-directory whose name ends in ".txt" cannot be opened as a file.
        if not os.path.isfile(filepath):
            continue
        with open(filepath, 'r', encoding='utf-8') as file:
            file_content = file.read()
        occurrences = Counter(extract_places(file_content))
        data.extend(
            [filename, place, count] for place, count in occurrences.items()
        )

    return data

def save_to_csv(data, output_file):
    """Write the extracted rows to *output_file* as a CSV with a header line.

    Args:
        data: Rows of three values each, matching the columns
            ``Name``, ``Place`` and ``Occurrences``.
        output_file: Destination path of the CSV file.
    """
    column_names = ['Name', 'Place', 'Occurrences']
    table = pd.DataFrame(data, columns=column_names)
    # index=False keeps the DataFrame's row index out of the file.
    table.to_csv(output_file, index=False)

# Input folder holding the tagged .txt files and the name of the CSV to write.
# Both are relative paths, so they resolve against the current working
# directory at the time this cell runs.
directory = "../corpus_out"
output_file = "LOPE_PLACES.csv"

# Build the [filename, place, count] rows from every .txt file in the input
# folder, then write them to the CSV file.
data = process_files(directory)
save_to_csv(data, output_file)

# Confirmation message naming the file that was written.
print(f"Data has been saved to {output_file}")


You should consider upgrading via the 'pip install --upgrade pip' command.
Data has been saved to LOPE_PLACES.csv
