In [3]:
#απλός κώδικας για υπολογισμό lexical density συνολικά
import os
import pandas as pd
import spacy

# Large spaCy pipelines used for POS tagging; the French one is loaded first, then the Greek one
nlp_fr, nlp_el = (
    spacy.load(model_name)
    for model_name in ('fr_core_news_lg', 'el_core_news_lg')
)

# POS tags (token.pos_ values) that are counted as lexical (content) words.
# NOTE(review): 'NUM' and 'X' are included as lexical here — confirm this matches
# the definition of lexical density used in the study.
lexical_tags = {'NOUN', 'ADJ', 'VERB', 'PROPN', 'ADV', 'INTJ', 'NUM', 'X'}

def calculate_lexical_density(text, nlp_model, tags=None, ignore_tags=()):
    """Return the share of tokens in ``text`` whose POS tag is a lexical tag.

    Args:
        text: The string to analyse.
        nlp_model: Callable that turns ``text`` into an iterable of tokens,
            each exposing a ``pos_`` attribute (e.g. a loaded spaCy pipeline).
        tags: Optional iterable of POS tags to count as lexical. When omitted,
            the module-level ``lexical_tags`` set is used.
        ignore_tags: Optional iterable of POS tags to leave out of BOTH the
            numerator and the denominator (e.g. ``('PUNCT', 'SPACE')`` to
            measure density over words only). Empty by default, so every
            token counts towards the denominator.

    Returns:
        float: lexical tokens / counted tokens, or ``0.0`` when no token is
        counted (empty text, or every token ignored).
    """
    lexical_set = lexical_tags if tags is None else set(tags)
    ignored_set = set(ignore_tags)

    counted = 0
    lexical = 0
    # Single pass over the tokens; no intermediate list of tokens is built.
    for token in nlp_model(text):
        pos = token.pos_
        if pos in ignored_set:
            continue
        counted += 1
        if pos in lexical_set:
            lexical += 1

    # Guard against division by zero for empty input.
    if counted == 0:
        return 0.0
    return lexical / counted

# Get the current directory
current_directory = os.getcwd()


def _mean_lexical_density(texts, nlp_model):
    """Return the mean per-cell lexical density of a column of texts.

    Empty cells (NaN) are dropped first: ``str(NaN)`` is the literal string
    'nan', which would otherwise be tagged and scored as if it were real text.
    The result is NaN when the column has no non-empty cell.
    """
    non_empty = texts.dropna()
    return non_empty.apply(
        lambda text: calculate_lexical_density(str(text), nlp_model)
    ).mean()


# One (Title, LD FR, LD EL) record per processed workbook, kept so that the
# figures can be reused programmatically instead of being copied from the
# printed output.
ld_results = []

# Iterate through each Excel file in the directory (sorted for a stable order)
for filename in sorted(os.listdir(current_directory)):
    # Skip non-workbooks and the "~$..." lock files Excel creates while a
    # workbook is open (they end in .xlsx but are not readable workbooks).
    if not filename.lower().endswith(".xlsx") or filename.startswith("~$"):
        continue

    # Load the Excel file
    # NOTE(review): read_excel treats the first row as a header by default —
    # confirm the workbooks have a header row, otherwise the first segment
    # pair is silently excluded.
    df = pd.read_excel(os.path.join(current_directory, filename))

    # Check if the file has the required columns
    if df.shape[1] < 2:
        print(f"{filename}: Not enough columns for processing")
        continue

    # First column holds the Greek text, second column the French text
    greek_texts = df.iloc[:, 0]
    french_texts = df.iloc[:, 1]

    # Calculate Lexical Density for Greek and French text
    ld_el = _mean_lexical_density(greek_texts, nlp_el)
    ld_fr = _mean_lexical_density(french_texts, nlp_fr)
    ld_results.append((filename, ld_fr, ld_el))

    # Print the results
    print(f"{filename}: LD FR = {ld_fr:.4f}, LD EL = {ld_el:.4f}")

# Tabular view of the collected results
ld_results_df = pd.DataFrame(ld_results, columns=["Title", "LD FR", "LD EL"])



1921 CRAINQUEBILLE FRANCE PROTOPATSIS FR EL.xlsx: LD FR = 0.4249, LD EL = 0.4367
1923 MADAME BOVARY FLAUBERT THEOTOKIS FR EL.xlsx: LD FR = 0.4118, LD EL = 0.4386
1924 ENFER BARBUSSE UNKNOWN FR EL.xlsx: LD FR = 0.3904, LD EL = 0.4342
1925 ASSOMMOIR ZOLA KOTSIKAS FR EL.xlsx: LD FR = 0.4423, LD EL = 0.4489
1925 MADELEINE ZOLA SIMIRIOTIS FR EL.xlsx: LD FR = 0.4304, LD EL = 0.4295
1930 GRANDET BALZAC PAPALEXANDROU FR EL.xlsx: LD FR = 0.4352, LD EL = 0.4606
1934 BEL-AMI MAUPASSANT UNKNOWN FR EL.xlsx: LD FR = 0.4073, LD EL = 0.4370
1934 NUIT ZOLA UNKNOWN FR EL.xlsx: LD FR = 0.4602, LD EL = 0.4624
1935 GIDE PORTE GAZI FR EL.xlsx: LD FR = 0.4226, LD EL = 0.4104
1940 IMMORALISTE GIDE CHOURMOUZIOS FR EL.xlsx: LD FR = 0.4216, LD EL = 0.4630
1940 MOURET ZOLA PIKROS FR EL.xlsx: LD FR = 0.4182, LD EL = 0.4354
1943 THERESE ZOLA KONTOS FR EL.xlsx: LD FR = 0.4390, LD EL = 0.4700
1945 1793 HUGO MARGETIS FR EL.xlsx: LD FR = 0.4125, LD EL = 0.4517
1950 CHARTREUSE STENDHAL MPERATIS FR EL.xlsx: LD FR = 0.437

In [4]:
#οργάνωση αποτελεσμάτων σε excel
import os
import pandas as pd
from dotenv import load_dotenv

# Pull settings from a .env file (when one is used) into the environment
load_dotenv()

# The destination folder is configured through the OUTPUT_DIRECTORY variable
output_directory = os.getenv('OUTPUT_DIRECTORY')

if output_directory is None or output_directory == "":
    raise ValueError("The OUTPUT_DIRECTORY environment variable is not set. Please set it.")

# Lexical density figures, one row per workbook: [title, LD FR, LD EL]
data = [
    ["1921 CRAINQUEBILLE FRANCE PROTOPATSIS FR EL.xlsx", 0.4249, 0.4367],
    ["1923 MADAME BOVARY FLAUBERT THEOTOKIS FR EL.xlsx", 0.4118, 0.4386],
    ["1924 ENFER BARBUSSE UNKNOWN FR EL.xlsx", 0.3904, 0.4342],
    ["1925 ASSOMMOIR ZOLA KOTSIKAS FR EL.xlsx", 0.4423, 0.4489],
    ["1925 MADELEINE ZOLA SIMIRIOTIS FR EL.xlsx", 0.4304, 0.4295],
    ["1930 GRANDET BALZAC PAPALEXANDROU FR EL.xlsx", 0.4352, 0.4606],
    ["1934 BEL-AMI MAUPASSANT UNKNOWN FR EL.xlsx", 0.4073, 0.4370],
    ["1934 NUIT ZOLA UNKNOWN FR EL.xlsx", 0.4602, 0.4624],
    ["1935 GIDE PORTE GAZI FR EL.xlsx", 0.4226, 0.4104],
    ["1940 IMMORALISTE GIDE CHOURMOUZIOS FR EL.xlsx", 0.4216, 0.4630],
    ["1940 MOURET ZOLA PIKROS FR EL.xlsx", 0.4182, 0.4354],
    ["1943 THERESE ZOLA KONTOS FR EL.xlsx", 0.4390, 0.4700],
    ["1945 1793 HUGO MARGETIS FR EL.xlsx", 0.4125, 0.4517],
    ["1950 CHARTREUSE STENDHAL MPERATIS FR EL.xlsx", 0.4376, 0.4584],
    ["1953 ASSOMMOIR  ZOLA PATATZIS FR EL.xlsx", 0.4423, 0.3999],
    ["1954 LE FEU BARBUSSE POLITIS FR EL.xlsx", 0.4020, 0.4576],
    ["1955 FAUX GIDE DIKTAIOS FR EL.xlsx", 0.4134, 0.4284],
    ["1960 NANA ZOLA PRATSIKAS FR EL.xlsx", 0.4247, 0.4506],
    ["1962 NOEUD MAURIAC OIKONOMOU FR EL.xlsx", 0.4038, 0.4419],
    ["1964 CHOUANS BALZAC ARGYROPOULOS FR EL.xlsx", 0.4413, 0.3952],
    ["1966 CHUTE CAMUS  POLENAKIS FR EL.xlsx", 0.4106, 0.4348],
    ["1971 COUSIN BALZAC THEOFANOUS FR EL.xlsx", 0.4324, 0.4457],
    ["1975 RÉVOLTE FRANCE DAFNI FR EL.xlsx", 0.4406, 0.4689],
    ["1975 VOLEUR GENET DIMITRIADIS FR EL.xlsx", 0.4066, 0.4327],
    ["1981 ASSOMMOIR ZOLA ASLANOGLOU FR EL.xlsx", 0.4359, 0.4663],
    ["1984 CHOUANS BALZAC ALEXANDROU FR EL.xlsx", 0.4322, 0.4523],
    ["1985 AMANT DURAS TSALIKIDOU FR EL.xlsx", 0.4077, 0.4425],
    ["1986 VOYAGE CELINE GIATRAKOU-FOSSI FR EL.xlsx", 0.4122, 0.4357],
    ["1987 CHUTE CAMUS EFTHIMIADOU  FR EL.xlsx", 0.4113, 0.4397],
    ["1990 PESTE CAMUS TATANI FR EL.xlsx", 0.4244, 0.4399],
    ["1992 GUERRE PERGAUD KONDYLIS FR EL.xlsx", 0.4232, 0.4283],
    ["1995 TESTE VALERY PATRIKIOS FR EL.xlsx", 0.4159, 0.4421],
    ["1998 ETRANGER CAMUS KARAKITSOU KASAMPALOGLOU FR EL.xlsx", 0.3880, 0.4568],
    ["1998 MEAULNES ALAIN FOURNIER PALLANTIOU FR EL.xlsx", 0.4288, 0.4744],
]

# Turn the rows into a table with named columns
df = pd.DataFrame.from_records(data, columns=["Title", "LD FR", "LD EL"])

# Where the spreadsheet will be written
output_file = os.path.join(output_directory, "lexical_density_results.xlsx")

# Create the destination folder if it is missing, then write the table without the index column
os.makedirs(output_directory, exist_ok=True)
df.to_excel(output_file, index=False)

print("Data has been written to {}".format(output_file))


Data has been written to D:\ΔΙΔΑΚΤΟΡΙΚΗ ΠΡΟΤΑΣΗ\ΣΩΜΑ ΛΟΓΟΤΕΧΝΙΚΩΝ ΚΕΙΜΕΝΩΝ\ΠΑΡΑΛΛΗΛΟ ΗΣΚ ΑΝΑΦΟΡΑΣ\MARIA CORPUS DRIVE\EXCEL RETRIEVED\Outputs_FINAL\lexical_density_results.xlsx


In [5]:
#προσθήκη στήλης σύγκρισης μεταξύ LD FR & LD EL στο αρχείο excel
import pandas as pd
import os
from dotenv import load_dotenv

# Load environment variables from .env file, if used
load_dotenv()

# Retrieve the input directory from environment variables
input_directory = os.getenv('INPUT_DIRECTORY')

if not input_directory:
    raise ValueError("The INPUT_DIRECTORY environment variable is not set. Please set it.")

# Define the input and output file paths (the result is written next to the input)
input_file = os.path.join(input_directory, "lexical_density_results.xlsx")
output_file = os.path.join(input_directory, "lexical_density_results_comparison.xlsx")

# Load the Excel file
df = pd.read_excel(input_file)

# Fail early, naming the file, if either density column is absent
missing_columns = [col for col in ('LD FR', 'LD EL') if col not in df.columns]
if missing_columns:
    raise KeyError(f"{input_file} is missing required column(s): {missing_columns}")

# Add the comparison column with vectorised masks instead of a row-wise apply.
# Every row starts as '=' and is overwritten where FR is strictly greater or
# strictly smaller than EL; rows with a missing value therefore stay '=',
# and a sheet with no rows simply gets an empty column instead of failing.
df['Comparison'] = '='
df.loc[df['LD FR'] > df['LD EL'], 'Comparison'] = '>'
df.loc[df['LD FR'] < df['LD EL'], 'Comparison'] = '<'

# Save the modified DataFrame to a new Excel file
df.to_excel(output_file, index=False)

print(f"Data with comparisons has been written to {output_file}")


Data with comparisons has been written to D:\ΔΙΔΑΚΤΟΡΙΚΗ ΠΡΟΤΑΣΗ\ΣΩΜΑ ΛΟΓΟΤΕΧΝΙΚΩΝ ΚΕΙΜΕΝΩΝ\ΠΑΡΑΛΛΗΛΟ ΗΣΚ ΑΝΑΦΟΡΑΣ\MARIA CORPUS DRIVE\EXCEL RETRIEVED\Outputs_FINAL\lexical_density_results_comparison.xlsx
