In [1]:
import subprocess
import sys

import pandas as pd
import spacy
import textstat
# Install the small English spaCy model only when it is missing, so re-running
# the notebook does not download it again. `sys.executable` targets the
# interpreter running this kernel (a bare "python" may resolve to a different
# environment), and `check=True` raises if the download fails instead of
# letting the failure surface later at `spacy.load`.
if not spacy.util.is_package("en_core_web_sm"):
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


CompletedProcess(args=['python', '-m', 'spacy', 'download', 'en_core_web_sm'], returncode=0)

In [2]:
# spaCy's small English pipeline. If it is missing, install it from a terminal
# with: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

# Psycholinguistic lexicon: the MRC database CSV, read through the hf:// scheme.
url = "hf://datasets/StephanAkkerman/MRC-psycholinguistic-database/mrc_psycholinguistic_database.csv"

# Keep only the word and its two rating columns, and lower-case the words so
# they can be compared with lower-cased tokens.
lexicon = (
    pd.read_csv(url)[['Word', 'Concreteness', 'Imageability']]
    .assign(Word=lambda frame: frame["Word"].str.lower())
)
lexicon.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,Word,Concreteness,Imageability
0,&arry,0,0
1,&cello,0,0
2,&d,0,0
3,&em,0,0
4,&flu,0,0


In [3]:
# Define the function

def analyze_text(text):
    """Score a text on readability, imageability, concreteness and sentence length.

    Relies on notebook-level globals: ``nlp`` (the loaded spaCy pipeline),
    ``lexicon`` (a DataFrame with a lower-cased ``Word`` column plus
    ``Concreteness`` and ``Imageability`` columns) and the ``textstat`` module.

    Parameters
    ----------
    text : str
        Raw text to analyse.

    Returns
    -------
    dict
        - ``"Readability"``: ``textstat.flesch_reading_ease(text)``, rounded
          to 2 decimals.
        - ``"Imageability"`` / ``"Concreteness"``: mean lexicon value over the
          alphabetic tokens that appear in the lexicon (a repeated token
          counts once per occurrence), rounded to 2 decimals, or ``"N/A"``
          when no token is found in the lexicon.
        - ``"Syntactic simplicity (avg sentence length)"``: mean number of
          spaCy tokens per sentence (punctuation tokens included), rounded to
          2 decimals; ``0.0`` when the text contains no sentence.
    """
    doc = nlp(text)
    words = [token.text.lower() for token in doc if token.is_alpha]

    # Readability (Flesch Reading Ease)
    readability = textstat.flesch_reading_ease(text)

    # One vectorised pass over the lexicon instead of several full scans per
    # token. `keep="first"` mirrors the previous `.values[0]` behaviour when a
    # word is listed more than once.
    known = (
        lexicon[lexicon["Word"].isin(set(words))]
        .drop_duplicates(subset="Word", keep="first")
        .set_index("Word")
    )
    matched_words = [word for word in words if word in known.index]

    def _mean_score(column):
        """Return the mean of `column` over the matched tokens, or None if there is nothing to average."""
        if not matched_words or column not in known.columns:
            return None
        # NOTE(review): the lexicon preview shows rows with 0 ratings, which
        # look like unrated entries; they are still averaged in here, as
        # before. Confirm whether zeros should be excluded.
        scores = [known.at[word, column] for word in matched_words]
        return sum(scores) / len(scores)

    avg_imageability = _mean_score("Imageability")
    avg_concreteness = _mean_score("Concreteness")

    # Sentence length in spaCy tokens. Materialise the lengths once and guard
    # against texts that yield no sentence (previously a ZeroDivisionError).
    sentence_lengths = [len(sent) for sent in doc.sents]
    if sentence_lengths:
        avg_sentence_length = sum(sentence_lengths) / len(sentence_lengths)
    else:
        avg_sentence_length = 0.0

    return {
        "Readability": round(readability, 2),
        # Compare against None explicitly: a genuine average of 0 is a valid
        # score and must not be reported as "N/A".
        "Imageability": round(avg_imageability, 2) if avg_imageability is not None else "N/A",
        "Concreteness": round(avg_concreteness, 2) if avg_concreteness is not None else "N/A",
        "Syntactic simplicity (avg sentence length)": round(avg_sentence_length, 2)
    }


In [4]:
# Example usage: load the generated poems that will be scored.
poems = pd.read_excel(
    "/home/onyxia/work/Phoetry/generated_poems/poem_classic_.xlsx"
)

poems.head()

Unnamed: 0,theme,text
0,moon,Then we see the moon shining on her face. It's...
1,leaf,I only I could have the leaf. But it's not lik...
2,flower,For I am the flower of life. And that is what ...
3,tree,Then we see the tree fall down to its own litt...
4,sun,I only I could have the sun. When it was dark ...


In [5]:
# Score every poem exactly once and expand the result dicts into columns.
# (Previously analyze_text ran four times per poem, once per metric.)
# Maps each key returned by analyze_text to the column name used in `poems`.
metric_columns = {
    "Readability": "Readability",
    "Imageability": "Imageability",
    "Concreteness": "Concreteness",
    "Syntactic simplicity (avg sentence length)": "Syntactic simplicity",
}
poem_scores = pd.DataFrame(
    poems["text"].map(analyze_text).tolist(),
    index=poems.index,
    columns=list(metric_columns),  # keeps the columns present even if `poems` is empty
)
for result_key, column_name in metric_columns.items():
    # Assign positionally; rows are already in the same order as `poems`.
    poems[column_name] = poem_scores[result_key].to_numpy()
poems


Unnamed: 0,theme,text,Readability,Imageability,Concreteness,Syntactic simplicity
0,moon,Then we see the moon shining on her face. It's...,101.29,329.57,298.64,14.75
1,leaf,I only I could have the leaf. But it's not lik...,99.57,305.75,274.91,21.0
2,flower,For I am the flower of life. And that is what ...,85.73,279.95,251.09,20.33
3,tree,Then we see the tree fall down to its own litt...,89.08,293.44,261.56,18.0
4,sun,I only I could have the sun. When it was dark ...,99.57,288.22,267.02,16.5
5,sunset,For I am the sunset; for it is my light that s...,85.02,300.74,271.41,21.75
6,waterfall,Then we see the waterfall that is at first. It...,69.75,263.41,232.03,28.33
7,butterfly,I only I could have the butterfly. When it was...,90.29,304.07,265.86,17.4
8,bird,Then we see the bird come out of its shell. Th...,90.29,286.56,249.6,17.4
9,dog,For I am the dog who is to be eaten by wolves....,84.0,294.23,245.16,23.0


Unnamed: 0,theme,text,Readability,Imageability,Concreteness,Syntactic simplicity
0,moon,Then we see the moon shining on her face. It's...,1.0,1.0,1.0,0.01
1,leaf,I only I could have the leaf. But it's not lik...,0.95,0.65,0.64,0.46
2,flower,For I am the flower of life. And that is what ...,0.58,0.28,0.29,0.41
3,tree,Then we see the tree fall down to its own litt...,0.67,0.47,0.44,0.24
4,sun,I only I could have the sun. When it was dark ...,0.95,0.4,0.53,0.13
5,sunset,For I am the sunset; for it is my light that s...,0.56,0.58,0.59,0.52
6,waterfall,Then we see the waterfall that is at first. It...,0.15,0.03,0.0,1.0
7,butterfly,I only I could have the butterfly. When it was...,0.7,0.63,0.51,0.2
8,bird,Then we see the bird come out of its shell. Th...,0.7,0.37,0.26,0.2
9,dog,For I am the dog who is to be eaten by wolves....,0.53,0.48,0.2,0.61


In [10]:
# Summary statistics of the normalized score table.
# NOTE(review): `poems_normalized` is not defined in any cell visible here
# (execution counts jump from 5 to 10). Confirm that the cells building it are
# still in the notebook so that Restart & Run All succeeds.
poems_normalized.describe()

Unnamed: 0,Readability,Imageability,Concreteness,Syntactic simplicity
count,13.0,13.0,13.0,13.0
mean,0.617692,0.475385,0.441538,0.395385
std,0.29098,0.289759,0.281509,0.315479
min,0.0,0.0,0.0,0.0
25%,0.53,0.37,0.26,0.2
50%,0.67,0.47,0.44,0.41
75%,0.72,0.63,0.59,0.52
max,1.0,1.0,1.0,1.0


In [11]:
# Write the normalized scores to an Excel workbook; the relative path resolves
# against the current working directory.
poems_normalized.to_excel("Scores_poems.xlsx")