In [1]:
import textstat
import pandas as pd
import spacy
import subprocess
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


CompletedProcess(args=['python', '-m', 'spacy', 'download', 'en_core_web_sm'], returncode=0)

In [2]:
# Load English SpaCy model
nlp = spacy.load("en_core_web_sm") #In terminal: python -m spacy download en_core_web_sm

# Download concreteness lexicon
# Direct URL to the CSV file (latest version, Conc.M = concreteness mean)
url = "hf://datasets/StephanAkkerman/MRC-psycholinguistic-database/mrc_psycholinguistic_database.csv"
# Load into pandas
lexicon = pd.read_csv(url)[['Word','Concreteness', 'Imageability']]
lexicon["Word"] = lexicon["Word"].str.lower()
lexicon.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,Word,Concreteness,Imageability
0,&arry,0,0
1,&cello,0,0
2,&d,0,0
3,&em,0,0
4,&flu,0,0


In [3]:
# Define the function

def analyze_text(text):
    doc = nlp(text)
    words = [token.text.lower() for token in doc if token.is_alpha]

    # Readability (Flesch Reading Ease)
    readability = textstat.flesch_reading_ease(text)

    # Imageability & Concreteness (averages)
    imageability_scores = [
        lexicon.loc[lexicon['Word'] == word, 'Imageability'].values[0]
        for word in words if word in lexicon['Word'].values and 'Imageability' in lexicon.columns
    ]
    concreteness_scores = [
        lexicon.loc[lexicon['Word'] == word, 'Concreteness'].values[0]
        for word in words if word in lexicon['Word'].values and 'Concreteness' in lexicon.columns
    ]

    avg_imageability = sum(imageability_scores) / len(imageability_scores) if imageability_scores else None
    avg_concreteness = sum(concreteness_scores) / len(concreteness_scores) if concreteness_scores else None

    # Syntactic simplicity (average number of words per sentence)
    avg_sentence_length = sum(len(sent) for sent in doc.sents) / len(list(doc.sents))

    return {
        "Readability": round(readability, 2),
        "Imageability": round(avg_imageability, 2) if avg_imageability else "N/A",
        "Concreteness": round(avg_concreteness, 2) if avg_concreteness else "N/A",
        "Syntactic simplicity (avg sentence length)": round(avg_sentence_length, 2)
    }


## Classic

In [None]:
# Exemple d'utilisation
poems = pd.read_excel("/home/onyxia/work/Phoetry/generated_poems/poem_classic_.xlsx")

poems.head()

Unnamed: 0,theme,text
0,moon,Then we see the moon shining on her face. It's...
1,leaf,I only I could have the leaf. But it's not lik...
2,flower,For I am the flower of life. And that is what ...
3,tree,Then we see the tree fall down to its own litt...
4,sun,I only I could have the sun. When it was dark ...


In [31]:
poems["Readability"]=poems["text"].apply(lambda x: analyze_text(x)["Readability"])
poems["Imageability"]=poems["text"].apply(lambda x: analyze_text(x)["Imageability"])
poems["Concreteness"]=poems["text"].apply(lambda x: analyze_text(x)["Concreteness"])
poems["Syntactic simplicity"]=poems["text"].apply(lambda x: analyze_text(x)["Syntactic simplicity (avg sentence length)"])
poems


Unnamed: 0,theme,text,Readability,Imageability,Concreteness,Syntactic simplicity
0,moon,Then we see the moon shining on her face. It's...,101.29,329.57,298.64,14.75
1,leaf,I only I could have the leaf. But it's not lik...,99.57,305.75,274.91,21.0
2,flower,For I am the flower of life. And that is what ...,85.73,279.95,251.09,20.33
3,tree,Then we see the tree fall down to its own litt...,89.08,293.44,261.56,18.0
4,sun,I only I could have the sun. When it was dark ...,99.57,288.22,267.02,16.5
5,sunset,For I am the sunset; for it is my light that s...,85.02,300.74,271.41,21.75
6,waterfall,Then we see the waterfall that is at first. It...,69.75,263.41,232.03,28.33
7,butterfly,I only I could have the butterfly. When it was...,90.29,304.07,265.86,17.4
8,bird,Then we see the bird come out of its shell. Th...,90.29,286.56,249.6,17.4
9,dog,For I am the dog who is to be eaten by wolves....,84.0,294.23,245.16,23.0


In [None]:
poems.describe()

Unnamed: 0,Readability,Imageability,Concreteness,Syntactic simplicity
count,13.0,13.0,13.0,13.0
mean,87.191538,293.650769,261.428462,20.073846
std,10.79216,19.875319,18.767763,4.303194
min,64.34,261.06,232.03,14.67
25%,84.0,286.56,249.6,17.4
50%,89.08,293.44,261.56,20.33
75%,90.9,304.07,271.41,21.75
max,101.29,329.57,298.64,28.33


In [34]:
poems.to_excel("Scores_poems.xlsx")

## Haiku

In [7]:
# comparaison
haiku = pd.read_excel("/home/onyxia/work/Phoetry/generated_poems/poem_haiku_.xlsx")
haiku.head()

Unnamed: 0,theme,text
0,moon,The space between?\nMy fingers and lips. I ho...
1,leaf,The smell of grasshoppers. ppening. Grass In t...
2,flower,The sound of her hand. Sings in the wind. Jasm...
3,tree,A leaf falls. From the tree's branch. Leaf by...
4,sun,The shape of the moon. Sighs me outstretched....


In [8]:
haiku["Readability"] = haiku["text"].apply(lambda x: analyze_text(x)["Readability"])
haiku["Imageability"] = haiku["text"].apply(lambda x: analyze_text(x)["Imageability"])
haiku["Concreteness"] = haiku["text"].apply(lambda x: analyze_text(x)["Concreteness"])
haiku["Syntactic simplicity"] = haiku["text"].apply(lambda x: analyze_text(x)["Syntactic simplicity (avg sentence length)"])
haiku

Unnamed: 0,theme,text,Readability,Imageability,Concreteness,Syntactic simplicity
0,moon,The space between?\nMy fingers and lips. I ho...,92.8,244.17,226.83,4.5
1,leaf,The smell of grasshoppers. ppening. Grass In t...,83.83,335.38,322.88,3.0
2,flower,The sound of her hand. Sings in the wind. Jasm...,108.19,306.3,300.9,3.75
3,tree,A leaf falls. From the tree's branch. Leaf by...,117.87,393.0,379.85,4.75
4,sun,The shape of the moon. Sighs me outstretched....,101.56,233.8,237.8,5.0
5,sunset,A water lily's cry. lessening. whispers of My...,66.91,336.56,321.0,4.67
6,waterfall,The slow rise and fall of waterdrops. Waterdr...,83.62,304.92,263.08,5.67
7,butterfly,A single leaf. Pours out of the sky. Dreaming...,91.27,277.64,216.55,3.75
8,bird,The call of a hawk. Whisper Between us. Osprey's,92.29,294.78,279.67,4.33
9,dog,The scent of lilacs. Dapples in the air. Over...,83.32,184.0,191.2,3.5


In [9]:
haiku.describe()

Unnamed: 0,Readability,Imageability,Concreteness,Syntactic simplicity
count,13.0,13.0,13.0,13.0
mean,91.309231,286.625385,270.956923,4.205385
std,13.53585,53.61393,52.288306,0.717247
min,66.91,184.0,191.2,3.0
25%,83.62,254.23,237.38,3.75
50%,91.78,294.78,263.08,4.25
75%,98.72,306.3,300.9,4.67
max,117.87,393.0,379.85,5.67


In [12]:
haiku.to_excel("Scores_haiku.xlsx")