 ## Experiment 2: Linguistic analysis of the 'Second version correction' section

The information concerning this can be found in the section 8.2.3	Experiments on the 'Second version correction' Section

In [1]:
!pip install spacy nltk textblob textstat lexicalrichness wordfreq --quiet
!python -m nltk.downloader punkt
!python -m spacy download en_core_web_sm


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.8/97.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.2/239.2 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m84.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for lexicalrichness (setup.py) ... [?25l[?25hdone
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Collecting en-core-web-sm==3.8.0
  Downloading http

In [2]:
import spacy
import re
import csv
from lexicalrichness import LexicalRichness
from textblob import TextBlob
from collections import Counter
import textstat
from wordfreq import word_frequency


nlp = spacy.load("en_core_web_sm")


In [3]:
CONTENT_POS = {"NOUN", "VERB", "ADJ", "ADV"}
NOMINAL_SUFFIXES = ['tion', 'ment', 'ness', 'ity', 'ance', 'ence', 'ship', 'al']

def analyze_text(text):
    doc = nlp(text)
    blob = TextBlob(text)
    lex = LexicalRichness(text)
    tokens = [token.text.lower() for token in doc if token.is_alpha]

    ttr = lex.ttr
    lexical_density = sum(1 for token in doc if token.pos_ in CONTENT_POS) / len(doc)
    rare_words = [token.text.lower() for token in doc if word_frequency(token.text.lower(), 'en') < 1e-5]
    rare_ratio = len(rare_words) / len(tokens)
    word_counts = Counter(tokens)
    repeated_words = {word: count for word, count in word_counts.items() if count > 2}
    nominalizations = [word for word in tokens if any(word.endswith(suf) for suf in NOMINAL_SUFFIXES)]

    sentences = list(doc.sents)
    avg_sent_length = sum(len(sent) for sent in sentences) / len(sentences)
    passive_voice = [tok for tok in doc if tok.dep_ == "auxpass"]

    clause_types = {'relcl': 0, 'ccomp': 0, 'advcl': 0, 'conj': 0}
    for token in doc:
        if token.dep_ in clause_types:
            clause_types[token.dep_] += 1

    modals = [token.text.lower() for token in doc if token.tag_ == 'MD']
    tenses = Counter([token.tag_ for token in doc if token.tag_ in ['VBD', 'VBP', 'VBZ', 'VBN']])
    logical_markers = re.findall(r'\b(however|although|because|therefore|thus|furthermore|moreover|whereas|despite|in contrast)\b', text.lower())
    examples = re.findall(r'\b(for example|for instance|such as|including)\b', text.lower())
    flesch = textstat.flesch_reading_ease(text)

    return {
        "TTR": round(ttr, 3),
        "Lexical Density": round(lexical_density, 3),
        "Rare Word Ratio": round(rare_ratio, 3),
        "Nominalizations": nominalizations,
        "Avg Sentence Length": round(avg_sent_length, 2),
        "Passive Voice Count": len(passive_voice),
        "Clause Types": clause_types,
        "Modality (modals)": modals,
        "Tense Counts": dict(tenses),
        "Logical Markers": logical_markers,
        "Examples Given": examples,
        "Flesch Reading Ease": flesch,
        "Polarity": blob.sentiment.polarity,
        "Subjectivity": blob.sentiment.subjectivity
    }


In [4]:
def compare_batch(pairs):
    results = []
    for i, (text1, text2) in enumerate(pairs, 1):
        print(f"\n Processing Pair {i}...\n")
        a = analyze_text(text1)
        b = analyze_text(text2)
        result = {
            "Pair": i,
            "TTR_A": a["TTR"], "TTR_B": b["TTR"],
            "LexDensity_A": a["Lexical Density"], "LexDensity_B": b["Lexical Density"],
            "RareWordRatio_A": a["Rare Word Ratio"], "RareWordRatio_B": b["Rare Word Ratio"],
            "SentLen_A": a["Avg Sentence Length"], "SentLen_B": b["Avg Sentence Length"],
            "Passive_A": a["Passive Voice Count"], "Passive_B": b["Passive Voice Count"],
            "Flesch_A": a["Flesch Reading Ease"], "Flesch_B": b["Flesch Reading Ease"],
            "Polarity_A": a["Polarity"], "Polarity_B": b["Polarity"],
            "Subjectivity_A": a["Subjectivity"], "Subjectivity_B": b["Subjectivity"],
            "Modal_A": ", ".join(a["Modality (modals)"]),
            "Modal_B": ", ".join(b["Modality (modals)"]),
            "Nominals_A": ", ".join(a["Nominalizations"]),
            "Nominals_B": ", ".join(b["Nominalizations"]),
            "LogicalMarkers_A": ", ".join(a["Logical Markers"]),
            "LogicalMarkers_B": ", ".join(b["Logical Markers"]),
        }
        results.append(result)
    return results


In [5]:
#Input Text Pairs
text_pairs = [
    (
        """Differences:
I added the before "mamona" to specify the dish you are looking forward to trying. This makes it clearer in English.

Your second version is much improved! You used more precise language and created a friendly tone.

Now, let's discuss how you can further improve your text to sound even more natural and native-like:

1.	Use of Articles: You correctly added "the" before "mamona," which is excellent! Using articles properly is a key aspect of sounding fluent.
2.	Variety in Vocabulary: Instead of repeating "nice spots," you could use synonyms like “beautiful places" or "scenic areas" to add variety.
3.	Transitions: You might consider adding transition phrases to connect your ideas more smoothly. For example, "In addition," or "Furthermore," can help guide the reader through your thoughts.
""",
        """Differences Explained:
“holidays” to “holiday”: In English, “holiday” can refer to a single vacation, while “holidays” often refers to specific celebrations or multiple days off. Using “holiday” makes it clearer that you’re talking about a vacation.

Suggestions for Further Improvement:
1.	Add More Specific Details: You could mention specific towns you want to visit or other dishes you want to try. For example, "I plan to visit towns like X and Y."
2.	Use Descriptive Language: Instead of "nice spots in nature," you could say "breathtaking waterfalls" or "beautiful parks" to create a more vivid image.
"""
    ),
    ("""Differences Explained:
•	I changed "goes to the pool usually" to "usually goes to the pool" for better flow. In English, adverbs like "usually" often come before the verb for clarity.
•	I highlighted the verb forms to show that they are now correct and match the subject "he."

Now, let's explore how you can further improve your second version to sound more natural and native-like. Here are some suggestions:

1.	Vary Sentence Structure: You can combine some sentences to make it more fluid. For example:
•	"My friend Manuel is a student who gets up at 6:00 a.m. He makes breakfast at7:00 a.m. and usually goes to the pool at 2:00 p.m."
2.	Use More Descriptive Language: You could add some adjectives or adverbs to make your writing richer:
•	"He makes a delicious breakfast every day at 7:00 a.m."
3.	Connect Ideas: Use conjunctions to connect ideas:
•	"After breakfast, he usually goes to the pool at 2:00 p.m., and then he goes to class at 10:00 a.m."

These changes can help you create a more engaging and varied text!""",
        """Differences Between Your Version and My Corrections:
1.	Subject and Article: You correctly added "a" before "student."
2.	Verb Forms: You changed "make" to "makes" and "go" to "goes," which is correct for he/she/it.
3.	Time Format: You used "a.m." and "p.m." correctly, which is a great improvement.
4.	Word Choice: You changed "the class" to "class," which is more natural in this context.
5.	Punctuation: You added periods for the time indicators, which improves clarity.

Suggestions for Further Improvement:
•	Variety in Sentence Structure: You can try combining some sentences for variety. For example: "My friend Manuel is a student. He gets up at 6:00 a.m. in the morning and makes breakfast every day at 7:00 a.m."
•	Use of Adverbs: You can add more adverbs to describe how often he does things, like "He usually goes to the pool at 2:00 p.m." to enhance the text.
"""),
    ("""Differences:
•	Corrected "inveite" to "invite."
•	Changed "comfortable" to "comfortably" for proper adverb use.
•	Fixed "this vill" to "this will" and added "and it" for clarity.
•	Changed "I'll waiting for you" to "I'll be waiting for you" for correct verb form.
•	Added a comma before "my friend" for proper punctuation.

Now, let's look at some ways you can further improve your second version of the text to sound more natural and native-like:
1.	Instead of saying "everybody will come," you could say "everyone is invited" to make it sound more inclusive.
2.	You could say "I hope you can join us" instead of "I want to invite you" for a warmer tone.
3.	You might say "It will be a fun celebration!" instead of "it will be great" for more enthusiasm.

These small changes can help your invitation feel more engaging!""",
        """Differences Explained:
1.	"invited" to "invite": The correct form here is "invite," as you are expressing your desire to invite Sebas.
2.	"come comfortable" to "come feeling comfortable": This phrase sounds more natural and clear.
3.	"it's will be great" to "it will be great": The contraction "it's" (it is) is incorrect here; just use "it will."
4.	"I'll waiting" to "I'll be waiting": The correct future form is "I'll be waiting."
5.	Added a comma before "my friend" for better punctuation.

Further Suggestions:
•	Opening: Consider starting with a friendly greeting, like "Hi Sebas! I hope you're doing well."
•	Closing: You could end with a phrase like "I really hope you can make it!" to encourage a positive response.

Would you like to explore any specific aspect of your revised email further?"""),
    ("""Differences Explained:
•	I changed "they are made" to "which are made" to connect the sentences more smoothly.
•	I added "which" before "makes the place cozy" for better sentence structure.
•	I corrected "These make the place cozy" to "which makes the place cozy" to clarify the subject of the sentence.

Now, let's talk about some ways to further improve your second version of the text to sound more natural and native-like:
1.	Instead of "is themed to Venezuelan culture," you could say "is inspired by Venezuelan culture." This sounds more natural in English.
2.	You might want to add a little more detail about the cachapas, such as "They serve Venezuelan food, especially cachapas; a delicious dish made with sweet corn and cheese."
3.	To enhance your conclusion, you could say, 'Overall, Chamos Burger is a wonderful place to enjoy authentic Venezuelan food.'""",
        """Feedback on Your Revisions:
1.	Connection of Ideas: You changed "they are" to "which are," which improves the connection between the sentences. Great job!
2.	Word Choice: You replaced "a lot of" with "many," which makes your writing sound more polished. Well done!
3.	Punctuation: You used a semicolon correctly, but remember that "this" should be followed by a period instead of a comma. It should be "culture; this makes the place cozy."
4.	Phrase Improvement: "Is themed to" could be improved to "reflects" or "represents" for a more formal tone.

Suggestions for Further Improvement:
•	Consider using more varied vocabulary. Instead of "cozy," you could use "inviting" or "welcoming."
•	Try to add more details about the food or the atmosphere to make your writing more engaging."""),
    (
        """Differences Explained
•	"spend" was changed to "spends" to match the subject "he."
•	Added "it" to clarify what he doesn't like.
•	Capitalized "He" at the beginning of the sentence for proper grammar.
•	Added a comma before "and finally" for better sentence structure.
•	Changed "travel" to "travelling" to follow the verb "loves."
•	Added a comma before "but" to separate the clauses correctly.

To further improve your text and make it sound more natural, you could consider the following suggestions:
•	Try to vary your sentence structures. For example, you could combine some sentences to make them flow better.
•	Use more descriptive words to give more detail about your best friend's activities.""",
        """Explanation of Differences:
1.	"spends": You correctly changed "spend" to "spends" to match the subject "he."
2.	"it": You added "it" to clarify what he doesn't like, which improves the sentence's clarity.
3.	"to travel": You changed "travel" to "to travel," which is the correct form after "loves."
4.	Punctuation: You added commas for better readability and flow in your sentences.

Suggestions for Further Improvement:
•	Variety in Sentence Structure: Try to vary your sentence structures to make your writing more engaging. For example, you could combine some sentences or use more complex sentences.
•	Use of Conjunctions: Instead of "and finally," consider using "also" or "in addition" to make the connection smoother."""),
    ("""Differences Explained:
•	I changed "with us" to "with you" for clarity in the introduction.
•	I added a period and adjusted punctuation in several places for better sentence structure.
•	I corrected "TVS" to "TVs" for proper pluralization.
•	I changed "waiting the food" to "waiting for the food" for correct preposition use.
•	I adjusted "costumers" to "customers" for the correct spelling.
•	I added "that" in "it's really great that the music they play" for better sentence flow.
•	I corrected "the Jumbo of Career 33" to "Jumbo on Career 33" for proper phrasing.
•	I capitalized "San Pio Park" and adjusted "the career of Megamall" to "the Megamallarea" for clarity.
•	I changed "the neighbour" to "the neighborhood" for correct terminology in this context.

Ways to Further Improve:
1.	Vary Your Sentence Structure: Try using different sentence types to make your writing more engaging. For example, use questions or exclamatory sentences.
2.	Add More Personal Touch: Share a specific memory or moment from your visit to make your experience more relatable.
3.	Use Descriptive Language: Instead of saying "it's great," describe how the food made you feel or what it looked like.

If you want to continue improving your text or explore more options, let me know!
""",
        """Feedback Summary:
•	Improvements: You made several good changes, including correcting grammar and improving clarity. Your text flows better now, and it’s more engaging.
•	Areas to Improve: Watch for subject-verb agreement (e.g., "has" instead of "have").
Be careful with prepositions (e.g., "related to" instead of "related with").
Consider using more varied vocabulary to enhance your descriptions.


Overall Score: 4 out of 5
This shows significant improvement! Your writing is clear and engaging, and with a few more adjustments, it can be even better.

Would you like to take any further actions from the list?"""),
    (
        """Differences:
•	Added a comma after "Carvajaliños Restaurant" for clarity.
•	Corrected "a Lot" to "a lot" (lowercase "l").
•	Removed "for me" from "The most delicious foods for me are" to make it more concise.
•	Changed "the speciality" to "the specialty" for correct spelling.
•	Changed "it's made with a special sauce flavorful" to "it's made with a special sauce that is flavorful" for better clarity and grammatical structure.
•	Added a semicolon instead of a comma before "it's made" for better punctuation.

Suggestions for Further Improvement:

To make your text sound more natural and native-like, consider the following examples:
1.	Instead of "the restaurant sells a lot of dishes," you could say, "the restaurant offers a wide variety of dishes."
2.	Instead of "the atmosphere is very nice," you might say, "the atmosphere is inviting and pleasant."
3.	Instead of "the specialty is pasta," you could say, "the highlight of the menu is the pasta."

These changes can help enhance your writing by using more descriptive language and improving the flow of your sentences.""",
        """Feedback on Your Revision:
1.	Clarity: Your revised text is clearer, especially in the second sentence where you removed "for me," making it more direct.
2.	Word Choice: "Welcoming" is a better choice than "friendly" when describing an atmosphere; it conveys a positive feeling.
3.	Error Correction: You used "a Lot" instead of "a lot." Remember that "lot" should be in lowercase.
4.	Sentence Structure: The phrase "a special sauce flavorful" is slightly awkward. It would be more natural to say "a flavorful special sauce."

Suggestions for Further Improvement:
•	Try to combine some sentences to improve the flow. For example: "The atmosphere is very nice because you can sit outside in a welcoming environment."
•	Consider adding more details about the dishes or the restaurant's ambiance to make it more engaging."""),
    (
        """Differences between your second version and the correction:
•	Punctuation: I removed the comma before "Africa" and added a comma before "and Germany" to make the list clearer.
•	Verb Form: Changed "Buy" to "buy" (lowercase) to match the sentence structure.
•	Verb Correction: Changed "ger" to "get" for the correct verb form.
•	Capitalization: Changed "AM" to "am" (lowercase) to maintain consistency in verbforms.

Now, let's talk about ways to improve your second version to sound more natural and native-like:
1.	Use Lowercase for Common Nouns: Always use lowercase letters for verbs unless they start a sentence. For example: "I want to buy a house."
2.	Combine Sentences: You can combine some sentences for better flow. For example: "I want to buy a house, a good car, a finca, and a superbike motorbike."
3.	Clarify Your Intentions: You can express your future plans more clearly. For example:  "I am going to get married and have three children: two boys and a girl."

Keep practicing! You're doing well! If you have more questions or want to continue, let me know!""",
        """Changes Highlighted:
•	Removed the comma after "visit."
•	Corrected the capitalization of "buy."
•	Changed "ger" to "get."
•	Changed "motorbike" to "motorcycle" for clarity (both are correct, but "motorcycle" is more commonly used in standard English).
•	Added a comma before "and" to separate the two clauses.

Suggestions for Improvement:
1.	Consistency in Capitalization: Use lowercase for verbs like "buy" unless they are at the beginning of a sentence.
2.	Punctuation: Remember to use commas correctly in lists and before conjunctions.
3.	Clarity: Using "motorcycle" can make your writing clearer for a wider audience.

Would you like to make any more changes or ask questions about your text? """),
    (
        """Differences Explained:
1.	"travelling to all around" ➜ "traveling throughout": This change makes the phrase more natural in English.
2.	"we should know well our country first than other countries" ➜ "we should know our country well before other countries": This rephrasing corrects the comparative structure and improves clarity.
3.	Added punctuation: Commas and semicolons were added for better sentence structure and flow.
4.	"help as many people as I can with my job" ➜ "help as many people as I can through my work": This change makes the sentence sound more professional and clear.
5.	"make a nice family with a lot of love" ➜ "make a loving family": This revision simplifies the phrase and avoids repetition of "nice."

Now, let's talk about how you can further improve your second version to sound even more natural and native-like:

1.	Vary Sentence Structure: Try to mix short and long sentences for better rhythm. For example, "I have a lot of dreams for the future; one of them is traveling throughout Colombia." can be rephrased to "I have many dreams for the future. One of them is to travel throughout Colombia."
2.	Use Synonyms: Instead of repeating "nice," consider using synonyms like "wonderful," "loving," or "beautiful" to enhance your vocabulary.
3.	Add More Details: You could elaborate on your dreams. For example, instead of just saying you want to meet Shakira, you could add why you admire her or what you would like to ask her.
""",
        """Differences Explained:
1.	"traveling throughout" is a more natural phrase than "travelling to all around."
2.	Changing "know well our country first than" to "know our country well before visiting“ improves clarity and grammatical structure.
3.	"buy a lot of clothes" sounds more natural than "buy so many clothes."
4.	"have a good sense of style" is clearer and more common than "have a good style."
5.	The use of semicolons helps connect related ideas more smoothly, enhancing readability.

Suggestions for Further Improvement:
•	To make your text sound even more natural, you could consider adding more details or examples about what kind of activist work you plan to do. This would enhance the depth of your writing.
•	You might also want to elaborate on what "good sense of style" means to you—this can give the reader a clearer picture of your aspirations.
"""
    )
]


In [6]:
batch_results = compare_batch(text_pairs)



 Processing Pair 1...


 Processing Pair 2...


 Processing Pair 3...


 Processing Pair 4...


 Processing Pair 5...


 Processing Pair 6...


 Processing Pair 7...


 Processing Pair 8...


 Processing Pair 9...



In [7]:
import pandas as pd

df = pd.DataFrame(batch_results)
df


Unnamed: 0,Pair,TTR_A,TTR_B,LexDensity_A,LexDensity_B,RareWordRatio_A,RareWordRatio_B,SentLen_A,SentLen_B,Passive_A,...,Polarity_A,Polarity_B,Subjectivity_A,Subjectivity_B,Modal_A,Modal_B,Nominals_A,Nominals_B,LogicalMarkers_A,LogicalMarkers_B
0,1,0.738,0.723,0.4,0.387,0.5,0.495,22.5,27.4,0,...,0.367188,0.241327,0.55,0.443878,"can, could, might, can","can, could, could","natural, transition, addition","vacation, vacation, improvement, mention",furthermore,
1,2,0.552,0.585,0.408,0.277,0.436,0.607,29.75,27.5,0,...,0.185185,0.211905,0.385185,0.461905,"can, can, could, can","can, can","clarity, natural, sentence","improvement, natural, punctuation, clarity, im...",,
2,3,0.579,0.6,0.323,0.294,0.609,0.661,24.11,26.75,1,...,0.290278,0.284126,0.430556,0.467424,"will, 'll, 'll, can, will, could, could, can, ...","will, will, will, 'll, 'll, 'll, could, can, w...","clarity, punctuation, natural, celebration, in...","natural, contraction, punctuation",,
3,4,0.608,0.661,0.378,0.311,0.507,0.623,25.12,17.27,4,...,0.231833,0.377083,0.527333,0.570833,"could, might, could","should, should, could, could","sentence, sentence, natural, natural, enhance","connection, connection, punctuation, improveme...",,
4,5,0.636,0.631,0.377,0.294,0.611,0.701,23.86,35.4,1,...,0.294872,0.179167,0.433333,0.554167,"could, could",could,"sentence, sentence, natural, sentence","explanation, sentence, clarity, punctuation, r...",,
5,6,0.538,0.758,0.326,0.409,0.604,0.544,20.07,19.57,0,...,0.320312,0.251103,0.432812,0.479902,,"can, would","clarity, introduction, punctuation, several, s...","several, clarity, agreement, enhance, improvement",,
6,7,0.508,0.682,0.336,0.358,0.633,0.532,26.2,20.78,3,...,0.379312,0.255364,0.528307,0.541017,"could, might, could, can","should, would, can","clarity, speciality, special, special, clarity...","clarity, sentence, correction, sentence, speci...",,because
7,8,0.559,0.673,0.345,0.292,0.609,0.794,20.08,18.67,1,...,0.270433,0.14,0.358333,0.3,"can, can","can, would","correction, punctuation, sentence, correction,...","capitalization, clarity, improvement, capitali...",,
8,9,0.55,0.655,0.362,0.411,0.479,0.464,23.86,21.89,2,...,0.338235,0.369048,0.518137,0.525397,"should, should, can, can, can, can, could, cou...","could, would, might, can","natural, clarity, punctuation, sentence, sente...","natural, clarity, grammatical, natural, readab...",,


In [8]:
#Interpretation Framework
from IPython.display import Markdown

Markdown("""
### Interpretation Guide

| Feature | Meaning if Higher | Meaning if Lower |
|--------|-------------------|------------------|
| **TTR (Type-Token Ratio)** | More lexical diversity | Repetitive vocabulary |
| **Lexical Density** | Dense, information-rich | Light, informal or vague |
| **Rare Word Ratio** | More domain-specific, formal vocabulary | Common, general-purpose language |
| **Sentence Length** | More complex syntax, academic tone | Simpler syntax, easy readability |
| **Passive Voice** | Formal or impersonal tone | Direct and active writing |
| **Flesch Score** | Easier to read if high | More difficult if low |
| **Modality** | Use of certainty, probability | Lacking modal nuance |
| **Nominalizations** | More abstract, academic tone | More direct or basic expression |
| **Logical Markers** | Shows structured reasoning | Missing discourse cohesion |
""")



### Interpretation Guide

| Feature | Meaning if Higher | Meaning if Lower |
|--------|-------------------|------------------|
| **TTR (Type-Token Ratio)** | More lexical diversity | Repetitive vocabulary |
| **Lexical Density** | Dense, information-rich | Light, informal or vague |
| **Rare Word Ratio** | More domain-specific, formal vocabulary | Common, general-purpose language |
| **Sentence Length** | More complex syntax, academic tone | Simpler syntax, easy readability |
| **Passive Voice** | Formal or impersonal tone | Direct and active writing |
| **Flesch Score** | Easier to read if high | More difficult if low |
| **Modality** | Use of certainty, probability | Lacking modal nuance |
| **Nominalizations** | More abstract, academic tone | More direct or basic expression |
| **Logical Markers** | Shows structured reasoning | Missing discourse cohesion |


**Polarity**
Measures the sentiment orientation of the text.

Range: -1.0 (very negative) to +1.0 (very positive), with 0 being neutral.

Polarity Value	Meaning	Example Tone
> 0.5	Strongly positive	Enthusiastic, optimistic
0.1 to 0.5	Mildly positive	Positive but moderate
Around 0	Neutral	Objective, factual
-0.1 to -0.5	Mildly negative	Slightly critical or sad
< -0.5	Strongly negative	Angry, very critical

Use in analysis:

Higher polarity suggests more positive/optimistic or supportive language.

Lower polarity indicates negativity, criticism, or concern.

**Subjectivity**
Measures how subjective or objective the text is.

Range: 0.0 (very objective/factual) to 1.0 (very subjective/opinionated).

Subjectivity Value	Meaning	Example Tone
< 0.3	Mostly objective/factual	Informative, neutral reports
0.3 to 0.6	Mix of facts and opinion	Balanced, some personal views
> 0.6	Highly subjective	Opinionated, emotional

Use in analysis:

High subjectivity signals personal opinions, feelings, or judgments.

Low subjectivity implies a more neutral, report-like style.

In [9]:
#Export Results to CSV
df.to_csv("text_comparisons.csv", index=False)
print("Results exported")


Results exported


In [10]:
# Calculate average metrics for Text A and Text B, excluding non-numeric columns
numeric_cols_a = [col for col in df.columns if col.endswith('_A') and df[col].dtype != 'object']
numeric_cols_b = [col for col in df.columns if col.endswith('_B') and df[col].dtype != 'object']

avg_a = df[numeric_cols_a].mean()
avg_b = df[numeric_cols_b].mean()

avg_a.index = avg_a.index.str.replace('_A', '_Average')
avg_b.index = avg_b.index.str.replace('_B', '_Average')

average_comparison = pd.concat([avg_a, avg_b], axis=1)

print("Average Linguistic Features: Text A vs. Text B")
display(average_comparison)

Average Linguistic Features: Text A vs. Text B


Unnamed: 0,0,1
TTR_Average,0.585333,0.663111
LexDensity_Average,0.361667,0.337
RareWordRatio_Average,0.554222,0.602333
SentLen_Average,23.95,23.914444
Passive_Average,1.333333,0.444444
Flesch_Average,61.535918,58.586801
Polarity_Average,0.297516,0.256569
Subjectivity_Average,0.462666,0.482725
