In [30]:
pip install textstat

Note: you may need to restart the kernel to use updated packages.


### Textstats

In [7]:
import textstat

In [8]:
textstat.set_lang("en")

In [9]:
text = """
Data science is the main focus of most sciences and studies right now, 
it needs a lot of things like AI, programming, statistics, 
business understanding, effective presentation skills and much more. 
That's why it's not easy to understand or study. But we can do it, we are doing it.
Data science has become the standard solving problem framework for academia and 
the industry and it's going to be like that for a while. But we need to remember 
where we are coming from, who we are and where we are going.
"""

In [10]:
# Count Syllables
textstat.syllable_count(text)

126

In [11]:
# Lexicon count
textstat.lexicon_count(text, removepunct=True)

91

In [12]:

# Sentence count
textstat.sentence_count(text)

4

In [13]:
# Flesch Reading Ease formula
textstat.flesch_reading_ease(text)

65.25

In [14]:
# Flesch-Kincaid Grade Level
textstat.flesch_kincaid_grade(text)

9.8

This means the text is very difficult to read. Best understood by university graduates. Which seems fine.

In [15]:
# Fog Scale (Gunning FOG Formula)
textstat.gunning_fog(text)

11.76

In [16]:
# Automated Readability Index
textstat.automated_readability_index(text)

11.8

In [17]:
# Dale-Chall Readability Score
textstat.dale_chall_readability_score(text)

7.54

In [18]:
# Readability Consensus
textstat.text_standard(text, float_output=False)

'11th and 12th grade'

In [19]:
# Run all at once
import inspect
funcs = ["textstat." + inspect.getmembers(textstat, predicate=inspect.ismethod)[i][0] for i in range(1,28)]

In [20]:

for elem in funcs:
    method = eval(elem)
    textstat.set_lang("en")
    print(elem.split(".")[1])
    print(method(text))
    print(" ")

avg_character_per_word
4.64
 
avg_letter_per_word
4.47
 
avg_sentence_length
22.8
 
avg_sentence_per_word
0.04
 
avg_syllables_per_word
1.4
 
char_count
422
 
coleman_liau_index
8.94
 
dale_chall_readability_score
7.54
 
dale_chall_readability_score_v2
7.54
 
difficult_words
16
 
difficult_words_list
['programming', 'framework', 'data', 'doing', 'presentation', 'industry', 'studies', 'focus', 'science', 'understanding', 'sciences', 'standard', 'problem', 'effective', 'statistics', 'solving']
 
flesch_kincaid_grade
9.8
 
flesch_reading_ease
65.25
 
gunning_fog
11.76
 
letter_count
407
 
lexicon_count
91
 
linsear_write_formula
10.7
 
lix
42.58
 
polysyllabcount
8
 
reading_time
6.08
 
rix
4.5
 
sentence_count
4
 
set_lang
None
 
smog_index
11.2
 
spache_readability
5.5588379120879114
 
syllable_count
126
 
text_standard
11th and 12th grade
 


#### Correct Spelling

In [31]:
pip install autocorrect

Collecting autocorrect
  Downloading autocorrect-1.1.0.tar.gz (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 1.1 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: autocorrect
  Building wheel for autocorrect (setup.py) ... [?25ldone
[?25h  Created wheel for autocorrect: filename=autocorrect-1.1.0-py3-none-any.whl size=1810765 sha256=03719765741247d3062790ee2f018ff8c95913e79244269fb69b0b76149fa8eb
  Stored in directory: /Users/AliceGuo/Library/Caches/pip/wheels/90/36/07/189583e2a4285fd6c41b0a80d94d14ac0bb93a33c20d4ad3ed
Successfully built autocorrect
Installing collected packages: autocorrect
Successfully installed autocorrect-1.1.0
Note: you may need to restart the kernel to use updated packages.


In [32]:
# Here I'm misspelling :
# presentation as presentatio
# focus as focsu
# framework as framwork 
text = """
Data science is the main focsu of most sciences and studies right now, 
it needs a lot of things like AI, programming, statistics, 
business understanding, effective presentatio skills and much more. 
That's why it's not easy to understand or study. But we can do it, we are doing it.
Data science has become the standard solving problem framwork for academia and 
the industry and it's going to be like that for a while. But we need to remember 
where we are coming from, who we are and where we are going.
"""

In [33]:
#There are great libraries for that in Python, but autocorrect is the easiest one to use
from autocorrect import Speller
check = Speller(lang='en')
check(text)

"\nData science is the main focus of most sciences and studies right now, \nit needs a lot of things like Ai, programming, statistics, \nbusiness understanding, effective presentation skills and much more. \nThat's why it's not easy to understand or study. But we can do it, we are doing it.\nData science has become the standard solving problem framework for academia and \nthe industry and it's going to be like that for a while. But we need to remember \nwhere we are coming from, who we are and where we are going.\n"

Which has all fixed.