In [52]:
import csv
import os

import pandas as pd
import spacy
from textstat.textstat import textstatistics,legacy_round

In [44]:


# Splits the text into sentences, using
# spaCy's sentence segmentation, which is
# described at https://spacy.io/usage/spacy-101

# Loaded pipelines keyed by model name, so the model is loaded only once.
_SPACY_PIPELINES = {}


def _get_pipeline(model_name='en_core_web_sm'):
    """Return the spaCy pipeline for ``model_name``, loading it at most once."""
    if model_name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return _SPACY_PIPELINES[model_name]


def break_sentences(text):
    """Split ``text`` into sentences.

    The pipeline is cached because this function is called several times
    per text by the word, sentence and syllable helpers; calling
    ``spacy.load`` on each of those calls repeats the same work.

    Args:
        text: The string to segment.

    Returns:
        list: The items of ``doc.sents`` for the parsed text.
    """
    doc = _get_pipeline()(text)
    return list(doc.sents)

# Returns the number of words in the text
def word_count(text):
    """Count the word tokens in ``text``.

    Tokens flagged as punctuation or whitespace are skipped. Counting them
    as words inflates the total that the per-word and per-sentence ratios
    divide by.

    Args:
        text: The string to analyse.

    Returns:
        int: Number of non-punctuation, non-whitespace tokens across all
        sentences returned by ``break_sentences``.
    """
    return sum(
        1
        for sentence in break_sentences(text)
        for token in sentence
        if not (token.is_punct or token.is_space)
    )

# Returns the number of sentences in the text
def sentence_count(text):
    """Return how many sentences ``break_sentences`` finds in ``text``."""
    return len(break_sentences(text))

# Returns average sentence length
def avg_sentence_length(text):
    """Return the mean number of words per sentence in ``text``.

    Args:
        text: The string to analyse.

    Returns:
        float: ``word_count(text) / sentence_count(text)``, or 0.0 when no
        sentences are found (e.g. empty text) instead of raising
        ZeroDivisionError.
    """
    sentences = sentence_count(text)
    if sentences == 0:
        return 0.0
    return float(word_count(text) / sentences)


def syllables_count(word):
    """Return the syllable count that textstat reports for ``word``."""
    stats = textstatistics()
    return stats.syllable_count(word)


def avg_syllables_per_word(text):
    """Return the average number of syllables per word, rounded to 1 decimal.

    Args:
        text: The string to analyse.

    Returns:
        float: ``syllables_count(text) / word_count(text)`` passed through
        ``legacy_round(..., 1)``, or 0.0 when the text contains no words
        (previously a ZeroDivisionError).
    """
    words = word_count(text)
    if words == 0:
        return 0.0
    syllable = syllables_count(text)
    ASPW = float(syllable) / float(words)
    return legacy_round(ASPW, 1)

# Return total Difficult Words in a text

# Stop-word set, cached so the spaCy model is loaded only once for it.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the stop-word set of the 'en_core_web_sm' pipeline (cached)."""
    if 'en' not in _STOP_WORDS_CACHE:
        nlp = spacy.load('en_core_web_sm')
        _STOP_WORDS_CACHE['en'] = nlp.Defaults.stop_words
    return _STOP_WORDS_CACHE['en']


def difficult_words(text):
    """Count the distinct difficult words in ``text``.

    A word is difficult when it has two or more syllables and is not in the
    pipeline's stop-word set. Words are lower-cased before the stop-word
    check and before de-duplication, so capitalised forms (the driver cell
    title-cases each essay) still match and "Table"/"table" count once.

    The text is no longer parsed a second time here; the previous extra
    ``nlp(text)`` result was never used.

    Args:
        text: The string to analyse.

    Returns:
        int: Number of distinct (case-insensitive) difficult words.
    """
    stop_words = _english_stop_words()

    # Every token of every sentence, as lower-cased text.
    words = [
        str(token).lower()
        for sentence in break_sentences(text)
        for token in sentence
    ]

    # Difficult words: not a stop word and at least two syllables.
    diff_words_set = {
        word
        for word in words
        if word not in stop_words and syllables_count(word) >= 2
    }

    return len(diff_words_set)


def poly_syllable_count(text):
    """Count the tokens in ``text`` that have three or more syllables.

    Each token is converted to ``str`` before being handed to
    ``syllables_count``, as ``difficult_words`` already does, so the
    syllable counter receives plain text rather than a token object.

    Args:
        text: The string to analyse.

    Returns:
        int: Number of tokens (repeats included) with >= 3 syllables.
    """
    return sum(
        1
        for sentence in break_sentences(text)
        for token in sentence
        if syllables_count(str(token)) >= 3
    )


def flesch_reading_ease(text):
    """
        Implements the Flesch Reading Ease formula:
        Reading Ease score = 206.835 - (1.015 × ASL) - (84.6 × ASW)
        where
        ASL = average sentence length (number of words
                divided by number of sentences)
        ASW = average word length in syllables (number of syllables
            divided by number of words)
        The score is returned rounded to two decimal places.
    """
    sentence_term = float(1.015 * avg_sentence_length(text))
    syllable_term = float(84.6 * avg_syllables_per_word(text))
    score = 206.835 - sentence_term - syllable_term
    return legacy_round(score, 2)


def gunning_fog(text):
    """Return a Gunning Fog style grade for ``text``.

    Computed as 0.4 * (average sentence length + percentage of difficult
    words + 5).

    NOTE(review): the "+ 5" offset is kept exactly as it was; it is not part
    of the commonly published Gunning Fog formula - confirm it is intended.

    Args:
        text: The string to analyse.

    Returns:
        float: The unrounded grade, or 0.0 when the text contains no words
        (previously a ZeroDivisionError).
    """
    words = word_count(text)
    if words == 0:
        return 0.0
    per_diff_words = (difficult_words(text) / words * 100) + 5
    grade = 0.4 * (avg_sentence_length(text) + per_diff_words)
    return grade


def smog_index(text):
    """
        Implements the SMOG formula / grading.
        Simplified form: SMOG grading = 3 + √(polysyllable count),
        where polysyllable count = number of words of more
        than two syllables in a sample of 30 sentences.

        The code uses the scaled form
        1.043 * √(30 * polysyllables / sentences) + 3.1291,
        rounded to one decimal place, and returns 0 when the
        text has fewer than three sentences.
    """
    if sentence_count(text) < 3:
        return 0

    poly_syllab = poly_syllable_count(text)
    # Scale the polysyllable count to a 30-sentence sample.
    scaled = 30 * (poly_syllab / sentence_count(text))
    SMOG = (1.043 * scaled ** 0.5) + 3.1291
    return legacy_round(SMOG, 1)


def dale_chall_readability_score(text):
    """
        Implements the Dale-Chall formula:
        Raw score = 0.1579*(PDW) + 0.0496*(ASL)
        Adjusted score = Raw score + 3.6365 when PDW > 5,
        otherwise Adjusted score = Raw score
        Here,
            PDW = Percentage of difficult words.
            ASL = Average sentence length

        Returns the adjusted score rounded to two decimal places, or
        0.0 when the text contains no words (previously this path
        failed because ``per`` was never assigned).
    """
    words = word_count(text)
    if words == 0:
        return 0.0

    # Number of words not termed as difficult words
    difficult_words_count = difficult_words(text)
    count = words - difficult_words_count

    # Percentage of words not on the difficult word list
    per = float(count) / float(words) * 100

    # diff_words stores the percentage of difficult words
    diff_words = 100 - per

    raw_score = (0.1579 * diff_words) + \
        (0.0496 * avg_sentence_length(text))

    # If the percentage of difficult words is greater than 5 %, then
    # Adjusted Score = Raw Score + 3.6365,
    # otherwise Adjusted Score = Raw Score
    if diff_words > 5:
        raw_score += 3.6365

    return legacy_round(raw_score, 2)


In [45]:

# Directory holding the essay workbooks, given relative to the current working directory.
path = "../sentences-to-annotate/"
# Names of every entry in that directory; the loop in the next cell passes each one to pd.read_excel.
dir_list = os.listdir(path)

In [51]:
# Per-file readability metrics, keyed by workbook file name.
word_difficulty = {}
for file in dir_list:
    workbook_path = os.path.join(path, file)
    dataframe1 = pd.read_excel(workbook_path)
    print(workbook_path)
    print(dataframe1["sentence"])

    # Keep only real strings: empty cells in the "sentence" column come back
    # as NaN, which is not a str.
    sentences = [
        sentence for sentence in dataframe1["sentence"]
        if isinstance(sentence, str)
    ]
    # Join with a space so the last word of one sentence is not fused with
    # the first word of the next, which would corrupt both the word counts
    # and the sentence segmentation.
    essay = " ".join(sentences)

    # NOTE(review): title-casing is kept from the original cell; it changes
    # the casing of every word before the metrics run - confirm it is wanted.
    essay = essay.title()

    word_difficulty[file] = {
        "essay": essay,
        "flesch_reading_ease": flesch_reading_ease(essay),
        "difficult words": difficult_words(essay),
        "dale_chall_readability_score": dale_chall_readability_score(essay),
    }


../sentences-to-annotate/1000617.xlsx
0     Nowadays everything has to be nice, new, good ...
1     These are important qualities which have to fu...
2     So it is ok that many advertisements make prod...
3     Well in my opinion it's absolutly ok but maybe...
4     At the moment I'm studying mediadesign in Berlin.
5     For that reason I have to learn and develop wa...
6     Of course perhaps it is a little bit wrong but...
7     I had a professor who told us that he had to d...
8     So he solved the problem by taking a piece of ...
9     Equally important is the fact that people only...
10      So how would you present maybe diet - products?
11    Would you show a fat lady with the nice and ne...
12             I think not a really difficult question.
13    Of course I can understand the people who say ...
14    But in this case I think that this is their fa...
15    You can't say that advertising agencies should...
16    In my opinion every business have to do their ...
17    All 

../sentences-to-annotate/1020096.xlsx
0     From the this question i would say, i strongly...
1           theseday we can see advertising everywhere.
2     so we can have effect by advertising but most ...
3     most products seem really nice in the advertis...
4                                         but It's not.
5     because advertiser want to sell thier products...
6           for example, fewdays ago i bought bulejean.
7     i saw advertising from TV and jeans color is r...
8                    so i bought jean use the internet.
9            but when i got a jean, color is different.
10                                      they lie to me.
11    second, advertiser get a money from the produc...
12    so advertiser make famous advertisements in ad...
13    and If product sell very much, his advertising...
14    finally, from the this reason first, advertise...
15                                                   so
Name: sentence, dtype: object
../sentences-to-annotate/83310.xlsx


../sentences-to-annotate/421490.xlsx
0    I surely agree with the opinion that  most adv...
1    In my opinion a very great number of these adv...
2    This fact could be expleined thinking that the...
3    Instead the television is used by a number of ...
4    Indeed it' s easy for youngs to believe in a w...
5    Advertisements that gives am image of a produc...
6    To conclude I think it's a very serious proble...
Name: sentence, dtype: object
../sentences-to-annotate/1304821.xlsx
0     Advertisements are one of the most power means...
1     Undoubtedly, nowadays, advertisements are ever...
2     There are on the TV, in the radio, on the top ...
3     Most of advertisements combine with words and ...
4     I Take myself as an example, I was always bein...
5     Nevertheless, have you ever think about that a...
6     Are these modles on the billbroad really as pe...
7     I mean They have no pimples or extra hairs at ...
8     Something must be done for this advertisement ...
9     

../sentences-to-annotate/452402.xlsx
0     I agree with this sentence, especially after t...
1     In fact, at the beginning there were shops and...
2     The focus was on the relationship between the ...
3     The rule of owners as vendors was critical for...
4     Then arrives supermarkets and stores with publ...
5     Publicity made with flyers and big images in t...
6     Nowadays the increasing of technologies, such ...
7     It's not important what you said or who's your...
8     A spot on TV, created as a "short story" or a ...
9     Also using a famous testimonial for your spot,...
10    The globalization made by Internet allowed a p...
11    In conclusion, well-done advertisements not on...
Name: sentence, dtype: object
../sentences-to-annotate/1020881.xlsx
0        I agree with this statement for these reasons.
1                                                   NaN
2     First, most advertisements appeal an advantage...
3     Let's think about the advertisements of a medi...

../sentences-to-annotate/449429.xlsx
0    Now adays advertisement is the best way to sel...
1    First,Advertisement only shows the good qualit...
2    Second,Advertisement make product look In bett...
3    Finally,Advertisement shows that products are ...
4    In conclusion ,I like to say that most of the ...
Name: sentence, dtype: object
../sentences-to-annotate/1159224.xlsx
0     In todays world of cut throat competition, adv...
1     There is a large variety of products available...
2     Similar products are manufactured by many comp...
3        Hence customer today has a variety of chooice.
4                                                   NaN
5     The differences in these similar products may ...
6     The advertising helps in exaggerating this dif...
7                                Take cars for example.
8     One car manufacturer may exaggerate about the ...
9     Another car manufacurer in the same category m...
10    Thus both the manufacturers have a selling poi...
11  

../sentences-to-annotate/1013285.xlsx
0     I todays world with advancement in technology ...
1     These advertisements are best way of marketing...
2     But the question arises, Are these advertiseme...
3                  what they advertise well not always.
4     The advertising industry is continuously growi...
5     The advertisement developers make it so attrsa...
6     Even some people love to watch advertisements ...
7     In the recent past we have seen so many cases ...
8     For example there was a case on a skin lotion ...
9     The advertisement of that company claimed that...
10    Whereas it never happened and subsequently the...
11    Such advertisements are nothing but tools of f...
12    The government agencies should certainly make ...
13    Summarily i would say that most of the adverti...
Name: sentence, dtype: object
../sentences-to-annotate/146183.xlsx
0    Advertisements play really an important role i...
1    The world of industry is affected mostly by th...
2

../sentences-to-annotate/1150823.xlsx
0     I completely agree with the statement that mos...
1         There are maily three reasons for my opinion.
2     First, the advertisement agencies over-publici...
3     This is particularly done in the case of a lau...
4     Since, no one has ever consumed the product, t...
5     It is, after the product launch, that consumer...
6                                                   NaN
7     Another reason is that, most advertisements in...
8     The consumers start to believe that they might...
9     Sheer curousity of consumer for that product, ...
10    It is shortly after that that the consumer rea...
11    This results in lost of trust of consumers in ...
12                                                  NaN
13    Third,nowdays there is an increasing participa...
14    For example, a new range of nike sport shoes i...
15    As these famous personalities have a very larg...
16    Who want to be like their on screen idols in t...
17    Peop

../sentences-to-annotate/56222.xlsx
0    In daily life there are a lot of products and ...
1    Yes, I am agree with this statement because I ...
2    For example, if I have to buy something produc...
3    Is like buy a T-shirt of a famouse stilist or ...
4    But if we research more and more in another st...
5    Another example is a dishes soap that i call w...
6    Than a person go to buy it because the adverti...
7    Than, in summary I want to say that is importa...
8    The first thing to do is think and the second ...
Name: sentence, dtype: object
../sentences-to-annotate/584412.xlsx
0     I totally agree with the statement given above...
1                                                   NaN
2     If you believe everything what the producers s...
3     But if you know that they give you not all the...
4                                                   NaN
5     For example in politics, during the election, ...
6                Though they even know their ambitions.
7     So t

../sentences-to-annotate/138619.xlsx
0     The influence of marketing in our societies  h...
1     The practice of publicity also has changed dra...
2     First, people most of the time are getting mes...
3     Second, when people face reality, it is when t...
4     Their reality come true as something  of very ...
5     People realized that the message was just an s...
6     Finally, people who use advertisements this wa...
7     So , nowadays we have to be very careful about...
8     It is very easy to get someone attention offer...
9     Overall, people who advertise, they do not reg...
10    It is their final purpose, regardless the cons...
11           This is  a very popular way to get people.
12             Thus , I agree with the above statement.
Name: sentence, dtype: object
../sentences-to-annotate/128033.xlsx
0     I agree with the statement that products look ...
1     First of all, I believe that in the society of...
2     This is particollary true when you talk about ...


../sentences-to-annotate/47916.xlsx
0     I am agree that most advertisement make produc...
1     I think that it is normal because the attenden...
2     A good advertisement must make a product seem ...
3     In fact in the commercial law the product must...
4     If the advertisement give some positive sensat...
5     It is not important if in the advertisement th...
6       but it is important like the product is seemed.
7                  For example I want to buy a new car.
8     I see an advertisement that make a car like a ...
9     This is a little example but for everything we...
10    The consumist world in that we live impose us ...
11    The people are joked to the adverstisement and...
12    When I was a child I wanted a little toys beca...
13    But only with the passage of the age I learnt ...
Name: sentence, dtype: object
../sentences-to-annotate/1082108.xlsx
0                          I agree with this statement.
1     Some time companies do marketing of their prod...


../sentences-to-annotate/830942.xlsx
0    For me the advertisements now are part of our ...
1                          We are submitted from this.
2    Sometimes the advertisements are very importan...
3    But it could be a disaster if we trust everyth...
4    The terrible thing is that sometimes they real...
5    For me is a problem about our world, about the...
6    Some pleople for money, for the power can do e...
7    I really like graphic designer but sometimes i...
Name: sentence, dtype: object
../sentences-to-annotate/1030800.xlsx
0     I believe that most advertisements make produc...
1                      There are two reasons as follow.
2                                                   NaN
3     First of all, most people would agree that the...
4     For example, clothing manufacturers and design...
5            Another example can be food advertisement.
6     Food is presented in the best dishes with pict...
7     I remember that I had to return some of the pr...
8      

../sentences-to-annotate/1017282.xlsx
0     I agree with the statement that products seem ...
1                                                   NaN
2     The marketing department of every company trie...
3     Their aim is to influence the consumers to buy...
4                                                   NaN
5     Because of this, they are using different thin...
6                       I will buy this one next week."
7                                                   NaN
8     You can see advertisement like this everywhere...
9                                                   NaN
10    Another example for that could be an advertise...
11    But on the other side it is probably not an ec...
12                                                  NaN
13    The managers try with this strategy that the a...
14     They want to sell more than the other companies.
15                                                  NaN
16    That`s why the consumers need to decide by the...
17    The 

../sentences-to-annotate/1637927.xlsx
0     I do agree that most advertisements are some t...
1     Although there are laws which will take in to ...
2     for example in the  E commerce, the customers ...
3     advertisements should have a proper informatio...
4     the advertisers should act according to the la...
5     There should be  customer line for every produ...
6     In the third world countries customers endup i...
7     In my openion there should be proper informati...
8     Lack of customer care line and not having a ch...
9     the laws should be strict and customer should ...
10    there are so many products which are advertise...
Name: sentence, dtype: object
../sentences-to-annotate/1026395.xlsx
0                        I agree with this statement .
1    I think that a large amount of advertisement i...
2    But people are fascinated  by this idea of exc...
3    And this modern society prefers to believe to ...
4    The advertisements tell us that every thing is...
5  

../sentences-to-annotate/89860.xlsx
0                I completely agree with the statement.
1            Nowaday advertising  show you a lifestyle.
2     They mainly speaks about the feeling or how yo...
3     That because this is the most important way to...
4     Playing with the feelings :a psycological appr...
5     The more advertisement are made the more a pro...
6     Advertisements are less involved in the qualit...
7     To show the potentials of a good is less actra...
8     If a product will became a trend a big amount ...
9     On television the advertisement are like a sho...
10           The "slogan" or "jingle" is important too.
11                              It is part of our life.
12                        It must be a funny statement.
13                      Poeple must remember it easily.
14    But sometimes it's not related with the qualit...
15    I my opinion showing a product at the supermar...
16          I particulary agree with "test promotions".
17    For ex

../sentences-to-annotate/1116877.xlsx
0    I believe most advertisement make products see...
1    This is because the kind of things shown in ad...
2    Firstly advertisement make products seem bette...
3    For example when an advertisement of a bike is...
4    With hyped mileage quoted in the advertisement...
5    Secondly companies intentionally make good adv...
6    By doing this they try to create a branding an...
7    Take for example laptop companies like Acer, H...
8    But in actuality it does it and is very simila...
9    Ultimately advertisements make products seem m...
Name: sentence, dtype: object
../sentences-to-annotate/999352.xlsx
0    Most advertisements make products seem much be...
1    in fact, all over the world use this process.H...
2    first of all, i see wherever i go advertisemen...
3    Furthermore, the product owners have to compet...
4    In conclusion, the marketing for the product i...
5    Therefore, the products seem much better than ...
Name: sentence,

In [53]:
csv_file = 'output_final2.csv'
# Column names come from the first per-file record; the default {} keeps this
# from raising StopIteration when no files were processed.
header = list(next(iter(word_difficulty.values()), {}).keys())

# Write to CSV file (explicit UTF-8 so essay text is written the same way on
# every platform, whatever the locale's default encoding is)
with open(csv_file, 'w', newline='', encoding='utf-8') as csv_handle:
    writer = csv.DictWriter(csv_handle, fieldnames=['filename'] + header)

    # Write the header
    writer.writeheader()

    # Write data: one row per workbook, with the file name as first column
    for filename, values in word_difficulty.items():
        row = {'filename': filename, **values}
        writer.writerow(row)

print(f"CSV file '{csv_file}' has been created.")

CSV file 'output_final2.csv' has been created.


In [None]:
def feature_engineering(filename):
    """Plot a heatmap of the pairwise correlations of the feature table.

    Calls ``get_data(filename)`` to obtain ``(X, y)``, computes ``X.corr()``
    and draws it with ``sns.heatmap`` using a colour scale fixed to [0, 1],
    then shows the figure. Returns None.

    NOTE(review): ``get_data``, ``sns`` and ``plt`` are not defined or
    imported in the visible cells (presumably seaborn / matplotlib.pyplot and
    a project helper) - confirm they are in scope before running.
    """
    features, _labels = get_data(filename)
    sns.heatmap(features.corr(), vmin=0, vmax=1)
    plt.show()
