In [144]:
#!/usr/bin/env python

import math
import pandas as pd
from utils import get_char_count
from utils import get_words
from utils import get_sentences
from utils import count_syllables
from utils import count_complex_words

class Readability:
    analyzedVars = {}

    def __init__(self, text):
        self.analyze_text(text)

    def analyze_text(self, text):
        words = get_words(text)
        char_count = get_char_count(words)
        word_count = len(words)
        sentence_count = len(get_sentences(text))
        syllable_count = count_syllables(words)
        complexwords_count = count_complex_words(text)
        avg_words_p_sentence = word_count/sentence_count
        
        self.analyzedVars = {
            'words': words,
            'char_cnt': float(char_count),
            'word_cnt': float(word_count),
            'sentence_cnt': float(sentence_count),
            'syllable_cnt': float(syllable_count),
            'complex_word_cnt': float(complexwords_count),
            'avg_words_p_sentence': float(avg_words_p_sentence)
        }

    def ARI(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = 4.71 * (self.analyzedVars['char_cnt'] / self.analyzedVars['word_cnt']) + 0.5 * (self.analyzedVars['word_cnt'] / self.analyzedVars['sentence_cnt']) - 21.43
        return score
        
    def FleschReadingEase(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = 206.835 - (1.015 * (self.analyzedVars['avg_words_p_sentence'])) - (84.6 * (self.analyzedVars['syllable_cnt']/ self.analyzedVars['word_cnt']))
        return round(score, 4)
        
    def FleschKincaidGradeLevel(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = 0.39 * (self.analyzedVars['avg_words_p_sentence']) + 11.8 * (self.analyzedVars['syllable_cnt']/ self.analyzedVars['word_cnt']) - 15.59
        return round(score, 4)
        
    def GunningFogIndex(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = 0.4 * ((self.analyzedVars['avg_words_p_sentence']) + (100 * (self.analyzedVars['complex_word_cnt']/self.analyzedVars['word_cnt'])))
        return round(score, 4)

    def SMOGIndex(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = (math.sqrt(self.analyzedVars['complex_word_cnt']*(30/self.analyzedVars['sentence_cnt'])) + 3)
        return score

    def ColemanLiauIndex(self):
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            score = (5.89*(self.analyzedVars['char_cnt']/self.analyzedVars['word_cnt']))-(30*(self.analyzedVars['sentence_cnt']/self.analyzedVars['word_cnt']))-15.8
        return round(score, 4)

    def LIX(self):
        longwords = 0.0
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            for word in self.analyzedVars['words']:
                if len(word) >= 7:
                    longwords += 1.0
            score = self.analyzedVars['word_cnt'] / self.analyzedVars['sentence_cnt'] + float(100 * longwords) / self.analyzedVars['word_cnt']
        return score

    def RIX(self):
        longwords = 0.0
        score = 0.0 
        if self.analyzedVars['word_cnt'] > 0.0:
            for word in self.analyzedVars['words']:
                if len(word) >= 7:
                    longwords += 1.0
            score = longwords / self.analyzedVars['sentence_cnt']
        return score
        

In [145]:
data_listing=pd.read_csv('/Users/jaideep/Desktop/Airbnb Project Files/Data/new_data/airbnb_sen_merged.csv')
df_imported = pd.DataFrame(data_listing)


df_new=df_imported[["id","name","summary","space","description","neighborhood_overview","notes","transit","access","interaction","house_rules","host_about"]]
df_new

Unnamed: 0,id,name,summary,space,description,neighborhood_overview,notes,transit,access,interaction,house_rules,host_about
0,70598,Attic room with doublebed available,Attic room with double bed available . The hou...,I live in a nice neigborhood in the north of A...,Attic room with double bed available . The hou...,I live in a quiet neighbourhood in the North o...,Check in on Saturday and Sunday is flexible an...,Bus connection to Amsterdam Central Station an...,"Access to Kitchen, WIFI, bathroom, extra toilet,",If you have any question about Amsterdam just ask,-Please don�۪t smoke in the house or smoke sta...,"46 years old, originally from the Caribbean, l..."
1,76459,Yellow Studio Palamedes Amsterdam,The comforts of home on a unique spot.,THE COMFORTS OF HOME ON A GREAT LOCATION This ...,The comforts of home on a unique spot. THE COM...,All the interresting places you must have been...,"Hello dear guest, thank you very much for your...",Several tram stops at 100 mtrs.,There is a safe in each Studio.,"We live in the same house, guests can allways ...",RULES OF THE HOUSE In spite of what some visit...,We are interested in Graphic- and Industrial ...
2,264628,Lovely Charming House in Amsterdam!,,We wish you a warm welcome in our guesthouse i...,We wish you a warm welcome in our guesthouse i...,,,,,,Smoking is not allowed We love our house and e...,"Hi,\r\rI am Karen, 40 years old, mother of thr..."
3,293337,Sunny Apartment along river IJ,Great location opposit Central Station. Just a...,"Just opposite central station, passengers crui...",Great location opposit Central Station. Just a...,,,We are opposit Central Station (at the other ...,There will be one room in the apartment (my st...,Questions during your stay? I m always availab...,Bedlinen and towels provided. Please leave the...,"I love to cook, to entertain, to travel and t..."
4,515088,Familyfriendly Suite incl breakfast,Our charming suite with sitting/sleeping area ...,The Suite: A lofty 65m2 peaceful room with a ...,Our charming suite with sitting/sleeping area ...,Ransdorp is one of the most authentic places i...,Breakfast and bicycles are included in the roo...,There is a bus stop at 100m from our house and...,The Suite: A lofty 65m2 peaceful room with a ...,Interaction: as much as the guests want themse...,Guests are asked to inform me about there arri...,My husband and I have travelled extensively al...
5,582382,Amsterdam Home 10 minutes to center,,"In the 'Bloemenbuurt', neighborhood, of Amster...","In the 'Bloemenbuurt', neighborhood, of Amster...",,,,,,We expect visitors to use their house as if th...,"Hi, I'm from Dublin, Ireland and my husband is..."
6,591951,House on waterfront,Nice house with a lot of space and fantastic v...,"Sunny, light (family)house close to the city i...",Nice house with a lot of space and fantastic v...,,,,,,,!
7,613088,Your own cottage in the countryside,Zunderdorp is a little 17th century village ne...,Zunderdorp is a little 17th century village ne...,Zunderdorp is a little 17th century village ne...,,,,You are staying in a little house next to our ...,I serve breakfast in the morning and am at hom...,No smoking in the rooms,"Living in the countryside, just near Amsterdam..."
8,667305,"Green, sportive experience in A'dam","AMSTERZONIAN, brand new 2pp no smoking accomod...","(URL HIDDEN) AMSTERZONIAN, brand new 2 pp no-s...","AMSTERZONIAN, brand new 2pp no smoking accomod...","Very friendly, rustique and spacy neighbourhoo...",Access in one minute (bicycle)/5 min. (walking...,"Bus terminal with 6 bus lines, incl. night bus...",Free use of new bicycles. Free use of (North S...,The host is available at all times to assist t...,"There are no special ""rules"". I assume that gu...",Yes I accept the guest
9,761411,Amsterdam Apartment - Clean Quiet Safe,"My home is in north part of Amsterdam, really ...","My apartment is commodious and clean, 85 m2, s...","My home is in north part of Amsterdam, really ...","The neighborhood is very green, quiet, safe an...",You can arrive at my apartment by bus 24 hours...,From Amsterdam airport Schiphol you take the t...,The entire apartment is accessible for the gue...,One important reason to rent my apartment is b...,Respectful and quiet. You may come and go as d...,"I'm a single man, live in Amsterdam north, and..."


In [146]:
df_readability=df_new[["id","name"]]
df_readability

Unnamed: 0,id,name
0,70598,Attic room with doublebed available
1,76459,Yellow Studio Palamedes Amsterdam
2,264628,Lovely Charming House in Amsterdam!
3,293337,Sunny Apartment along river IJ
4,515088,Familyfriendly Suite incl breakfast
5,582382,Amsterdam Home 10 minutes to center
6,591951,House on waterfront
7,613088,Your own cottage in the countryside
8,667305,"Green, sportive experience in A'dam"
9,761411,Amsterdam Apartment - Clean Quiet Safe


In [147]:
if __name__ == "__main__":
    for i in range(1,len(df_new.columns)):
        a=[];b=[];c=[];d=[];e=[];f=[];g=[];h=[];l=[];
        df_a=df_new.iloc[:,[0,i]].copy()
        for j, row in df_a.iterrows():
            text=row[str(df_a.columns[1])]
            try: 
                string=str(text).decode('utf-8')
                rd =Readability(string)
                h.append(rd.ARI())
                a.append(rd.FleschReadingEase())
                b.append(rd.FleschKincaidGradeLevel())
                c.append(rd.GunningFogIndex())
                d.append(rd.SMOGIndex())
                e.append(rd.ColemanLiauIndex())
                f.append(rd.LIX())
                g.append(rd.RIX())
                l.append(row['id'])
            except:
                h.append("")
                a.append("")
                b.append("")
                c.append("")
                d.append("")
                e.append("")
                f.append("")
                g.append("")
                l.append("")
                    #dataframe
        
        #df_readability.reset_index()
        df_output = pd.DataFrame({str(df_a.columns[1])+'_ARI': h,str(df_a.columns[1])+'_FleschReadingEase' :a,str(df_a.columns[1])+'_FleschKincaidGradeLevel':b,str(df_a.columns[1])+'_GunningFogIndex':c,str(df_a.columns[1])+'_SMOGIndex':d,str(df_a.columns[1])+'_ColemanLiauIndex':e,str(df_a.columns[1])+'_LIX':f,str(df_a.columns[1])+'_RIX':g})
        #df_readability=pd.merge(df_output,df_readability, on='id', how='inner') 
        #df_readability = df_readability.append(df_output)
        #df_readability.reset_index()
        df_readability = pd.concat([df_readability , df_output], axis=1)
        print(df_readability.shape)
    df_readability

(520, 10)
(520, 18)
(520, 26)
(520, 34)
(520, 42)
(520, 50)
(520, 58)
(520, 66)
(520, 74)
(520, 82)
(520, 90)


In [148]:
df_readability

Unnamed: 0,id,name,name_ARI,name_ColemanLiauIndex,name_FleschKincaidGradeLevel,name_FleschReadingEase,name_GunningFogIndex,name_LIX,name_RIX,name_SMOGIndex,...,house_rules_RIX,house_rules_SMOGIndex,host_about_ARI,host_about_ColemanLiauIndex,host_about_FleschKincaidGradeLevel,host_about_FleschReadingEase,host_about_GunningFogIndex,host_about_LIX,host_about_RIX,host_about_SMOGIndex
0,70598,Attic room with doublebed available,10.272,14.718,12.32,15.64,18,45,2,10.746,...,,,7.09718,8.8827,6.8221,71.087,8.4169,31.1014,2.4,8.47723
1,76459,Yellow Studio Palamedes Amsterdam,15.895,20.875,21.37,-51.025,1.6,54,2,3,...,3.66667,12.4868,8.07654,10.6627,8.9954,53.7246,6.7385,39.9231,3.5,6.87298
2,264628,Lovely Charming House in Amsterdam!,9.33,13.54,7.6,49.48,2,45,2,3,...,2,9.7082,8.99977,7.4148,8.4248,72.0642,9.9504,35.4535,3,8
3,293337,Sunny Apartment along river IJ,5.562,8.828,9.96,32.56,2,25,1,3,...,3.16667,9.7082,10.7605,8.542,10.021,64.1823,13.9863,42.2034,4.4,12.1652
4,515088,Familyfriendly Suite incl breakfast,18.25,23.82,12.52,12.425,11.6,54,2,8.47723,...,1.8,7.24264,11.7604,8.7296,10.1659,66.7055,12.7723,45.052,5,10.2457
5,582382,Amsterdam Home 10 minutes to center,5.12,8.65,6.4167,59.745,15.7333,39.3333,2,10.746,...,2,9,4.92674,6.9131,5.5832,76.4073,8.8909,29.6169,2.14286,9.21059
6,591951,House on waterfront,6.76,7.5767,5.2467,62.79,14.5333,36.3333,1,8.47723,...,0,3,0,0,0,0,0,0,0,0
7,613088,Your own cottage in the countryside,5.12,8.65,4.45,73.845,9.0667,39.3333,2,8.47723,...,1,3,6.67935,7.7245,7.7697,66.0745,10.5161,38.0806,3.5,10.2457
8,667305,"Green, sportive experience in A'dam",9.33,13.54,7.6,49.48,10,45,2,8.47723,...,2.5,8.47723,-1.974,-0.596,0.52,100.24,2,5,0,3
9,761411,Amsterdam Apartment - Clean Quiet Safe,9.045,13.5583,6.4167,59.745,9.0667,39.3333,2,8.47723,...,2,10.3485,6.68933,7.9851,7.0422,71.29,8.6667,31.6667,2.5,8.47723


In [149]:
df_readability.to_csv("/Users/jaideep/Desktop/Airbnb Project Files/Data/new_data/readability.csv", sep=',')