In [None]:
import re
import nltk
import pandas as pd
from nltk.corpus import wordnet as wordnet
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords

In [None]:
def tweet_to_words(raw_tweet):
    """Reduce a raw tweet to a space-separated string of content words.

    Processing order: remove @mentions, remove URLs (``http`` up to the next
    whitespace), replace every character that is not an ASCII letter with a
    space, lowercase, split on whitespace, and drop English stopwords.

    Args:
        raw_tweet: The tweet text as a string.

    Returns:
        The surviving lowercase words joined by single spaces; an empty
        string when nothing survives.
    """
    # Raw string literals keep \w and \S as regex escapes instead of
    # (invalid) Python string escapes.
    without_mentions = re.sub(r"@\w+", "", raw_tweet)
    without_urls = re.sub(r"http\S+", "", without_mentions)
    letters_only = re.sub(r"[^a-zA-Z]", " ", without_urls)
    # split() with no argument already discards leading/trailing/repeated
    # whitespace, so no explicit strip() is needed beforehand.
    words = letters_only.lower().split()
    # A set gives constant-time membership checks in the filter below.
    stops = set(stopwords.words("english"))
    meaningful_words = [w for w in words if w not in stops]
    return " ".join(meaningful_words)

In [None]:
def wordnet_pos_code(tag):
    """Map a POS tag string onto the corresponding WordNet POS constant.

    Matching is by prefix: ``NN*`` -> noun, ``VB*`` -> verb, ``JJ*`` ->
    adjective, ``RB*`` -> adverb.

    Args:
        tag: POS tag string.

    Returns:
        The matching ``wordnet`` constant, or an empty string when the tag
        starts with none of the recognised prefixes.
    """
    prefix_to_constant = (
        ('NN', 'NOUN'),
        ('VB', 'VERB'),
        ('JJ', 'ADJ'),
        ('RB', 'ADV'),
    )
    for prefix, constant_name in prefix_to_constant:
        if tag.startswith(prefix):
            # Looked up only on a match, so unmatched tags never touch the
            # wordnet corpus reader.
            return getattr(wordnet, constant_name)
    return ''

In [None]:
def pos_tag(sentence):
    """Tokenize a sentence and attach a part-of-speech tag to every token.

    Args:
        sentence: Text to tokenize with ``nltk.word_tokenize``.

    Returns:
        A list of dicts, one per token in order, each with the keys
        ``'word'`` (the token) and ``'pos'`` (its tag from ``nltk.pos_tag``).
    """
    tagged_pairs = nltk.pos_tag(nltk.word_tokenize(sentence))
    return [
        {'word': token_text, 'pos': token_tag}
        for token_text, token_tag in tagged_pairs
    ]

In [None]:
def word_sense_similarity(word, context, dummy = None):
    """Choose the WordNet sense of ``word`` that best fits ``context``.

    For each candidate synset of ``word`` and each token of ``context``, the
    path similarities between the candidate and every synset of that token
    are summed. The candidate achieving the highest single-token sum is
    returned.

    Args:
        word: The word to disambiguate.
        context: Text whose tokens provide the disambiguation context.
        dummy: Unused. Accepting it lets the function be passed where a
            three-argument ``(word, context, pos)`` callable is expected.

    Returns:
        The best-scoring synset, or None when ``word`` has no synsets or no
        candidate reaches a score above zero.
    """
    wordsynsets = wordnet.synsets(word)
    if not wordsynsets:
        return None

    # The context synsets do not depend on the candidate sense, so tokenize
    # and look them up once instead of once per candidate.
    context_synsets = [wordnet.synsets(w) for w in nltk.word_tokenize(context)]

    bestScore = 0.0
    result = None
    for synset in wordsynsets:
        for token_synsets in context_synsets:
            score = 0.0
            for wsynset in token_synsets:
                sim = wordnet.path_similarity(wsynset, synset)
                # path_similarity yields None when no connecting path exists.
                if sim is not None:
                    score += sim
            # Strict comparison: on ties the earliest candidate is kept.
            if score > bestScore:
                bestScore = score
                result = synset
    return result

In [None]:
def sentiwordnet_classify(text):
    """Label a text as 'Positive', 'Negative' or 'Neutral'.

    The text is split into sentences, each sentence is scored with
    ``sentence_score``, and the raw and thresholded scores are summed
    separately across sentences.

    Args:
        text: The text to classify.

    Returns:
        'Positive' or 'Negative' according to the sign of the thresholded
        total when it is non-zero, otherwise according to the sign of the raw
        total; 'Neutral' when both totals are zero.
    """
    raw_total = 0
    thresholded_total = 0
    for sent in nltk.sent_tokenize(text):
        raw, thresholded = sentence_score(sent)
        raw_total += raw
        thresholded_total += thresholded

    # The thresholded total is trusted more; the raw total only decides when
    # the thresholded one carries no signal.
    decisive = thresholded_total if thresholded_total != 0 else raw_total
    if decisive == 0:
        return 'Neutral'
    return 'Positive' if decisive > 0 else 'Negative'

In [None]:
def sentence_score(text, threshold = 0.75, wsd = word_sense_cdf):
    """Compute SentiWordNet sentiment scores for a single sentence.

    Every token is POS-tagged, resolved to a WordNet sense through ``wsd``,
    and looked up in SentiWordNet. Senses with an objectivity score of
    exactly 1 contribute nothing.

    Args:
        text: The sentence to score.
        threshold: Objectivity cut-off. Only senses whose objectivity score
            is strictly below it count towards the thresholded score.
        wsd: Callable invoked as ``wsd(word, context, wordnet_pos)`` that
            returns a synset or None.

    Returns:
        A tuple ``(score, score_thr)``: summed positive minus summed negative
        score over all contributing senses, and the same difference
        restricted to senses below ``threshold``.
    """
    # NOTE(review): the default `word_sense_cdf` is not defined in the cells
    # visible here; confirm it exists before this cell is executed.
    pos_score = 0
    neg_score = 0
    pos_score_thr = 0
    neg_score_thr = 0

    for word in pos_tag(text):
        # Entries carrying a 'punct' key are skipped; pos_tag() as written
        # emits only 'word' and 'pos', so this is a guard for other taggers.
        if 'punct' in word:
            continue
        sense = wsd(word['word'], text, wordnet_pos_code(word['pos']))
        if sense is None:
            continue
        sent = swn.senti_synset(sense.name())
        # Ignore senses missing from SentiWordNet or rated purely objective.
        if sent is not None and sent.obj_score() != 1:
            pos_score += float(sent.pos_score())
            neg_score += float(sent.neg_score())
            if sent.obj_score() < threshold:
                pos_score_thr += float(sent.pos_score())
                neg_score_thr += float(sent.neg_score())

    return (pos_score - neg_score, pos_score_thr - neg_score_thr)

In [None]:
# Load the tweet dataset from the working directory.
# Other CSV names noted by the author (presumably alternative inputs — confirm): Weather.csv, Strangers_Things.csv
t_data = pd.read_csv("Tweets.csv")
# The 'text' column is what gets passed to tweet_to_words below.
texts = t_data['text']

In [None]:
# Clean every entry of `texts`; the resulting list keeps the same order.
tweet = [tweet_to_words(text) for text in texts]