In [1]:
import pandas as pd
from statistics import mean 

# Task 1 – Understanding Sentiment of an Input Text

##### Task description

Your task is to implement a small system that provides sentiment analysis of single sentences given to it as input. The response of the system should be positive, negative or neutral, based on the “estimated” sentiment of the input text. The 
estimation should be done according to the sentiment algorithm provided. The algorithm is very simple, yet it can be useful to 
derive interesting insights from unstructured social-media texts.

##### Sample Input/Output Texts

Text->Sentiment:

I really like new book of that author.                  -> Positive

I hate new regulations about importing policies.        -> Negative

Look at that door, it’s still open.                     -> Neutral

In [2]:
# Read the tab-separated AFINN file (it has no header row) into a two-column
# table: the term in 'word' and its sentiment value in 'score'.
afinn = pd.read_csv(
    'AFINN-111.txt',
    sep="\t",
    header=None,
    names=['word', 'score'],
)
afinn

Unnamed: 0,word,score
0,abandon,-2
1,abandoned,-2
2,abandons,-2
3,abducted,-2
4,abduction,-2
...,...,...
2472,yucky,-2
2473,yummy,3
2474,zealot,-2
2475,zealots,-2


In [3]:
def cleaning(word):
    """Return ``word`` with every non-alphabetic character removed.

    Punctuation, quotes, digits and any other character for which
    ``str.isalpha()`` is false are dropped, so tokens such as ``car?``,
    ``Holidays!``, ``planets:`` or ``'yummy'`` are reduced to their letters.
    Letter case is left untouched.

    Parameters
    ----------
    word : str
        A single token, typically one item of ``text.split()``.

    Returns
    -------
    str
        The letters of ``word`` in their original order; an empty string when
        ``word`` contains no letters at all.
    """
    # ``str.join`` replaces the manual ``+=`` loop. The former ``else: next``
    # branch is gone: a bare ``next`` only names the builtin and does nothing.
    return ''.join(letter for letter in word if letter.isalpha())


def sentiment(text):
    """Print the overall sentiment of ``text``: positive, negative or neutral.

    Every whitespace-separated token is reduced to its letters with
    ``cleaning`` and lower-cased. Tokens found in the ``word`` column of the
    module-level ``afinn`` table contribute the matching ``score``; all other
    tokens contribute nothing. The sign of the summed score selects the label.

    Parameters
    ----------
    text : str
        The sentence to analyse.

    Returns
    -------
    None
        The label is written to standard output, it is not returned.
    """
    # Build the word -> score lookup once per call instead of converting the
    # whole column to a list (twice) for every token. ``setdefault`` keeps the
    # first score of a repeated word, like the former ``list.index`` lookup.
    lexicon = {}
    for word, value in zip(afinn['word'], afinn['score']):
        lexicon.setdefault(word, value)

    # adding up all scores; unknown tokens count as 0
    score = sum(lexicon.get(cleaning(token).lower(), 0) for token in text.split())

    # here is the final result of the sentiment analysis
    if score > 0:
        print('positive')
    elif score < 0:
        print('negative')
    else:
        print('neutral')

##### Checking for the code with examples

In [4]:
# Stress input: several tokens carry a quote, a digit, '?' or ',' that has to be
# stripped before the lookup, and 'yeees' is a misspelling.
myText = 'abandon and "yummy and a3bandoned and yes? and ability, yucky, zealot, aboard yeees,'
sentiment(myText)

positive


In [5]:
# First sample sentence from the Task 1 description (listed there as Positive).
myText = 'I really like new book of that author.'
sentiment(myText)

positive


In [6]:
# Second sample sentence from the Task 1 description (listed there as Negative).
myText = 'I hate new regulations about importing policies.'
sentiment(myText)

negative


In [7]:
# Third sample sentence from the Task 1 description (listed there as Neutral).
# Note the typographic apostrophe in "it’s": it is not a letter, so it is removed
# during cleaning.
myText = 'Look at that door, it’s still open.'
sentiment(myText)

neutral


# Task 2 – Understanding Sentiment of New Terms

##### Task Description

In this part you need to add functionality to your existing solution that computes a new sentiment for the terms that 
do not appear in the file AFINN-111.txt and saves them into another data frame or CSV file called ‘sentiments_new’.

##### Algorithms Description

Certain words can be used to “estimate” the sentiment of an input text. Once you know the sentiment of an input text that contains some new term, you can assign that sentiment to the new term itself. However, there are a number of words called “stop-words” that do not actually bring much sentiment to the text directly. You should ignore such words; a list of them can be found in the ‘stop-words-english.zip’ file.

##### Sample Input/Output Texts

Text: 
I really like new book of that author.

New Terms Sentiment:
author 2.0
book 2.0

In [8]:
# Collecting all stopwords at hand

stopword_files = [
    'stop-words_english_1_en.txt',
    'stop-words_english_2_en.txt',
    'stop-words_english_3_en.txt',
    'stop-words_english_4_google_en.txt',
    'stop-words_english_5_en.txt',
    'stop-words_english_6_en.txt',
]

# keep_default_na=False turns off pandas' default NA parsing, so a stop word
# that happens to spell an NA marker (e.g. "null") stays a string instead of
# becoming NaN.
stopword_frames = [
    pd.read_csv(path, header=None, keep_default_na=False)
    for path in stopword_files
]

# The per-file names are kept in case another cell refers to them.
stopwords1, stopwords2, stopwords3, stopwords4, stopwords5, stopwords6 = stopword_frames

stopwords = pd.concat(stopword_frames, axis=0)

# Flat 1..n row labels. The earlier `[list(range(...))]` wrapped the labels in an
# extra list, which pandas reads as the levels of a (one-level) MultiIndex.
stopwords.index = range(1, len(stopwords) + 1)
stopwords.columns = ['word']

stopwords

Unnamed: 0,word
1,able
2,about
3,above
4,abroad
5,according
...,...
2398,yourself
2399,yourselves
2400,you've
2401,z


In [11]:
def _contraction_form(token):
    """Lower-case ``token`` keeping only its letters and inner apostrophes."""
    # A typographic apostrophe is normalised so "it’s" can match "it's".
    normalised = token.lower().replace('\u2019', "'")
    kept = ''.join(ch for ch in normalised if ch.isalpha() or ch == "'")
    # Apostrophes at the edges are quotation marks, not part of a contraction.
    return kept.strip("'")


def new_sentiments(text):
    """Assign a sentiment score to the terms of ``text`` that AFINN lacks.

    Each whitespace-separated token is reduced to its letters with
    ``cleaning`` and lower-cased. Tokens found in ``afinn['word']`` contribute
    their score. The remaining tokens that are not listed in
    ``stopwords['word']`` are the new terms, and every new term receives the
    same value: the rounded mean of the scores found in the text, or 0 when
    the text contains no scored word.

    The table is also written to ``new_sentiment.csv`` in the current working
    directory and a confirmation line is printed.

    Parameters
    ----------
    text : str
        The sentence to analyse.

    Returns
    -------
    pandas.DataFrame
        Columns ``words`` and ``scores``, one row per distinct new term in
        order of first appearance, indexed from 1.
    """
    # Build the lookups once per call instead of converting whole columns to
    # lists for every token. ``setdefault`` keeps the first score of a word
    # that is listed more than once.
    known_scores = {}
    for word, value in zip(afinn['word'], afinn['score']):
        known_scores.setdefault(word, value)

    # Non-string cells (e.g. NaN produced by the CSV parser) are ignored.
    stop_words = {word.lower() for word in stopwords['word'] if isinstance(word, str)}

    new_words = []
    sentiment_scores = []

    for token in text.split():
        # Lower-casing makes the lookup agree with ``sentiment``; without it a
        # capitalised word such as "I" misses both word lists.
        term = cleaning(token).lower()
        if not term:
            # The token had no letters (e.g. "123" or "-"): nothing to record.
            continue

        if term in known_scores:
            # A scored word always counts, even when it is also a stop word.
            sentiment_scores.append(float(known_scores[term]))
            continue

        # Stop words are checked in two spellings: the cleaned term, and the
        # apostrophe-preserving form so contractions like "it's" still match.
        if term in stop_words or _contraction_form(token) in stop_words:
            continue

        # Report each new term once, in order of first appearance.
        if term not in new_words:
            new_words.append(term)

    # calculating average new_score for new_words in a case when there is more sentiment words in a text already
    # ``mean`` raises StatisticsError on an empty list, so a text without any
    # scored word is treated as neutral (0).
    new_scores = round(mean(sentiment_scores)) if sentiment_scores else 0

    # creating output in a Data Frame format
    new_sentiment = pd.DataFrame(
        {'words': new_words,
         'scores': new_scores},
        index=list(range(1, len(new_words) + 1))
    )

    # automatically saves data in CSV format
    new_sentiment.to_csv('new_sentiment.csv')

    print('The output is automatically saved in your local PC under a name "new_sentiment.csv" format')
    return new_sentiment

##### Checking for the code with examples

In [12]:
# Sample sentence from the Task 2 description, which lists 'author' and 'book'
# as the expected new terms.
# NOTE(review): the output recorded beneath this cell shows 'regulations',
# 'importing' and 'policies', none of which occur in this sentence — it looks
# like a leftover from an earlier run; re-run the cell to refresh it.
myText = "I really like new book of that author."
new_sentiments(myText)

The output is automatically saved in your local PC under a name "new_sentiment.csv" format


Unnamed: 0,words,scores
1,regulations,-3
2,importing,-3
3,policies,-3
