<a href="https://colab.research.google.com/github/Nivratti/100-Days-Of-ML-Code/blob/master/Optimized_Sentiment_Analysis_of_tweets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loading data

In [0]:
import pandas as pd

In [5]:
data = pd.read_csv('webdev_tweets.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,tweets
0,0,#R2D29 to #R2D36\nThis week I was practicing v...
1,1,Day 38 #100DaysOfCode Practiced @Codecademy cs...
2,2,R2 D45 of #100daysOfCode\n\nSolved the #javaSc...
3,3,Advanced Blogger With Sentinel &amp; Laravel 5...
4,4,The Advanced Web Developer Bootcamp -&gt; http...


# Define frameworks list

## Create dictionary of framework lists

In [0]:
# Instead of storing frameworks list in separate variable store it in dictionary
# it will reduce code length while manipulating

frameworks = {
    "react": ['react', 'reactJS', 'reactjs', 'React','#react', '#reactJS', '#reactjs', '#React'],
    "angular": [
        '#angular', '#angularjs', '#angularJS', '#Angular', '#AngularJS', '#Angular2', 
        '#Angular2.0','angular', 'angularjs', 'angularJS', 'Angular', 'AngularJS', 
        'Angular2', 'Angular2.0'
    ],
    "cakephp": [
        'cakephp', 'CakePHP', 'cakePhp', 'CakePhp','#cakephp', '#CakePHP', '#cakePhp', '#CakePhp'
    ],
    "laravel": ['laravel', 'Laravel','#laravel', '#Laravel'],
    "rails": [
        'rails', 'Rails', 'RubyonRails', 'rubyonrails','RubyOnRails','#rails', '#Rails', 
        '#RubyonRails', '#rubyonrails', '#RubyOnRails'
    ],
    "asp": ['ASP.net', 'ASP.NET', 'asp', 'ASP', '#ASP.net', '#ASP.NET', '#asp', '#ASP'],
    "django": [
        'Django', 'django', 'django1', 'django2', 'django3', 'Django1', 'Django2', 
        'Django3', '#Django', '#django', '#django1', '#django2', '#django3', '#Django1', '#Django2', '#Django3'
    ],

    "vue": ['Vue', 'vue', 'VueJS', 'vuejs', 'VueJs', 'vueJS', '#Vue', '#vue', '#VueJS', '#vuejs', '#VueJs', '#vueJS'],
    "express": ['express', 'ExpressJS', '#express', '#ExpressJS'],
    "node": [
        'Node', 'Node.js', 'NodeJS', 'Node.JS', 'nodeJS', 'nodejs', '#Node', '#Node.js', '#NodeJS', '#Node.JS', 
        '#nodeJS', '#nodejs'
    ]
}


## Make combined list

In [0]:
# iterate over all frameworks and build list
# use list compression to reduce code length and to get faster speed
flat_frameworks_list = [item for sublist in list(frameworks.values()) for item in sublist]
print(flat_frameworks_list)

['react', 'reactJS', 'reactjs', 'React', '#react', '#reactJS', '#reactjs', '#React', '#angular', '#angularjs', '#angularJS', '#Angular', '#AngularJS', '#Angular2', '#Angular2.0', 'angular', 'angularjs', 'angularJS', 'Angular', 'AngularJS', 'Angular2', 'Angular2.0', 'cakephp', 'CakePHP', 'cakePhp', 'CakePhp', '#cakephp', '#CakePHP', '#cakePhp', '#CakePhp', 'laravel', 'Laravel', '#laravel', '#Laravel', 'rails', 'Rails', 'RubyonRails', 'rubyonrails', 'RubyOnRails', '#rails', '#Rails', '#RubyonRails', '#rubyonrails', '#RubyOnRails', 'ASP.net', 'ASP.NET', 'asp', 'ASP', '#ASP.net', '#ASP.NET', '#asp', '#ASP', 'Django', 'django', 'django1', 'django2', 'django3', 'Django1', 'Django2', 'Django3', '#Django', '#django', '#django1', '#django2', '#django3', '#Django1', '#Django2', '#Django3', 'Vue', 'vue', 'VueJS', 'vuejs', 'VueJs', 'vueJS', '#Vue', '#vue', '#VueJS', '#vuejs', '#VueJs', '#vueJS', 'express', 'ExpressJS', '#express', '#ExpressJS', 'Node', 'Node.js', 'NodeJS', 'Node.JS', 'nodeJS', 'no

# Get tweet and total count

In [17]:
total_tweets = len(data['tweets'])
print(f"Total tweets : {total_tweets}")

tweets = data['tweets'] 

Total tweets : 5329


# Cleaning tweet

## Install package

In [0]:
!pip install tweet-preprocessor

## Import package

In [0]:
 import preprocessor as p

# Apply text cleaning on each tweet


In [9]:
cleaned_data = pd.DataFrame()
cleaned_data['tweets'] = data['tweets'].apply(p.clean)
cleaned_data.head()

Unnamed: 0,tweets
0,to This week I was practicing vanilla JS throu...
1,Day Practiced css. Reinforced fundamentals wit...
2,"R2 D45 of Solved the Fibonacci algorithm on , ..."
3,Advanced Blogger With Sentinel &amp; Laravel -...
4,The Advanced Web Developer Bootcamp -&gt;


# create helper functions

In [0]:
# helper functions

def find_percentage(mention, total_tweets):
    percentage = round(mention * 100 / total_tweets, 2)
    return percentage

def most_frequent(List):
    """
    Find most frequent element in a list
    """
    counter = 0
    num = List[0] 
      
    for i in List: 
        curr_frequency = List.count(i) 
        if(curr_frequency> counter): 
            counter = curr_frequency 
            num = i 
  
    return num 
  
# List = [2,  2, 1, 3] 
# print(most_frequent(List)) 

In [32]:
# build dictionary to store tweet and count for each framework 
# initialize count to zero
framworks_occurance = {name: {"count": 0, "tweets": []} for name in  list(frameworks.keys())}
print(f"framworks_occurance : {framworks_occurance}")

# append unknown key to dict -- for uncategorized tweets
# some tweets contains framework names that are not in our list ex. Vanila js
framworks_occurance["other"] = {"count": 0, "tweets": []}
print(f"framworks_occurance after adding unknown or other: {framworks_occurance}")

framworks_occurance : {'react': {'count': 0, 'tweets': []}, 'angular': {'count': 0, 'tweets': []}, 'cakephp': {'count': 0, 'tweets': []}, 'laravel': {'count': 0, 'tweets': []}, 'rails': {'count': 0, 'tweets': []}, 'asp': {'count': 0, 'tweets': []}, 'django': {'count': 0, 'tweets': []}, 'vue': {'count': 0, 'tweets': []}, 'express': {'count': 0, 'tweets': []}, 'node': {'count': 0, 'tweets': []}}
framworks_occurance after adding unknown or other: {'react': {'count': 0, 'tweets': []}, 'angular': {'count': 0, 'tweets': []}, 'cakephp': {'count': 0, 'tweets': []}, 'laravel': {'count': 0, 'tweets': []}, 'rails': {'count': 0, 'tweets': []}, 'asp': {'count': 0, 'tweets': []}, 'django': {'count': 0, 'tweets': []}, 'vue': {'count': 0, 'tweets': []}, 'express': {'count': 0, 'tweets': []}, 'node': {'count': 0, 'tweets': []}, 'other': {'count': 0, 'tweets': []}}


In [0]:
# iterate over each tweet and categories it as per framework occurance
for tweet in tweets:
    frameworks_found = []
    for framework_name,framework_keywords  in frameworks.items():
        words = tweet.split()
        for word in words:
            if word in framework_keywords:
                frameworks_found.append(framework_name)
                break

    if len(frameworks_found) > 0:
        # pickup most occuring framework and increase its count
        framework_most_frequent = most_frequent(frameworks_found)

        # increase its count by 1
        framworks_occurance[framework_most_frequent]["count"] += 1

        # append tweet in framework
        framworks_occurance[framework_most_frequent]["tweets"].append(tweet)
    else:
        framworks_occurance["other"]["count"] += 1
        framworks_occurance["other"]["tweets"].append(tweet)
        continue


In [34]:
# getting react framework tweet details
print(f'Total react tweets : {framworks_occurance["react"]["count"]}')

Total react tweets : 1897


# Analysis helper functions

In [0]:
from textblob import TextBlob
import numpy as np

def find_polarity(tweets):
    x = 0
    blob = ''
    polarity = []
    for tweet in tweets:
        blob = TextBlob(tweet)
        polarity.append(blob.sentiment.polarity)
    return polarity

def find_subjectivity(tweets):
    x = 0
    blob = ''
    subjectivity = []
    for tweet in tweets:
        blob = TextBlob(tweet)
        subjectivity.append(blob.sentiment.subjectivity)
    return subjectivity

def find_sentiment(polarity):
    sentiment = []
    for pol in polarity:
        if pol in np.arange(0.3, 1.1, 0.1):
            sentiment.append(1)
        elif pol in np.arange(-0.3, -1.1, -0.1):
            sentiment.append(-1)
        else:
            sentiment.append(0)
    
    return sentiment

def find_sentiment_ratio(sentiments):
    pos = 0
    neg = 0
    neu = 0
    for sentiment in sentiments:
        if(sentiment == -1):
            neu += 1
        elif(sentiment == 1):
            pos += 1
        else:
            neu += 1
    sentiment_ratio = {
        "Positive": pos, "Negative": neg, "Neutral": neu
    }
    return sentiment_ratio

# Perform analysis

In [78]:
for framework, fvalue in framworks_occurance.items():
    print("-" * 40)
    print(f"started analysis of {framework} framework")
    # framework tweets
    ftweets = framworks_occurance[framework]["tweets"]

    # find polarity
    polarity = find_polarity(ftweets)

    # find find_subjectivity
    subjectivity = find_subjectivity(ftweets)

    # find sentiments
    sentiments = find_sentiment(polarity)

    # find sentiment ratio
    sentiment_ratio = find_sentiment_ratio(sentiments)

    # store computed values
    framworks_occurance[framework]["polarity"] = polarity
    framworks_occurance[framework]["subjectivity"] = subjectivity
    framworks_occurance[framework]["sentiments"] = sentiments
    framworks_occurance[framework]["sentiment_ratio"] = sentiment_ratio

    print(f"completed analysis of {framework} framework")

----------------------------------------
started analysis of react framework
completed analysis of react framework
----------------------------------------
started analysis of angular framework
completed analysis of angular framework
----------------------------------------
started analysis of cakephp framework
completed analysis of cakephp framework
----------------------------------------
started analysis of laravel framework
completed analysis of laravel framework
----------------------------------------
started analysis of rails framework
completed analysis of rails framework
----------------------------------------
started analysis of asp framework
completed analysis of asp framework
----------------------------------------
started analysis of django framework
completed analysis of django framework
----------------------------------------
started analysis of vue framework
completed analysis of vue framework
----------------------------------------
started analysis of express frame

In [57]:
framworks_occurance["react"]["sentiment_ratio"]

{'negative': 0, 'neutral': 1527, 'positive': 370}

# visualization

# sentiment ratio

In [81]:
import plotly.express as px

for framework, fvalue in framworks_occurance.items():
    sentiment_ratio = fvalue["sentiment_ratio"]
    # print(sentiment_ratio)

    framework_tweet_count = fvalue["count"]
    if framework_tweet_count == 0:
        continue
        
    labels = list(sentiment_ratio.keys())
    percentages = [find_percentage(value, framework_tweet_count) for value in list(sentiment_ratio.values()) ]

    gdata = {
        'framework': labels, 
        'percentage': percentages
    }
    df = pd.DataFrame.from_dict(gdata)
    # print(gdata)

    fig = px.pie(
        df, names='framework', values='percentage',
        hover_data=['framework', 'percentage'],
        # color='framework',
        width=800, height=400
    )
    fig.update_layout(
        title=f"{framework.capitalize()} sentiment analysis",
    )
    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.show()


# combined stack graph( Showing percentage of sentiment of each framework based on their total count

In [0]:
import plotly.express as px

all_framewoks = []
all_sentment_labels = []
all_percentages = []

for framework, fvalue in framworks_occurance.items():
    sentiment_ratio = fvalue["sentiment_ratio"]
    # print(sentiment_ratio)

    framework_tweet_count = fvalue["count"]
    if framework_tweet_count == 0:
        continue
        
    labels = list(sentiment_ratio.keys())
    repeated_framework_name = [framework] * len(labels) # ex. ['other', 'other', 'other']

    percentages = [find_percentage(value, framework_tweet_count) for value in list(sentiment_ratio.values()) ]
    
    all_framewoks.extend(repeated_framework_name)
    all_sentment_labels.extend(labels)
    all_percentages.extend(percentages)


In [104]:

import plotly.express as px
import pandas as pd

df = pd.DataFrame(dict(percentages=all_percentages, labels=all_sentment_labels, frameworks=all_framewoks))

fig = px.bar(df, x='frameworks', y='percentages', color='labels', barmode='group',)
fig.show()