### Function needed for file analysis

In [205]:
import re


def clean_tweet(tweet): 
        ''' 
        Utility function to clean tweet text by removing links, special characters 
        using simple regex statements. 
        '''
        return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"," ",tweet).split())

def get_tweet_sentiment(tweet): 
    ''' 
    Utility function to classify sentiment of passed tweet 
    using textblob's sentiment method 
    '''
    # create TextBlob object of passed tweet text 
    analysis = TextBlob(clean_tweet(tweet)) 
    # set sentiment 
    if analysis.sentiment.polarity > 0: 
        return 'positive'
    elif analysis.sentiment.polarity == 0: 
        return 'neutral'
    else: 
        return 'negative'

def get_tweet_objective(tweet): 
    ''' 
    Utility function to classify sentiment of passed tweet 
    using textblob's subjectivity method 
    '''
    # create TextBlob object of passed tweet text 
    analysis = TextBlob(clean_tweet(tweet)) 
    # set sentiment 
    if analysis.sentiment.subjectivity > 0.5: 
        return 'subjective'
    else: 
        return 'objective'

def get_ratio(tweet, file):
    ''' 
    Utility function to calculate the percentage of the tag
    provided
    '''
    return {
        'file': file.split('.')[0],
        'positive %': tweet[tweet == 'positive'].count()/tweet.count() * 100,
        'netative %': tweet[tweet == 'negative'].count()/tweet.count() * 100,
        'ratie [positive:negative] %': tweet[tweet == 'positive'].count()/tweet[tweet == 'negative'].count() * 100}

In [197]:
def clean_hashtag(tweet):
    lists = ["#plasticpollutes",
            "#plasticpollution",
            "#noplastic",
            "#sustainability",
            "#zerowaste",
            '#plastic',
            '#banplastic',
            '#ZeroWaste',
            '#FeedtheHungry',
            '# ecofriendly']
    for lis in lists:
        tweet = tweet.replace(f'{lis}', '')
    return tweet
                

In [199]:
import nltk
nltk.download('punkt')

def get_bigrams(text):
    nltk_tokens = nltk.word_tokenize(text)
    return (list(nltk.bigrams(nltk_tokens)))

def get_trigrams(text):
    nltk_tokens = nltk.word_tokenize(text)
    return (list(nltk.trigrams(nltk_tokens)))

def flatten_list(series):
    slist =[]
    for x in series:
        slist.extend(x)
    return slist

[nltk_data] Downloading package punkt to /Users/chen_zeng/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Loop through all the files, calculate the positive, negative percentage and ratio based on files

In [210]:
import pandas as pd
from textblob import TextBlob
import collections


files = ['noplastic.csv', 'plasticpollutes.csv', 'plasticpollution1.csv', 'plasticpollution2.csv',
        'sustainability.csv', 'zerowaste.csv']
results = pd.DataFrame()
for file in files:
    # read file
    print(f"------{file}------")
    df = pd.read_csv(f'./hashtags/{file}')
    
    # get full_text and clean up
    dff = df['full_text'].to_frame()
    dff['clean'] = dff['full_text'].apply(lambda x: (clean_tweet(x)))
#     dff['clean'] = dff['clean'].apply(lambda x: clean_hashtag(x))
    
    # do the analysis on sentiment
    dff['sentiment'] = dff['clean'].apply(lambda x: get_tweet_sentiment((x)))
    dff['subjectivity'] = dff['clean'].apply(lambda x: get_tweet_objective((x)))
    
    # calculate the ratio and keep the dataframe together
    series = dff['sentiment']
    result = get_ratio(series, file)
    results = results.append(result, ignore_index=True)
    
    # add n_grams and get the most_common words
    print("---- bigrams ----")
    dff['bi_grams'] = dff['clean'].apply(lambda x: get_bigrams(x))
    lis = flatten_list(dff['bi_grams'])
    c = collections.Counter(lis)
    print(c.most_common(10))
    print("---- trigrams ----")
    dff['tri_grams'] = dff['clean'].apply(lambda x: get_trigrams(x))
    lis = flatten_list(dff['tri_grams'])
    c = collections.Counter(lis)
    print(c.most_common(10))
    

    del df, dff

------noplastic.csv------
---- bigrams ----
[(('EstoNOtienequePARAR', 'SalimosDeEsta'), 39), (('SalimosDeEsta', 'Informacion'), 39), (('Informacion', 'zocoup'), 39), (('zocoup', 'hechoamano'), 39), (('hechoamano', 'artesanal'), 39), (('artesanal', 'noplastic'), 39), (('noplastic', 'economiasostenible'), 39), (('economiasostenible', 'sinplasticos'), 39), (('plasticfree', 'plasticpollution'), 30), (('ecofriendly', 'pollution'), 30)]
---- trigrams ----
[(('EstoNOtienequePARAR', 'SalimosDeEsta', 'Informacion'), 39), (('SalimosDeEsta', 'Informacion', 'zocoup'), 39), (('Informacion', 'zocoup', 'hechoamano'), 39), (('zocoup', 'hechoamano', 'artesanal'), 39), (('hechoamano', 'artesanal', 'noplastic'), 39), (('artesanal', 'noplastic', 'economiasostenible'), 39), (('noplastic', 'economiasostenible', 'sinplasticos'), 39), (('ecofriendly', 'eco', 'ecofriendlyliving'), 28), (('eco', 'ecofriendlyliving', 'pollutionfree'), 27), (('ecofriendlyliving', 'pollutionfree', 'plasticfree'), 27)]
------plasti

In [140]:
results

Unnamed: 0,file,netative %,positive %,ratie [positive:negative] %
0,noplastic,12.307692,30.461538,247.5
1,plasticpollutes,12.5,50.0,400.0
2,plasticpollution1,17.199327,46.383516,269.682152
3,plasticpollution2,13.930348,50.248756,360.714286
4,sustainability,10.02,55.04,549.301397
5,zerowaste,13.576159,43.377483,319.512195


### Dataframe exploration

In [49]:
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.max_rows', 400)

In [55]:
dff[['sentiment', 'clean', 'subjectivity']]

Unnamed: 0,sentiment,clean,subjectivity
0,positive,Great feeling when you protect&amp;let it go into wild #ROBIN UK's national bird #Aneeshwar named it as Regee the Robin #Noplastic _Cymru _Bhandari,subjective
1,positive,We all need to live more sustainably or we will be living without so much more #SupportYourLocalFarmer #growyourownfood #zerowaste #noplastic #shoplocal,objective
2,positive,_India Sir 🙏 Truly magnificent Indeed seems like an experience of a lifetime Query Is there a website where one can such travel arrangements bookings stays etc #NoPlastic #NoGarbage #ResponsibleTourismTogether,subjective
3,positive,One of the most refreshing drinks you can have. Sweet Mango Lassi made for mango pulp a bit of yogurt &amp water. If you've never tried it give it a go And yes paper straws no plastic :) . . . #sweetmango #mango #thecumin #giveitatry #paperstraws #noplastic #noplasticwaste,subjective
4,neutral,Today's anniversary #sale items Offer valid until September 30th #zerowaste #zerowasteecostore #sustainable #sustainability #planetearth #gogreen #earth #climatechange #noplastic,objective
5,neutral,#ecofriendly #eco #ecofriendlyliving #pollutionfree #plasticfree #plasticpollution #positivenews #environmentallyfriendly #environnement #ecofriendly #pollution #ocean #climatechange #oceanpollution #noplastic #lesswaste #saveourplanet..,objective
6,positive,Playing around with a new design for our Sandalwood &amp Vanilla #soap range What do you think #soapmaking #soapdesign #becreative #zerowaste #noplastic #handmade #handmadesoap #handcrafted #shopsmall #shoplocal #indieoxford #oxford #oxforduk,objective
7,negative,0181 -Green Shades Mandala Tote Bag via #totebag #bags #apparel #pattern #musthave #travelbag #beachbag #grocerybag #fashion #noplastic #gift #birthday #holiday #onsale #Zazzle,objective
8,positive,The planet is beautiful And it’s up to us to keep it that way for our own survival #Maldives #adducity #SaveTheWorld #ClimateAction #ClimateEmergency #environment #noplastic #savemarinelife #doyourshare,subjective
9,neutral,Zeg luitjes ik zit naar de #TourdeFrance te kijken Ik vraag mij af nu ik zie hoeveel bidons die renners wegflikkeren wie die troep eigenlijk opruimt #noplastic,objective


In [62]:
dff[(dff['sentiment']=='negative') & (dff['subjectivity'] =='subjective')]

Unnamed: 0,full_text,sentiment,clean,subjectivity
36,"@GretaThunberg Thank you @GretaThunberg for all your work. It's so frustrating that people aren't taking this seriously! But, FYI, I try to do something more to limit my impact every day. #SupportyourLocalFarmer #noplastic #zerowaste",negative,Thank you for all your work It's so frustrating that people aren't taking this seriously But FYI I try to do something more to limit my impact every day #SupportyourLocalFarmer #noplastic #zerowaste,subjective
52,"Filters from cigarette butts, bottles, bottle caps, food wrappers, grocery bags, and polystyrene containers are the worst polluters of beaches.\n\n#liquife #liquifeco #noplastic #plasticfree #stopplastic #beachlitter https://t.co/qXCnvA7z61",negative,Filters from cigarette butts bottles bottle caps food wrappers grocery bags and polystyrene containers are the worst polluters of beaches. #liquife #liquifeco #noplastic #plasticfree #stopplastic #beachlitter,subjective
153,Plastic also contains lots of nasty chemicals that can poison animals and cause internal injuries. #veshisaafu #Maldives #adducity #noplastic #doyourshare #ClimateAction #ClimateEmergency #ClimateChange #environment #clean #keepclean #savemarinelife #saveoceans #savetheearth https://t.co/fu5ihl1LtW,negative,Plastic also contains lots of nasty chemicals that can poison animals and cause internal injuries #veshisaafu #Maldives #adducity #noplastic #doyourshare #ClimateAction #ClimateEmergency #ClimateChange #environment #clean #keepclean #savemarinelife #saveoceans #savetheearth,subjective
228,“@Oxford_Soap_Co” Doughnut ring #soaponarope 🤗 Very limited numbers in our @CoveredMarketOx store so pop by today or tomorrow to snap yours up! Only £7.50 🤩 #handmadesoap #handmade #handcrafted #vegan #vegetarian #zerowaste #noplastic ... https://t.co/yua6BALUJI https://t.co/pkwnlBkxwN,negative,“ _Soap_Co Doughnut ring #soaponarope Very limited numbers in our store so pop by today or tomorrow to snap yours up Only £7.50 #handmadesoap #handmade #handcrafted #vegan #vegetarian #zerowaste #noplastic ..,subjective
232,Doughnut ring #soaponarope 🤗 Very limited numbers in our @CoveredMarketOx store so pop by today or tomorrow to snap yours up! Only £7.50 🤩 #handmadesoap #handmade #handcrafted #vegan #vegetarian #zerowaste #noplastic #indieoxford #oxford #oxforduk #shopsmall #shoplocal https://t.co/oOmlT0wzAB,negative,Doughnut ring #soaponarope Very limited numbers in our store so pop by today or tomorrow to snap yours up Only £7.50 #handmadesoap #handmade #handcrafted #vegan #vegetarian #zerowaste #noplastic #indieoxford #oxford #oxforduk #shopsmall #shoplocal,subjective
291,Nonprofit @washedashoreart turns #OceanPlastic from Oregon beaches into 'beautiful and horrifying' sculptures of wildlife harmed by #PlasticPollution \nhttps://t.co/NOcsJor2AJ\n#noplastic #zerowaste #plasticfree #ecofriendly #savetheplanet #gogreen #recycle #sustainability #art https://t.co/OVwyJNk1Yo,negative,Nonprofit turns #OceanPlastic from Oregon beaches into 'beautiful and horrifying sculptures of wildlife harmed by #PlasticPollution #noplastic #zerowaste #plasticfree #ecofriendly #savetheplanet #gogreen #recycle #sustainability #art,subjective
315,This is just horrible.\n\n#noplastic #plasticfree #marinelife #savethemarinelife #savethebirds #saveseabirds #plastic #breakfreefromplastic #liquife #liquifeco https://t.co/M4YHlgTlbj,negative,This is just horrible. #noplastic #plasticfree #marinelife #savethemarinelife #savethebirds #saveseabirds #plastic #breakfreefromplastic #liquife #liquifeco,subjective
