In [8]:
import pandas as pd
import snscrape.modules.twitter as sntwitter
from datetime import datetime
from datetime import timedelta
import glob
import os
import numpy
import matplotlib.pyplot as plt
import re
import seaborn as sns
import spacy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Notebook-wide matplotlib defaults: figure size, resolution, and
# automatic tight-layout adjustment (figure.autolayout).
plt.rcParams.update({
    'figure.figsize': (7, 5),
    'figure.dpi': 75,
    'figure.autolayout': True,
})

%matplotlib inline

In [123]:

# Brand search terms. Each entry is used further down as the suffix of a
# CSV file name pattern ("*<term>.csv").
companies = [
    "T-Mobile",
    "sprint",
    "Tide",
    "weather tech",
    "turbotax",
    "wix",
    "squarespace",
    "amazon alexa",
    "uber eats",
    "Budweiser",
    "Budlight",
    "Mars",
    "Jeep",
    "Toyota",
    "Doritos",
    "Avocado from Mexico",
    "pringles",
    "Pepsi",
    "Coca-Cola",
]

In [124]:
# Echo the brand list as a quick sanity check.
print(companies)

['T-Mobile', 'sprint', 'Tide', 'weather tech', 'turbotax', 'wix', 'squarespace', 'amazon alexa', 'uber eats', 'Budweiser', 'Budlight', 'Mars', 'Jeep', 'Toyota', 'Doritos', 'Avocado from Mexico', 'pringles', 'Pepsi', 'Coca-Cola']


In [125]:
# One glob per brand: every entry is the (possibly empty) list of CSV paths
# whose name ends in "<brand>.csv". The outer list is then sorted.
filenames_before = [glob.glob(f'data/before/*{company}.csv') for company in companies]
filenames_before.sort()

filenames_after = [glob.glob(f'data/after/*{company}.csv') for company in companies]
filenames_after.sort()

# data/after/superbowl: tweets whose text contains both the brand and "superbowl".
filenames_after_superbowl = [glob.glob(f'data/after/superbowl/*{company}.csv') for company in companies]
filenames_after_superbowl.sort()

In [126]:
def prep_concat(filenames):
    """Lazily read every CSV named in ``filenames`` and tag it with its origin.

    Parameters
    ----------
    filenames : iterable of iterables of str
        Groups of CSV paths (for example one glob result per brand).

    Yields
    ------
    pandas.DataFrame
        One frame per file, with a fresh 0..n-1 index and two extra columns:
        ``Company_Name`` (the text between the first ``_`` and the next ``.``
        in the path) and ``Directory`` (the second ``/``-separated component
        of the path).
    """
    for group in filenames:
        for csv_path in group:
            frame = pd.read_csv(csv_path, lineterminator='\n')

            # Both labels are parsed from the path string itself.
            _, _, after_first_slash = csv_path.partition("/")
            _, _, after_first_underscore = csv_path.partition("_")

            frame['Company_Name'] = after_first_underscore.partition(".")[0]
            frame['Directory'] = after_first_slash.partition("/")[0]
            yield frame.reset_index(drop=True)

In [127]:
# Stack the per-file frames of each set into one frame with a fresh 0..n-1 index.
before_df = pd.concat(prep_concat(filenames_before), ignore_index=True)
after_df = pd.concat(prep_concat(filenames_after), ignore_index=True)
after_superbowl_df = pd.concat(prep_concat(filenames_after_superbowl), ignore_index=True)

In [128]:
# Preview the combined "before" frame, including the added Company_Name and Directory columns.
before_df.head()

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory
0,https://twitter.com/UWnt2/status/5616740628090...,2015-01-31 23:55:06+00:00,561674062809022465,Amazing weekend #off-road &amp; #camping at Th...,1,0,1,UWnt2,YOUNTO.com,433,711,Amazing weekend #off-road &amp; #camping at Th...,31,1,2015,Jeep,before
1,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:46:28+00:00,561671888288493568,List of things better than perfect Jeep weathe...,1,0,2,ShowDreamCar,Car Pictures,12775,19,List of things better than perfect Jeep weathe...,31,1,2015,Jeep,before
2,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:43:45+00:00,561671203966840834,#tbtuesday 1 yr ago when I first started worki...,1,0,1,ShowDreamCar,Car Pictures,12775,19,#tbtuesday 1 yr ago when I first started worki...,31,1,2015,Jeep,before
3,https://twitter.com/_VictoriaManson/status/561...,2015-01-31 23:28:26+00:00,561667350819520512,"Okay, that jeep is beautiful! Loving everythin...",2,0,0,_VictoriaManson,Vicki,4432,4950,"Okay, that jeep is beautiful! Loving everythin...",31,1,2015,Jeep,before
4,https://twitter.com/Jeepings/status/5616670153...,2015-01-31 23:27:06+00:00,561667015312961536,They call my house The Jeep Garage ;) #jeeplif...,3,0,11,Jeepings,Northern Jeeper‚Ñ¢,1619,738,They call my house The Jeep Garage ;) #jeeplif...,31,1,2015,Jeep,before


In [129]:
# List the distinct company labels that were attached while loading the CSVs.
before_df['Company_Name'].unique()

array(['Jeep', 'Mars', 'sprint', 'Pepsi', 'T-Mobile', 'weather tech',
       'Avocado from Mexico', 'Budweiser', 'Tide', 'squarespace',
       'turbotax', 'Budlight', 'Coca-Cola', 'Doritos', 'Toyota', 'wix',
       'amazon alexa', 'pringles', 'uber eats'], dtype=object)

In [130]:
# Load spaCy's 'en_core_web_lg' pipeline; it is used below to extract named entities from tweets.
nlp = spacy.load('en_core_web_lg')

In [131]:
# Ten "sprint" tweets (positions 10-19 of the matches) to try entity recognition on.
text = before_df.loc[before_df["Company_Name"] == "sprint", 'Text'].to_list()[10:20]

docs = [nlp(line) for line in text]

# spaCy could be a way to detect company names, but with the stock model the
# brand is still not picked up reliably; it would need a custom-trained model
# to work well.
for doc in docs:
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    print(entities)


[('evening', 'TIME')]
[('4', 'CARDINAL'), ('SprintSucks', 'ORG')]
[]
[('sprint', 'ORG'), ('@MasterFutbolUPO  ', 'PERSON'), ('http://t.co/NECvkyxLXY http://t.co/LECaUUYqZk', 'PERSON')]
[('Canadian', 'NORP'), ('Akeem Haynes', 'PERSON'), ('6.51', 'CARDINAL')]
[]
[('@JLeRoy01 Sprint', 'ORG'), ('#$100Credit http://t.co/s3dnLnj74P', 'MONEY')]
[('@relolutionseries', 'ORG'), ('Keirin Final Glasgow @jack_carlin97', 'ORG'), ('2nd', 'ORDINAL')]
[('sprint', 'ORG')]
[('Bergen', 'LOC')]


In [132]:
# "T-Mobile" does not overlap with any ordinary word, so its matches are easier to work with.
# Some tweets mention both Sprint and T-Mobile (Sprint was bought by T-Mobile in 2020).
before_df.loc[before_df["Company_Name"] == "T-Mobile", 'Text'].to_list()[40:50]

['Worst of the Week: Have @Verizon, @ATT, @Sprint and @TMobile reached network nirvana? https://t.co/7KJjc8IOz2 https://t.co/8JAWIKR7HF',
 'Switch to @Sprint &amp; save 50% off most @Verizon, @ATT &amp; @Tmobile rates.  RT if you want a #Denver WIN! #SuperBold50 https://t.co/OOsrIBWk32',
 'Super Bowl Weekend, Super Bowl Savings! @sprint iPhone 5s for only $12.50/mo. Paired with your #50percentoff plan, #bestdealever    @TMobile',
 "T-Mobile's Binge On's video speed throttling leads to people opening Netflix slightly less, but watching  it longer. https://t.co/2WpqYyXMZm",
 'T-Mobile revives ‚ÄòGet a Tablet on Us‚Äô promo, offers discounts on other devices as well https://t.co/rTQ0QNjHzB',
 'Switch to @Sprint &amp; save 50% off most @Verizon, @ATT &amp; @Tmobile rates! RT if you want a #Carolina WIN! #SuperBold50 https://t.co/WO0kP3zOoq',
 'T-mobile has the worst service‚òπÔ∏è',
 'Who Selling A iPhone 6 With T-Mobile?  Hit My Dms.',
 'Free roses today with new service! Stop by  T-Mobile

In [133]:
# "sprint" also shows up as an ordinary word (racing, training), not only as the brand.
# Idea: use NLTK to keep only the proper-noun uses.
before_df.loc[before_df["Company_Name"] == "sprint", 'Text'].to_list()[10:20]

["Great evening racing in the sprint at revs, didn't ride as well as I'd have liked but was a great experience #revolutionseries",
 'Bn on hld w @sprintcare 4 almost an hr. Convinced that @sprint has a co. policy of fraud &amp; inconveniencing customers. #SprintSucks',
 "I wish our data was unlimited but I just can't settle for sprint or T-Mobile garbage asses!",
 '#Football | Benefits of plyometric &amp; sprint trg in adolescent soccer players @MasterFutbolUPO  http://t.co/NECvkyxLXY http://t.co/LECaUUYqZk',
 '"@sprint_mgmt: BREAKING: Canadian Akeem Haynes runs 6.51 for 60m. A new world lead in the event! http://t.co/cBaM9AhiNl" @CorykSandC',
 'Mad sprint and track stand action right in front of me on the back straight! #RevolutionPix @RevolutionUK http://t.co/gjwwi4ucrU',
 '@JLeRoy01 Sprint at it again... Giving them the best deal to switch/add a line of service! #$100Credit http://t.co/s3dnLnj74P',
 '@relolutionseries 4 Keirin Final Glasgow @jack_carlin97 great sprint to be placed 2

In [134]:
# Same ten positions of the "sprint" matches, this time from the "after" set.
after_df.loc[after_df["Company_Name"] == "sprint", 'Text'].to_list()[10:20]

['"@FoolieHypster: If someone says \'I didn\'t see your text\' they absolutely saw your text." If they got @sprint  they dead ass',
 'RadioShack may sell half its stores to Sprint, shut down the rest #technology http://t.co/jATyULyRgo http://t.co/EJB1qG2tu2',
 '@BamaStephen see the original full clip of #Eddie the #Sprint #Donkey http://t.co/iJNx4deL4l',
 '.@RadioShack may close, @sprint would get half of chain http://t.co/YfMUQ8hexi http://t.co/BaPeshQU3t',
 'RadioShack may sell half its stores to Sprint, shut down the rest #technology http://t.co/XQwoeglU4L http://t.co/k3kJMnnCjj',
 'RadioShack may sell half its stores to Sprint, shut down the rest #technology http://t.co/Qa1WApNLLt http://t.co/xBry4XwDyQ',
 'Sprint service suck so hard ..',
 'Report: Sprint in talks to buy RadioShack stores http://t.co/TnhVqM8Meq',
 '#NASCAR ‚Ä¶ 3 teams not running Sprint Unlimited. Here‚Äôs what 1 owner said why they‚Äôre skipping it: http://t.co/PU4QSK01HC @NASCARonNBC',
 'I SWEAR I HATE SPRINT TH

In [135]:
# The Tide brand cannot be found in most of these tweets.
before_df.loc[before_df["Company_Name"] == "Tide", 'Text'].to_list()[40:50]

['Low tide at Nauset Light Beach. https://t.co/vM0pR18LYK https://t.co/CayBQZLBNx',
 'How the tides begin to turn. https://t.co/W0WiiR3G65',
 "@ericgarland Kent State murders: final straw that turned the tide of public opinion against the VN War. Protests didn't stop; the War did.",
 "It's here!  TIDES prequel, FIRST COMMAND is now live... and you can get it free at Instafreebie here... https://t.co/lGSnTFMUHG",
 '@VictorEriceira * #BREXIT &amp; #Trump is a rolling tide of people hungry for their identity back. It is a righteous goal. Pray.',
 'Thrid #commission for the great @Gazukull, his OC the beautiful Ariella, from Dead Tide series. #fantasy #mermaid #digitalart #illustration https://t.co/qxylGpHEEf',
 'GIRLS: SHERMAN TIDE 65, TYGARTS VALLEY BULLDOGS 42 (WVHIT). More final scores: https://t.co/h8yMyFuPjr',
 '"Just like moons and like suns, With the certainty of tides, Just like hopes springing high, Still I‚Äôll rise." - Angelou',
 'RNLI save dog walker cut off by rising tide on 

In [136]:
# Same issue in the "after" set: the Tide brand is mostly absent from the text.
after_df.loc[after_df["Company_Name"] == "Tide", 'Text'].to_list()[40:50]

['@faltskog5450 @Godfather19811 going to be interesting when masses are hit personally and tide of opinion changes... what then?!?',
 'Of course Farage admires Putin. Putin via Assad created this flood tide of human suffering that Farage used for publicity. #Remain #Brexit https://t.co/IcQeMu8Iwu',
 "#Farage The tide is turning that grinning glaiket snake face is due some #BadKarma and it's coming .#Hasbeens #Farageinthegarage  &amp; #Nuttall https://t.co/0WGaWZbV0R",
 'Can France stem the populist tide that is conquering Europe ‚Äî And brought Donald Trump? (via @Pocket) https://t.co/cN69yuGafc',
 "So you don't want Sabrina to open for The Vamps because she is a girl and you only care about good looking bands like The Tide? Smh.",
 '@jfgroves @ArutzSheva_En    BLM, IdleNoMore, LeadNow now where does their funding come from?  Tides?Soros?  Follow the $ people!  Anarchists',
 '@OffsideTrust @steven_walters4 the tide is turning, thanks to @OffsideTrust - no rest until ALL clubs sign up.'

In [137]:
# Sometimes "weather" and "tech" appear in separate places of the tweet rather than as the brand name.
before_df.loc[before_df["Company_Name"] == "weather tech", 'Text'].to_list()[40:50]

['Did you hear? WeatherTech is sneaking into the Big Game this year! Catch the action on 2.13.22. #WeatherTech #FitCrew #BigGame https://t.co/SQhVs1CTNa',
 "The Mustang is coming off the disabled list today, in about one hours time. I truly hope this is the last of the minor problems for a while. Plus, I got Weather Tech mats for my birthday made specifically for the car. I can't wait to see how they look",
 'My 70+ old mum has always been tech-averse (she still uses a flip phone) so you can imagine how impressed I am that she has learned how to use Google Assistant (on a Nest Hub Max) for enquiries like the weather and to pull up Daily Mass YouTube or cooking channels on the TV‚Ä¶',
 'Quantum tech in space? Our remote system can monitor and control the environmental factors in our #quantum labs which can be used for inaccessible and unpredictable environments such as space, underground and unstable weather conditions.  https://t.co/9MbyovnGd4  @PhysicsAtSussex',
 'For February 11, the

In [138]:
# The brand itself seems to come up more often after the Super Bowl (to be confirmed).
after_df.loc[after_df["Company_Name"] == "weather tech", 'Text'].to_list()[40:50]

['8/ @WeatherTech (MacNeil Auto) makes auto floor mats &amp; accessories. 2014 revenue of $450M https://t.co/pvbp44DHWs https://t.co/X5TJWndnYr',
 'Proud of a product I support üá∫üá∏ #madeintheUSA @WeatherTech @SuperBowl',
 'Cool to see the @WeatherTech Super Bowl ad. American made and the title sponsor of @IMSA racing.',
 '@CharlaLDraper Glad to hear it Charla! Thank you!',
 'Great ad, @WeatherTech. Diversity, excellence, and pride on display by workers making automotive products. üëç#MediaWeLike #SB50',
 '@Pinnacle_Ad @WeatherTech how could I not?! Thanks for a great ad.',
 'Bravo @WeatherTech - thanks for giving our fellow Americans a chance at the dream üá∫üá∏üôåüèΩ',
 "@BenCachiaras maybe you can buy #weathertech mats for the #acura so you don't make a mess eating #Doritos while using#razors",
 "There ya' go @USAlovelist : @WeatherTech for the #SuperBowl ad win with the #madeinAerica theme. Can't go wrong with that!",
 '@WeatherTech used skillful cinematography to praise A

In [139]:
# Spot-check ten "turbotax" tweets from the "before" set.
before_df.loc[before_df["Company_Name"] == "turbotax", 'Text'].to_list()[40:50]

['On behalf of @TurboTax &amp; @DiMeMedia ‚Äì Thank you so much for joining us tonight! ¬°Buenas noches! And happy tax season! #ConTurboTaxPuedes #Ad',
 'Follow TurboTax on Facebook for all the latest news and tips: https://t.co/GiFjrYlVQd #ConTurboTaxPuedes #Ad',
 "Many awesome thank you's for everything @turbotax! #ConTurboTaxPuedes https://t.co/Bv6JiyqSib",
 'On behalf of @TurboTax &amp; @DiMeMedia  ‚Äì Thank you so much for joining us tonight! ¬°Buenas noches! happy tax season! #ConTurboTaxPuedes #Ad',
 '@latinomarketing @turbotax cheers to tax season #ConTurboTaxPuedes https://t.co/hewSmKo41s',
 'Follow TurboTax on Facebook for all the latest news and tips: https://t.co/FccnAfYRMH #ConTurboTaxPuedes #Ad',
 '@latinomarketing @turbotax @MarinesDuarte @GabyNatale @paulabendfeldt @AndreaMinski @gusjournalist thanks everyone! #ConTurboTaxPuedes',
 'Follow @TurboTax on Twitter and Instagram to get the scoop on how to make the most of tax season! #ConTurboTaxPuedes #Ad',
 'Follow TurboTa

In [140]:
# Spot-check ten "wix" tweets from the "before" set.
before_df.loc[before_df["Company_Name"] == "wix", 'Text'].to_list()[40:50]

['@YouTube Oops @Wix I tots gave y‚Äôall a shout out and forgot to mention it!',
 "@ugetmade Good thing, indeed! If there's anything we can do to help, contact our team at @WixHelp",
 'No second swims in the 100 free, but SB times earned from Wix and Cannon! #sprinters #hardworkpays',
 '200 IM will see Jones swim in the Championship Final and Campbell swimming Consolation Final. Both swims SB Times, along with Wix! #threeforthree #aquahawks',
 '@___wix___ @itsmehaven Good morning popster! Please help us spread our official hashtags. Thank you!  #SarahGThis15Me #GoldenPantherAwards_SarahGeronimo',
 'Behind the Scenes of @Wix #SuperBowlAd | WATCH HERE &gt;&gt; http://t.co/1cRL6R080a  &lt;&lt; #ItsThatEasy @SBC2015',
 'My very own website! Created by me - made with @Wix http://t.co/Ez3C2w3RI9 #MyProWebsite #ItsThatEasy',
 'Heads up @mvp86hinesward: when you or anyone else says the word ‚ÄúEasy‚Äù during #SB49 everyone in America can win $$$ http://t.co/QwNQiCW1ky',
 'Heads up @TafoyaNBCSp

In [141]:
# Spot-check ten "squarespace" tweets from the "before" set.
before_df.loc[before_df["Company_Name"] == "squarespace", 'Text'].to_list()[40:50]

['Thanks! misterryanboydÔºö@archmotorcycle and @squarespace teamed up for a super bowl commercial... pretty pumped to see our work displayed on such a large scale. #superbowllii #ArchMotorcycle #KeanuReeves #GardHollinger  https://t.co/cDqA4Vz2tb https://t.co/hA9UWJjewO',
 '@Fusion87 @Foursquare @squarespace Thank you, you‚Äôre right.üò≥',
 'But on a serious note @squarespace will we ever get a template that has author profiles and a related post section that showcases blogs in a list format instead of a grid format? That would be my perfect template! @SquarespaceHelp',
 'misterryanboyd shared on instagram. #Archmotorcycle and @squarespace teamed up for a #superbowlcommercials #KRGT1 https://t.co/IhsKwR1iFV https://t.co/1K0bqXN6t3',
 "Thx! Arch Motorcycle This  Sunday Arch / Squarespace will be part of the many amazing commercials  to be shown at this year's Super Bowl. Have a look around to see the  videos. https://t.co/PEffGkpnLF #ArchMotorcycle #PerfmanceCruiser Enjoy.  https://t.c

In [142]:
# Spot-check ten "amazon alexa" tweets from the "before" set.
before_df.loc[before_df["Company_Name"] == "amazon alexa", 'Text'].to_list()[40:50]

['Do you have an Amazon Alexa device? Make sure you add the Westcountry Pointing skill and then you can ask ‚Äòalexa, what‚Äôs in the news‚Äô to get the latest previews, results, reports and betting gossip from @PointingDC #GoPointing https://t.co/ux1KVLmyl7',
 "Get Amazon's 4K-Ready Fire TV Stick With Alexa Voice Remote For Just $39 Today [Usually $50] https://t.co/4bF1A1Ctvp https://t.co/MpdGHlnbpc",
 "An 'acoustic fingerprint' should keep Alexa from waking during Amazon's Super Bowl ad https://t.co/mvjjfFffi8 https://t.co/4Cs6VHfQ6V",
 "I find that the majority of skills for Amazon's Alexa are really poor quality, badly designed or irrelevant. Take this one for example that claims to determine if you are depressed (it actually asks 5 yes/know questions) #DigitalHealth #mentalhealth https://t.co/kFFj1XGeuR",
 'Here you can learn how to Connect Alexa to Wifi, how to Connect Echo to Wifi, how to Set up Amazon Echo to Wifi and many more things related to Amazon Alexa Visit here  https:/

In [143]:
# Spot-check ten "uber eats" tweets from the "before" set.
before_df.loc[before_df["Company_Name"] == "uber eats", 'Text'].to_list()[40:50]

['Uber eats comment ils sont entrain d‚Äôme ruiner',
 'Just order pizza on @UberEats and I found the same delivery person from the latest weekends.  A very kind Indian young guy that comes in a small van with his mother that awaits for him in the back with a big smile.   This broke my heart and I my way to support him is tipping him https://t.co/CmokfoBFAE',
 'my @Uber account was hacked and only way to talk with @Uber_Support is to login to my account. How does that work when its been hacked and all the info has changed to login to it? WTF!!!! What say you @Uber  @Uber_Support @UberEats',
 '@damn_toe Ok?! Cus I‚Äôm ordering Uber eats or what ever‚Ä¶ my motha and sis gone get that damn cheeseburger x2',
 '@Uber_Support I have been trying for over six weeks to get four dollars it‚Äôs some odd change that is owed to me this is ridiculous I have stopped working for Uber eats until this is resolved how can we work not knowing if we‚Äôre going to get paid',
 'Bro I‚Äôve had uber eats meals 

### Using sentiment analysis

In [1]:
import nltk
import ssl

# NOTE(security): workaround for machines where Python cannot verify HTTPS
# certificates during the NLTK download. From this point on, every default
# HTTPS context created in this kernel skips certificate verification.
# Prefer fixing the local certificate store and deleting this block.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This ssl module has no unverified-context factory; keep the default.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Request the one resource explicitly. A bare nltk.download() opens the
# interactive downloader and waits for user input, which stalls
# "Restart Kernel -> Run All" and does not fetch anything by itself.
nltk.download('vader_lexicon')

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [2]:
# Download the 'vader_lexicon' NLTK package (reported as already up to date in the output below).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/priankaball/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [145]:
# One VADER analyzer instance, reused to score the tweets of all three frames below.
sid = SentimentIntensityAnalyzer()

In [146]:
# Store the full polarity result for each tweet: a dict per row
# (the 'compound' entry is pulled out into its own column in the next step).
before_df['scores'] = before_df['Text'].apply(sid.polarity_scores)

before_df.head()

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory,scores
0,https://twitter.com/UWnt2/status/5616740628090...,2015-01-31 23:55:06+00:00,561674062809022465,Amazing weekend #off-road &amp; #camping at Th...,1,0,1,UWnt2,YOUNTO.com,433,711,Amazing weekend #off-road &amp; #camping at Th...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.774, 'pos': 0.226, 'comp..."
1,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:46:28+00:00,561671888288493568,List of things better than perfect Jeep weathe...,1,0,2,ShowDreamCar,Car Pictures,12775,19,List of things better than perfect Jeep weathe...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.615, 'pos': 0.385, 'comp..."
2,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:43:45+00:00,561671203966840834,#tbtuesday 1 yr ago when I first started worki...,1,0,1,ShowDreamCar,Car Pictures,12775,19,#tbtuesday 1 yr ago when I first started worki...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
3,https://twitter.com/_VictoriaManson/status/561...,2015-01-31 23:28:26+00:00,561667350819520512,"Okay, that jeep is beautiful! Loving everythin...",2,0,0,_VictoriaManson,Vicki,4432,4950,"Okay, that jeep is beautiful! Loving everythin...",31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.445, 'pos': 0.555, 'comp..."
4,https://twitter.com/Jeepings/status/5616670153...,2015-01-31 23:27:06+00:00,561667015312961536,They call my house The Jeep Garage ;) #jeeplif...,3,0,11,Jeepings,Northern Jeeper‚Ñ¢,1619,738,They call my house The Jeep Garage ;) #jeeplif...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.853, 'pos': 0.147, 'comp..."


In [147]:
# Extract the single 'compound' value from each score dict into its own column.
before_df['compound'] = before_df['scores'].apply(
    lambda scores: scores['compound']
)

before_df.head()

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory,scores,compound
0,https://twitter.com/UWnt2/status/5616740628090...,2015-01-31 23:55:06+00:00,561674062809022465,Amazing weekend #off-road &amp; #camping at Th...,1,0,1,UWnt2,YOUNTO.com,433,711,Amazing weekend #off-road &amp; #camping at Th...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.774, 'pos': 0.226, 'comp...",0.5859
1,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:46:28+00:00,561671888288493568,List of things better than perfect Jeep weathe...,1,0,2,ShowDreamCar,Car Pictures,12775,19,List of things better than perfect Jeep weathe...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.615, 'pos': 0.385, 'comp...",0.784
2,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:43:45+00:00,561671203966840834,#tbtuesday 1 yr ago when I first started worki...,1,0,1,ShowDreamCar,Car Pictures,12775,19,#tbtuesday 1 yr ago when I first started worki...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
3,https://twitter.com/_VictoriaManson/status/561...,2015-01-31 23:28:26+00:00,561667350819520512,"Okay, that jeep is beautiful! Loving everythin...",2,0,0,_VictoriaManson,Vicki,4432,4950,"Okay, that jeep is beautiful! Loving everythin...",31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.445, 'pos': 0.555, 'comp...",0.8748
4,https://twitter.com/Jeepings/status/5616670153...,2015-01-31 23:27:06+00:00,561667015312961536,They call my house The Jeep Garage ;) #jeeplif...,3,0,11,Jeepings,Northern Jeeper‚Ñ¢,1619,738,They call my house The Jeep Garage ;) #jeeplif...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.853, 'pos': 0.147, 'comp...",0.2263


In [148]:
# Three-way label from the compound score: > 0 is 'pos', exactly 0 is 'neu',
# anything else is 'neg'. (A compound of 0 is deliberately not folded into 'pos'.)
before_df['comp_score'] = before_df['compound'].apply(
    lambda compound: 'pos' if compound > 0 else ('neu' if compound == 0 else 'neg')
)
before_df.head()

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory,scores,compound,comp_score
0,https://twitter.com/UWnt2/status/5616740628090...,2015-01-31 23:55:06+00:00,561674062809022465,Amazing weekend #off-road &amp; #camping at Th...,1,0,1,UWnt2,YOUNTO.com,433,711,Amazing weekend #off-road &amp; #camping at Th...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.774, 'pos': 0.226, 'comp...",0.5859,pos
1,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:46:28+00:00,561671888288493568,List of things better than perfect Jeep weathe...,1,0,2,ShowDreamCar,Car Pictures,12775,19,List of things better than perfect Jeep weathe...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.615, 'pos': 0.385, 'comp...",0.784,pos
2,https://twitter.com/ShowDreamCar/status/561671...,2015-01-31 23:43:45+00:00,561671203966840834,#tbtuesday 1 yr ago when I first started worki...,1,0,1,ShowDreamCar,Car Pictures,12775,19,#tbtuesday 1 yr ago when I first started worki...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,neu
3,https://twitter.com/_VictoriaManson/status/561...,2015-01-31 23:28:26+00:00,561667350819520512,"Okay, that jeep is beautiful! Loving everythin...",2,0,0,_VictoriaManson,Vicki,4432,4950,"Okay, that jeep is beautiful! Loving everythin...",31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.445, 'pos': 0.555, 'comp...",0.8748,pos
4,https://twitter.com/Jeepings/status/5616670153...,2015-01-31 23:27:06+00:00,561667015312961536,They call my house The Jeep Garage ;) #jeeplif...,3,0,11,Jeepings,Northern Jeeper‚Ñ¢,1619,738,They call my house The Jeep Garage ;) #jeeplif...,31,1,2015,Jeep,before,"{'neg': 0.0, 'neu': 0.853, 'pos': 0.147, 'comp...",0.2263,pos


In [149]:
# Score the "after" tweets in three steps: full polarity dict, its 'compound'
# value, then a label ('pos' for > 0, 'neu' for exactly 0, otherwise 'neg').
# A compound of 0 is deliberately not folded into 'pos'.
after_df['scores'] = after_df['Text'].apply(sid.polarity_scores)
after_df['compound'] = after_df['scores'].apply(
    lambda scores: scores['compound']
)
after_df['comp_score'] = after_df['compound'].apply(
    lambda compound: 'pos' if compound > 0 else ('neu' if compound == 0 else 'neg')
)

In [150]:
# Preview the "after" frame with its new scores, compound and comp_score columns.
after_df.head()

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory,scores,compound,comp_score
0,https://twitter.com/billyg67/status/5623991633...,2015-02-02 23:56:24+00:00,562399163350925312,@SolarTJChick I would love for all jeepgirls t...,4,3,9,billyg67,Porsche & schmoopy,4435,4885,@SolarTJChick I would love for all jeepgirls t...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 0.71, 'pos': 0.29, 'compou...",0.8107,pos
1,https://twitter.com/thefox1019/status/56239886...,2015-02-02 23:55:12+00:00,562398865127530498,We're paying a visit to Bundoora RIGHT NOW! Ge...,1,0,2,thefox1019,The Fox 101.9,42613,296,We're paying a visit to Bundoora RIGHT NOW! Ge...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 0.872, 'pos': 0.128, 'comp...",0.5242,pos
2,https://twitter.com/ChadHaase/status/562397634...,2015-02-02 23:50:19+00:00,562397634976301056,Jeep stuck in mud in France 1944. http://t.co/...,11,1,14,ChadHaase,Chad Haase,1086,673,Jeep stuck in mud in France 1944. http://t.co/...,2,2,2015,Jeep,after,"{'neg': 0.222, 'neu': 0.778, 'pos': 0.0, 'comp...",-0.25,neg
3,https://twitter.com/CiscaPr/status/56239722094...,2015-02-02 23:48:40+00:00,562397220943589378,Good morning sexy people! Have a sexy Tuesday!...,2,1,7,CiscaPr,Francisca Prandayani,6077,1992,Good morning sexy people! Have a sexy Tuesday!...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 0.387, 'pos': 0.613, 'comp...",0.9358,pos
4,https://twitter.com/FlexinJC/status/5623969134...,2015-02-02 23:47:27+00:00,562396913417617408,@YellowJeepJewel Wow. You don't need people li...,1,1,2,FlexinJC,James Miller,1156,2090,@YellowJeepJewel Wow. You don't need people li...,2,2,2015,Jeep,after,"{'neg': 0.257, 'neu': 0.593, 'pos': 0.15, 'com...",-0.3991,neg


In [151]:
# Score the Super Bowl tweets in three steps: full polarity dict, its
# 'compound' value, then a label ('pos' for > 0, 'neu' for exactly 0,
# otherwise 'neg'). A compound of 0 is deliberately not folded into 'pos'.
after_superbowl_df['scores'] = after_superbowl_df['Text'].apply(sid.polarity_scores)
after_superbowl_df['compound'] = after_superbowl_df['scores'].apply(
    lambda scores: scores['compound']
)
after_superbowl_df['comp_score'] = after_superbowl_df['compound'].apply(
    lambda compound: 'pos' if compound > 0 else ('neu' if compound == 0 else 'neg')
)

In [152]:
# Preview the first five scored rows.
after_superbowl_df.head(n=5)

Unnamed: 0,url,Datetime,Tweet Id,Text,Retweet Count,Reply Count,Like Count,Username,Display Name,Followers Count,Friends Count,Rendered Content,Day,Month,Year,Company_Name,Directory,scores,compound,comp_score
0,https://twitter.com/NihadAwad/status/562388095...,2015-02-02 23:12:25+00:00,562388095522193410,We should all commend @Jeep for being inclusiv...,2,0,5,NihadAwad,Nihad Awad,35774,1849,We should all commend @Jeep for being inclusiv...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 0.834, 'pos': 0.166, 'comp...",0.4926,pos
1,https://twitter.com/i4unews/status/56238505431...,2015-02-02 23:00:20+00:00,562385054312464384,Jeep #SuperBowl 2015 Ad - I4U News http://t.co...,2,0,0,i4unews,I4U News,12101,4650,Jeep #SuperBowl 2015 Ad - I4U News buff.ly/1HI...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,neu
2,https://twitter.com/LoveInshAllah/status/56237...,2015-02-02 22:26:35+00:00,562376561643372544,Make sure to thank @Jeep! ‚ÄúRacists whine abt i...,1,0,2,LoveInshAllah,"Love, InshAllah",6774,1842,Make sure to thank @Jeep! ‚ÄúRacists whine abt i...,2,2,2015,Jeep,after,"{'neg': 0.121, 'neu': 0.631, 'pos': 0.247, 'co...",0.3802,pos
3,https://twitter.com/MirDAliZ/status/5623749644...,2015-02-02 22:20:14+00:00,562374964473372672,Peace is the message that was being conveyed b...,2,0,1,MirDAliZ,MIR DAWOOD ALI,15,89,Peace is the message that was being conveyed b...,2,2,2015,Jeep,after,"{'neg': 0.0, 'neu': 0.857, 'pos': 0.143, 'comp...",0.5423,pos
4,https://twitter.com/MaryWbn/status/56236490636...,2015-02-02 21:40:16+00:00,562364906365005826,Embarrassing ‚Äú@Adweek: Jeep's #SuperBowl ad is...,1,0,0,MaryWbn,Mary Weatherburn,1378,1779,Embarrassing ‚Äú@Adweek: Jeep's #SuperBowl ad is...,2,2,2015,Jeep,after,"{'neg': 0.148, 'neu': 0.852, 'pos': 0.0, 'comp...",-0.3818,neg


In [153]:
# Number of tweets per sentiment label in before_df.
(
    before_df
    .groupby('comp_score')['Text']
    .count()
)

comp_score
neg     5640
neu    10505
pos    16284
Name: Text, dtype: int64

In [154]:
# Per-label tweet counts divided by the total number of rows in before_df.
before_df.groupby('comp_score')['Text'].count().div(len(before_df))

comp_score
neg    0.173918
neu    0.323938
pos    0.502143
Name: Text, dtype: float64

In [155]:
# Tweet counts broken down by sentiment label, company and year,
# flattened back into ordinary columns.
(
    before_df
    .groupby(['comp_score', 'Company_Name', 'Year'])['Text']
    .count()
    .reset_index()
)

Unnamed: 0,comp_score,Company_Name,Year,Text
0,neg,Avocado from Mexico,2018,2
1,neg,Avocado from Mexico,2019,4
2,neg,Avocado from Mexico,2020,3
3,neg,Avocado from Mexico,2022,1
4,neg,Budlight,2015,33
...,...,...,...,...
266,pos,weather tech,2021,10
267,pos,weather tech,2022,9
268,pos,wix,2015,47
269,pos,wix,2017,29


In [156]:
# Number of tweets per sentiment label in after_df.
(
    after_df
    .groupby('comp_score')['Text']
    .count()
)

comp_score
neg     9277
neu    13869
pos    28871
Name: Text, dtype: int64

In [157]:
# Per-label tweet counts divided by the total number of rows in after_df.
after_df.groupby('comp_score')['Text'].count().div(len(after_df))

comp_score
neg    0.178346
neu    0.266624
pos    0.555030
Name: Text, dtype: float64

In [158]:
# Number of tweets per sentiment label in after_superbowl_df.
(
    after_superbowl_df
    .groupby('comp_score')['Text']
    .count()
)

comp_score
neg    2123
neu    3793
pos    7191
Name: Text, dtype: int64

In [159]:
# Per-label tweet counts divided by the total number of rows in after_superbowl_df.
after_superbowl_df.groupby('comp_score')['Text'].count().div(len(after_superbowl_df))

comp_score
neg    0.161975
neu    0.289387
pos    0.548638
Name: Text, dtype: float64

In [160]:
# Sanity-check the labelling by reading a few tweets marked positive.
# Only the first 10 are shown: listing the whole selection would print
# thousands of strings and bury the rest of the notebook.
before_df.loc[before_df["comp_score"].isin(["pos"]), 'Text'].head(10).to_list()

['Amazing weekend #off-road &amp; #camping at The Cove https://t.co/5Xk64DVrMk #outdoors #camping @HappyJeeps @Jeep_Family http://t.co/trQfxww9cQ',
 'List of things better than perfect Jeep weather http://t.co/ZgUm9QG93j http://t.co/8lSzQyWh4B @ceo0831 @f47128c50bd1417 @senna54321 !',
 'Okay, that jeep is beautiful! Loving everything about it. #Wow http://t.co/pwcvjp7LL8',
 'They call my house The Jeep Garage ;) #jeeplife #jeepmafia #jeepfamily http://t.co/gIXiJ9wyko',
 'Rt for jeep fav for car',
 'Cant believe all the people and friends you make being apart of the Jeep family! #jeepmafia #Jeepfamily #jeeplife',
 '‚Äú@Sarah_Botes:   We made it! So much fun today!   @thewarriorrace @Jeep_SA #Warrior1 #10km üôà   http://t.co/stiErRj7wv‚Äù  / Jeep South Africa',
 'Working on my Jeep and these two showed up and decided to talk about prophets... Was about to hand them a wrench. üòÖüòÇ http://t.co/mYAvBUZHEd',
 '‚Äú@mxchelle7: Torn apart from a Jeep and a Benz üòñ‚Äù JEEPS ARE FUCKING SE

In [161]:
# For each dataset, count tweets per (sentiment label, company, year) and
# keep the result as a flat table; the 'Text' column holds the count.
before_result = (
    before_df
    .groupby(['comp_score', 'Company_Name', 'Year'])['Text']
    .count()
    .reset_index()
)
after_result = (
    after_df
    .groupby(['comp_score', 'Company_Name', 'Year'])['Text']
    .count()
    .reset_index()
)
after_superbowl_result = (
    after_superbowl_df
    .groupby(['comp_score', 'Company_Name', 'Year'])['Text']
    .count()
    .reset_index()
)

### Analysis on a Sample Dataset (T-Mobile)

In [162]:
# Per-sentiment, per-year counts for T-Mobile only, renumbered from zero.
before_result.loc[before_result['Company_Name'] == 'T-Mobile'].reset_index(drop=True)

Unnamed: 0,comp_score,Company_Name,Year,Text
0,neg,T-Mobile,2015,61
1,neg,T-Mobile,2016,39
2,neg,T-Mobile,2017,67
3,neg,T-Mobile,2018,39
4,neg,T-Mobile,2019,17
5,neg,T-Mobile,2021,85
6,neg,T-Mobile,2022,65
7,neu,T-Mobile,2015,101
8,neu,T-Mobile,2016,103
9,neu,T-Mobile,2017,192


In [163]:
# Total number of T-Mobile tweets per year in before_df.
(
    before_df.loc[before_df['Company_Name'] == 'T-Mobile']
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
)

Unnamed: 0,Year,Company_Name,Total_Tweets
0,2015,T-Mobile,338
1,2016,T-Mobile,278
2,2017,T-Mobile,627
3,2018,T-Mobile,242
4,2019,T-Mobile,209
5,2021,T-Mobile,394
6,2022,T-Mobile,250


In [164]:
# Attach each year's total T-Mobile tweet count to the per-sentiment counts,
# giving one row per (year, sentiment label) found in before_result.
join_before = (
    before_df[before_df['Company_Name'].isin(['T-Mobile'])]
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
    .merge(
        before_result[before_result['Company_Name'].isin(['T-Mobile'])].reset_index(drop=True),
        how='left',
        on=['Company_Name', 'Year'],
    )
)

join_before.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text
0,2015,T-Mobile,338,neg,61
1,2015,T-Mobile,338,neu,101
2,2015,T-Mobile,338,pos,176
3,2016,T-Mobile,278,neg,39
4,2016,T-Mobile,278,neu,103


In [165]:
# Share of the year's tweets that carry each sentiment label.
join_before['perc'] = join_before['Text'].div(join_before['Total_Tweets'])
join_before.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text,perc
0,2015,T-Mobile,338,neg,61,0.180473
1,2015,T-Mobile,338,neu,101,0.298817
2,2015,T-Mobile,338,pos,176,0.52071
3,2016,T-Mobile,278,neg,39,0.140288
4,2016,T-Mobile,278,neu,103,0.370504


In [166]:
# Per-sentiment, per-year counts for T-Mobile in the 'after' results
# (original row labels are kept).
after_result.loc[after_result['Company_Name'] == 'T-Mobile']

Unnamed: 0,comp_score,Company_Name,Year,Text
48,neg,T-Mobile,2015,69
49,neg,T-Mobile,2016,115
50,neg,T-Mobile,2017,147
51,neg,T-Mobile,2018,209
52,neg,T-Mobile,2019,98
53,neg,T-Mobile,2021,76
54,neg,T-Mobile,2022,99
140,neu,T-Mobile,2015,86
141,neu,T-Mobile,2016,175
142,neu,T-Mobile,2017,193


In [167]:
# Total number of T-Mobile tweets per year in after_df.
(
    after_df.loc[after_df['Company_Name'] == 'T-Mobile']
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
)

Unnamed: 0,Year,Company_Name,Total_Tweets
0,2015,T-Mobile,750
1,2016,T-Mobile,750
2,2017,T-Mobile,750
3,2018,T-Mobile,750
4,2019,T-Mobile,750
5,2021,T-Mobile,635
6,2022,T-Mobile,456


In [168]:
# Yearly T-Mobile totals from after_df joined with the per-sentiment counts,
# then each count converted into a share of that year's tweets.
join_after = (
    after_df[after_df['Company_Name'].isin(['T-Mobile'])]
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
    .merge(
        after_result[after_result['Company_Name'].isin(['T-Mobile'])].reset_index(drop=True),
        how='left',
        on=['Company_Name', 'Year'],
    )
)

join_after['perc'] = join_after['Text'].div(join_after['Total_Tweets'])
join_after.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text,perc
0,2015,T-Mobile,750,neg,69,0.092
1,2015,T-Mobile,750,neu,86,0.114667
2,2015,T-Mobile,750,pos,595,0.793333
3,2016,T-Mobile,750,neg,115,0.153333
4,2016,T-Mobile,750,neu,175,0.233333


In [169]:
# Show the whole T-Mobile 'after' table, including the per-label share column.
join_after

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text,perc
0,2015,T-Mobile,750,neg,69,0.092
1,2015,T-Mobile,750,neu,86,0.114667
2,2015,T-Mobile,750,pos,595,0.793333
3,2016,T-Mobile,750,neg,115,0.153333
4,2016,T-Mobile,750,neu,175,0.233333
5,2016,T-Mobile,750,pos,460,0.613333
6,2017,T-Mobile,750,neg,147,0.196
7,2017,T-Mobile,750,neu,193,0.257333
8,2017,T-Mobile,750,pos,410,0.546667
9,2018,T-Mobile,750,neg,209,0.278667


In [170]:
# Give the share column a period-specific name so it stays distinguishable
# once the 'before' and 'after' tables are merged.
join_after = join_after.rename(columns={'perc': 'after_perc'})
join_after.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text,after_perc
0,2015,T-Mobile,750,neg,69,0.092
1,2015,T-Mobile,750,neu,86,0.114667
2,2015,T-Mobile,750,pos,595,0.793333
3,2016,T-Mobile,750,neg,115,0.153333
4,2016,T-Mobile,750,neu,175,0.233333


In [171]:
# Give the share column a period-specific name so it stays distinguishable
# once the 'before' and 'after' tables are merged.
join_before = join_before.rename(columns={'perc': 'before_perc'})
join_before.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets,comp_score,Text,before_perc
0,2015,T-Mobile,338,neg,61,0.180473
1,2015,T-Mobile,338,neu,101,0.298817
2,2015,T-Mobile,338,pos,176,0.52071
3,2016,T-Mobile,278,neg,39,0.140288
4,2016,T-Mobile,278,neu,103,0.370504


In [172]:
# Line up the 'before' and 'after' shares for the same company, year and label.
# A left join keeps every 'before' row; overlapping column names receive the
# default _x (before) / _y (after) suffixes.
join_final = join_before.merge(
    join_after,
    how='left',
    on=['Company_Name', 'Year', 'comp_score'],
)

join_final.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets_x,comp_score,Text_x,before_perc,Total_Tweets_y,Text_y,after_perc
0,2015,T-Mobile,338,neg,61,0.180473,750,69,0.092
1,2015,T-Mobile,338,neu,101,0.298817,750,86,0.114667
2,2015,T-Mobile,338,pos,176,0.52071,750,595,0.793333
3,2016,T-Mobile,278,neg,39,0.140288,750,115,0.153333
4,2016,T-Mobile,278,neu,103,0.370504,750,175,0.233333


In [173]:
def categorize(row):
    """Label how a sentiment share moved between the 'before' and 'after' periods.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'before_perc' and 'after_perc'.

    Returns
    -------
    str
        'increase' if after_perc is larger, 'decrease' if it is smaller,
        'no change' if both are equal, and 'unknown' if either value is
        missing (NaN/None).
    """
    before, after = row['before_perc'], row['after_perc']
    # A left merge leaves NaN where one side has no matching row; any
    # comparison with NaN is False, so it must be handled explicitly or it
    # would be reported as a decrease.
    if pd.isna(before) or pd.isna(after):
        return 'unknown'
    if after > before:
        return 'increase'
    if after < before:
        return 'decrease'
    return 'no change'

In [174]:
# Classify every row by comparing its 'after' share with its 'before' share.
join_final['result'] = join_final.apply(categorize, axis=1)
join_final.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets_x,comp_score,Text_x,before_perc,Total_Tweets_y,Text_y,after_perc,result
0,2015,T-Mobile,338,neg,61,0.180473,750,69,0.092,decrease
1,2015,T-Mobile,338,neu,101,0.298817,750,86,0.114667,decrease
2,2015,T-Mobile,338,pos,176,0.52071,750,595,0.793333,increase
3,2016,T-Mobile,278,neg,39,0.140288,750,115,0.153333,increase
4,2016,T-Mobile,278,neu,103,0.370504,750,175,0.233333,decrease


-----

### Analysis on All Datasets

In [175]:
# Same workflow as the T-Mobile sample, without the company filter:
# totals per (Year, Company_Name) are joined to the per-sentiment counts and
# every count is expressed as a share of that company-year's tweets.
join_before = (
    before_df
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
    .merge(
        before_result.reset_index(drop=True),
        how='left',
        on=['Company_Name', 'Year'],
    )
    .assign(before_perc=lambda merged: merged['Text'] / merged['Total_Tweets'])
)

join_after = (
    after_df
    .groupby(['Year', 'Company_Name'])['Text']
    .count()
    .reset_index()
    .rename(columns={'Text': 'Total_Tweets'})
    .merge(
        after_result.reset_index(drop=True),
        how='left',
        on=['Company_Name', 'Year'],
    )
    .assign(after_perc=lambda merged: merged['Text'] / merged['Total_Tweets'])
)

# The left join keeps every 'before' row; a row with no 'after' counterpart
# gets NaN in the 'after' columns.
join_final = join_before.merge(
    join_after,
    how='left',
    on=['Company_Name', 'Year', 'comp_score'],
)
        
# NOTE(review): this repeats the definition from the earlier sample-analysis
# cell; it is kept here so the cell can be run on its own.
def categorize(row):
    """Label how a sentiment share moved between the 'before' and 'after' periods.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'before_perc' and 'after_perc'.

    Returns
    -------
    str
        'increase' if after_perc is larger, 'decrease' if it is smaller,
        'no change' if both are equal, and 'unknown' if either value is
        missing (NaN/None).
    """
    before, after = row['before_perc'], row['after_perc']
    # A left merge leaves NaN where one side has no matching row; any
    # comparison with NaN is False, so it must be handled explicitly or it
    # would be reported as a decrease.
    if pd.isna(before) or pd.isna(after):
        return 'unknown'
    if after > before:
        return 'increase'
    if after < before:
        return 'decrease'
    return 'no change'

# Classify every row by comparing its 'after' share with its 'before' share.
join_final['result'] = join_final.apply(categorize, axis=1)

join_final.head(n=5)

Unnamed: 0,Year,Company_Name,Total_Tweets_x,comp_score,Text_x,before_perc,Total_Tweets_y,Text_y,after_perc,result
0,2015,Avocado from Mexico,20,neu,6,0.3,295,80,0.271186,decrease
1,2015,Avocado from Mexico,20,pos,14,0.7,295,182,0.616949,decrease
2,2015,Budlight,385,neg,33,0.085714,750,106,0.141333,increase
3,2015,Budlight,385,neu,170,0.441558,750,217,0.289333,decrease
4,2015,Budlight,385,pos,182,0.472727,750,427,0.569333,increase


In [176]:
# Show the combined before/after comparison table for all companies.
join_final

Unnamed: 0,Year,Company_Name,Total_Tweets_x,comp_score,Text_x,before_perc,Total_Tweets_y,Text_y,after_perc,result
0,2015,Avocado from Mexico,20,neu,6,0.300000,295,80,0.271186,decrease
1,2015,Avocado from Mexico,20,pos,14,0.700000,295,182,0.616949,decrease
2,2015,Budlight,385,neg,33,0.085714,750,106,0.141333,increase
3,2015,Budlight,385,neu,170,0.441558,750,217,0.289333,decrease
4,2015,Budlight,385,pos,182,0.472727,750,427,0.569333,increase
...,...,...,...,...,...,...,...,...,...,...
266,2022,uber eats,322,neu,99,0.307453,696,207,0.297414,decrease
267,2022,uber eats,322,pos,124,0.385093,696,311,0.446839,increase
268,2022,weather tech,19,neg,4,0.210526,43,5,0.116279,decrease
269,2022,weather tech,19,neu,6,0.315789,43,13,0.302326,decrease
