In [1]:
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import nltk
from nltk.tokenize import regexp_tokenize, word_tokenize, RegexpTokenizer
from nltk.corpus import stopwords, wordnet
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix

In [2]:
df = pd.read_csv('data/judge-1377884607_tweet_product_company.csv', encoding="ISO-8859-1")

In [3]:
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 3 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
dtypes: object(3)
memory usage: 213.2+ KB


In [5]:
df['tweet_text'].value_counts

<bound method IndexOpsMixin.value_counts of 0       .@wesley83 I have a 3G iPhone. After 3 hrs twe...
1       @jessedee Know about @fludapp ? Awesome iPad/i...
2       @swonderlin Can not wait for #iPad 2 also. The...
3       @sxsw I hope this year's festival isn't as cra...
4       @sxtxstate great stuff on Fri #SXSW: Marissa M...
                              ...                        
9088                        Ipad everywhere. #SXSW {link}
9089    Wave, buzz... RT @mention We interrupt your re...
9090    Google's Zeiger, a physician never reported po...
9091    Some Verizon iPhone customers complained their...
9092    Ï¡Ïàü_ÊÎÒ£Áââ_£â_ÛâRT @...
Name: tweet_text, Length: 9093, dtype: object>

In [6]:
#keywords TBD will review sentiment
apple_keywords = ['iPad', 'IPad', 'i-Pad', 'IPAD', 'iphone', 'iPhone', 'Iphone', 'i-phone', 'I-Phone', 'Apple', 'apple', 'Ipad', 'ipad', 'iTunes','Mac', 'IPhone', 'IPHONE']
google_keywords = ['google', 'android', 'Google', 'Android', 'samsung', 'GOOGLE']

def find_brand(text):
    if isinstance(text, str):
        for word in apple_keywords:
            if word in text:
                return "Apple" 
        for word in google_keywords:
            if word in text:
                return "Google" 
    return 'No_Product'

df['product'] = df['tweet_text'].apply(find_brand)


In [7]:
df['product'].value_counts()

Apple         5587
Google        2771
No_Product     735
Name: product, dtype: int64

In [8]:
df['product'].isna().sum()

0

In [9]:
df['product'].head(500)

0           Apple
1           Apple
2           Apple
3           Apple
4          Google
          ...    
495         Apple
496         Apple
497    No_Product
498         Apple
499         Apple
Name: product, Length: 500, dtype: object

In [10]:
df['tweet_text'][497]

'Get #SXSW film red carpet coverage from @mention CW Austin Star Mandy Dugan on Grouped{in}, get the app {link}'

In [11]:
mapping = {"Positive emotion": 2, "No emotion toward brand or product": 1, "I can't tell": 1, "Negative emotion": 0}
df['emotion'] = df["is_there_an_emotion_directed_at_a_brand_or_product"].map(mapping)

In [12]:
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,product,emotion
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,Apple,0
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,Apple,2
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,Apple,2
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,Apple,0
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,Google,2


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 5 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
 3   product                                             9093 non-null   object
 4   emotion                                             9093 non-null   int64 
dtypes: int64(1), object(4)
memory usage: 355.3+ KB


In [14]:
#use 1 to show all info in cell below then comment it out and use 2 to go back to default view
#1
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

#2
#pd.reset_option('display.max_rows')
#pd.reset_option('display.max_colwidth')

In [15]:
#exploring 'No_Product' tweets
filtered_df = df[df['product'] == 'No_Product']

print(filtered_df['tweet_text'])

6                                                                                                                                                                  NaN
51                                    ÛÏ@mention {link} &lt;-- HELP ME FORWARD THIS DOC to all Anonymous accounts, techies,&amp; ppl who can help us JAM #libya #SXSW
52                                                                                     ÷¼ WHAT? ÷_ {link} ã_ #edchat #musedchat #sxsw #sxswi #classical #newTwitter
53                                                                        .@mention @mention on the location-based 'fast, fun and future' - {link} (via @mention #sxsw
65                       Agree. RT @mention Wait. FIONA APPLE is in town??? Somebody kidnap her and put her in a recording studio until she records a new album. #sxsw
66                                                                            At #sxsw? @mention / @mention wanna buy you a drink. 7pm at Fado on 4th. {link} Join us

In [16]:
print(df['tweet_text'][65])
print(df['tweet_text'][3079])
print(df['tweet_text'][4478])
#ill have to manually sort these

Agree. RT @mention Wait. FIONA APPLE is in town??? Somebody kidnap her and put her in a recording studio until she records a new album. #sxsw
Can I tweet this if I only use APPLE :) RT @mention Microsoft's DPE will b SXSWi &amp; Tweeting @mention #microsoft &amp; #sxsw
Wait. FIONA APPLE is in town??? Somebody kidnap her and put her in a recording studio until she records a new album. #sxsw


In [17]:
#not sure about this prob nothing
print(df.iloc[7013])

tweet_text                                            FOUND iPHONE: Ballroom D just now. Volunteer at door has it. #sxsw #sxswi
emotion_in_tweet_is_directed_at                                                                                             NaN
is_there_an_emotion_directed_at_a_brand_or_product                                           No emotion toward brand or product
product                                                                                                              No_Product
emotion                                                                                                                       1
Name: 7013, dtype: object


In [18]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Assuming df is your DataFrame with tweet texts
# Example:
# df = pd.DataFrame({'tweet_text': ["I love this place!", "This movie is terrible.", "Feeling neutral about this weather."]})

# Drop NaN values or replace with empty strings
df['tweet_text'].fillna('', inplace=True)

# Analyze sentiment for each tweet and add the scores to the DataFrame
df['sentiment'] = df['tweet_text'].apply(lambda tweet: analyzer.polarity_scores(tweet))

# Extract compound sentiment score from the sentiment dictionary
df['compound_score'] = df['sentiment'].apply(lambda score_dict: score_dict['compound'])

# Function to categorize compound score into positive, negative, or neutral
def categorize_sentiment(score):
    if score > 0.05:
        return '2'
    elif score < -0.05:
        return '0'
    else:
        return '1'

# Apply the function to create a new column with sentiment category
df['sentiment_category'] = df['compound_score'].apply(categorize_sentiment)

# Print or use the DataFrame as per your requirement
print(df[['tweet_text', 'compound_score', 'sentiment_category']])

                                                                                                                                                                              tweet_text  \
0                                                        .@wesley83 I have a 3G iPhone. After 3 hrs tweeting at #RISE_Austin, it was dead!  I need to upgrade. Plugin stations at #SXSW.   
1                                            @jessedee Know about @fludapp ? Awesome iPad/iPhone app that you'll likely appreciate for its design. Also, they're giving free Ts at #SXSW   
2                                                                                                        @swonderlin Can not wait for #iPad 2 also. They should sale them down at #SXSW.   
3                                                                                                     @sxsw I hope this year's festival isn't as crashy as this year's iPhone app. #sxsw   
4                                                    @sxtxst

In [19]:
df.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,product,emotion,sentiment,compound_score,sentiment_category
0,".@wesley83 I have a 3G iPhone. After 3 hrs tweeting at #RISE_Austin, it was dead! I need to upgrade. Plugin stations at #SXSW.",iPhone,Negative emotion,Apple,0,"{'neg': 0.173, 'neu': 0.827, 'pos': 0.0, 'compound': -0.68}",-0.68,0
1,"@jessedee Know about @fludapp ? Awesome iPad/iPhone app that you'll likely appreciate for its design. Also, they're giving free Ts at #SXSW",iPad or iPhone App,Positive emotion,Apple,2,"{'neg': 0.0, 'neu': 0.59, 'pos': 0.41, 'compound': 0.91}",0.91,2
2,@swonderlin Can not wait for #iPad 2 also. They should sale them down at #SXSW.,iPad,Positive emotion,Apple,2,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0,1
3,@sxsw I hope this year's festival isn't as crashy as this year's iPhone app. #sxsw,iPad or iPhone App,Negative emotion,Apple,0,"{'neg': 0.0, 'neu': 0.681, 'pos': 0.319, 'compound': 0.7269}",0.7269,2
4,"@sxtxstate great stuff on Fri #SXSW: Marissa Mayer (Google), Tim O'Reilly (tech books/conferences) &amp; Matt Mullenweg (Wordpress)",Google,Positive emotion,Google,2,"{'neg': 0.0, 'neu': 0.796, 'pos': 0.204, 'compound': 0.6249}",0.6249,2


#vader sucks lets generate negatives