# Business Problem

Analyze Twitter responses to the products showcased at South by Southwest (SXSW) and determine which product was received most favorably.

# Imports and Data Loading

In [128]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline

import nltk
nltk.download("stopwords")
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Owner\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [129]:
# Load the raw tweet data set: roughly 9,000 rows and three columns.
df = pd.read_csv(
    "data/twitter_data.csv",
    encoding="unicode_escape",
)

In [130]:
# Keep only the rows that actually have tweet text; one row is missing it.
df = df.dropna(subset=["tweet_text"])

In [131]:
_TEXT_TOOLS = {}


def _get_text_tools():
    """Build the stopword set, stemmer and tokenizer on first use, then reuse them."""
    if not _TEXT_TOOLS:
        # The corpus reader returns a list; a frozenset gives constant-time
        # membership tests and is built only once instead of once per token.
        _TEXT_TOOLS["stopwords"] = frozenset(stopwords.words("english"))
        _TEXT_TOOLS["stemmer"] = PorterStemmer()
        _TEXT_TOOLS["tokenizer"] = RegexpTokenizer(r"(?u)\w{3,}")
    return _TEXT_TOOLS


def preprocess_text(tweet):
    """Turn a raw tweet string into a list of stemmed tokens.

    Steps, in order:
      1. lower-case the text;
      2. keep only runs of three or more word characters;
      3. drop NLTK's English stopwords;
      4. reduce each remaining token to its Porter stem.

    Parameters
    ----------
    tweet : str
        Raw tweet text. Must be a string (missing values have to be removed
        beforehand, since `.lower()` is called on it).

    Returns
    -------
    list of str
        Stemmed tokens in their original order; empty if nothing survives.
    """
    tools = _get_text_tools()
    english_stopwords = tools["stopwords"]
    stemmer = tools["stemmer"]

    tokens = tools["tokenizer"].tokenize(tweet.lower())
    # Filter first, then stem, so stopword matching is done on the raw tokens.
    return [stemmer.stem(token) for token in tokens if token not in english_stopwords]

# Tokenize, filter and stem every tweet; results are stored positionally, one list per row.
df["preprocessed_text"] = list(map(preprocess_text, df["tweet_text"]))

In [159]:
def company(tweet):
    """Label a tweet with the company whose keywords it contains.

    Each keyword is tested against `tweet` with the `in` operator. Apple
    keywords are checked before Google keywords, so a tweet containing both
    is labelled "apple".

    Returns "apple", "google", or None when no keyword is present.
    """
    keywords_by_company = (
        ("apple", ("appl", "iphon", "ipad")),
        ("google", ("googl", "android")),
    )
    for label, keywords in keywords_by_company:
        if any(keyword in tweet for keyword in keywords):
            return label
    return None

# Derive a company label for every row from its preprocessed tokens.
df["predict_company"] = list(map(company, df["preprocessed_text"]))

In [153]:
def directed_at(tweet):
    """Label a tweet with the first product keyword it contains.

    Keywords are tested against `tweet` with the `in` operator in the order
    "app", "ipad", "iphon"; the first hit decides the label.

    Returns "app", "ipad", "iphone", or None when no keyword is present.
    """
    keyword_to_label = (
        ("app", "app"),
        ("ipad", "ipad"),
        ("iphon", "iphone"),
    )
    for keyword, label in keyword_to_label:
        if keyword in tweet:
            return label
    return None

# Derive a product label for every row from its preprocessed tokens.
df["predict_directed"] = list(map(directed_at, df["preprocessed_text"]))

In [160]:
# Tally the predicted company labels. value_counts() skips missing values by
# default, so rows where company() returned None are not counted.
df["predict_company"].value_counts()

apple     5477
google    2783
Name: predict_company, dtype: int64

In [157]:
# Spot-check the preprocessed tokens of the row labelled 4.
df.loc[4, "preprocessed_text"]

['sxtxstate',
 'great',
 'stuff',
 'fri',
 'sxsw',
 'marissa',
 'mayer',
 'googl',
 'tim',
 'reilli',
 'tech',
 'book',
 'confer',
 'amp',
 'matt',
 'mullenweg',
 'wordpress']

In [158]:
# Show the raw text of the row labelled 4 for comparison with its tokens.
df.loc[4, "tweet_text"]

"@sxtxstate great stuff on Fri #SXSW: Marissa Mayer (Google), Tim O'Reilly (tech books/conferences) &amp; Matt Mullenweg (Wordpress)"

In [161]:
# Display the frame with the derived preprocessed_text, predict_company and
# predict_directed columns next to the original ones for a visual check.
df

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product,preprocessed_text,predict_company,predict_directed
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion,"[wesley83, iphon, hr, tweet, rise_austin, dead...",apple,iphone
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion,"[jessede, know, fludapp, awesom, ipad, iphon, ...",apple,app
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion,"[swonderlin, wait, ipad, also, sale, sxsw]",apple,ipad
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion,"[sxsw, hope, year, festiv, crashi, year, iphon...",apple,app
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion,"[sxtxstate, great, stuff, fri, sxsw, marissa, ...",google,
...,...,...,...,...,...,...
9088,Ipad everywhere. #SXSW {link},iPad,Positive emotion,"[ipad, everywher, sxsw, link]",apple,ipad
9089,"Wave, buzz... RT @mention We interrupt your re...",,No emotion toward brand or product,"[wave, buzz, mention, interrupt, regularli, sc...",google,
9090,"Google's Zeiger, a physician never reported po...",,No emotion toward brand or product,"[googl, zeiger, physician, never, report, pote...",google,
9091,Some Verizon iPhone customers complained their...,,No emotion toward brand or product,"[verizon, iphon, custom, complain, time, fell,...",apple,iphone


In [162]:
# Frequency of each value in the emotion_in_tweet_is_directed_at column
# (value_counts() leaves out missing values by default).
df["emotion_in_tweet_is_directed_at"].value_counts()

iPad                               946
Apple                              661
iPad or iPhone App                 470
Google                             430
iPhone                             297
Other Google product or service    293
Android App                         81
Android                             78
Other Apple product or service      35
Name: emotion_in_tweet_is_directed_at, dtype: int64