# Imports

In [69]:
import pandas as pd

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import en_core_web_sm

from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.base import TransformerMixin
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

### Setup Spacy NLP

In [70]:
nlp = en_core_web_sm.load()

### Load Dataset(s)

In [71]:
df = pd.read_csv('data/train.csv')

### Explore & Clean Data

In [72]:
df.head(25)

Unnamed: 0,2401,Borderlands,1,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,1,I am coming to the borders and I will kill you...
1,2401,Borderlands,1,im getting on borderlands and i will kill you ...
2,2401,Borderlands,1,im coming on borderlands and i will murder you...
3,2401,Borderlands,1,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,1,im getting into borderlands and i can murder y...
5,2402,Borderlands,1,So I spent a few hours making something for fu...
6,2402,Borderlands,1,So I spent a couple of hours doing something f...
7,2402,Borderlands,1,So I spent a few hours doing something for fun...
8,2402,Borderlands,1,So I spent a few hours making something for fu...
9,2402,Borderlands,1,2010 So I spent a few hours making something f...


In [73]:
df.columns = ['id', 'entity', 'sentiment', 'review']

In [74]:
train = df.drop(columns=['id', 'entity'])

In [75]:
train.head(25)

Unnamed: 0,sentiment,review
0,1,I am coming to the borders and I will kill you...
1,1,im getting on borderlands and i will kill you ...
2,1,im coming on borderlands and i will murder you...
3,1,im getting on borderlands 2 and i will murder ...
4,1,im getting into borderlands and i can murder y...
5,1,So I spent a few hours making something for fu...
6,1,So I spent a couple of hours doing something f...
7,1,So I spent a few hours doing something for fun...
8,1,So I spent a few hours making something for fu...
9,1,2010 So I spent a few hours making something f...


In [76]:
train.value_counts('sentiment')

sentiment
 0    31308
-1    22542
 1    20831
dtype: int64

In [77]:
train = train[train['sentiment'] != 0]

In [78]:
train.value_counts('sentiment')

sentiment
-1    22542
 1    20831
dtype: int64

In [79]:
train['sentiment'].mask(train['sentiment'] == -1, 0, inplace=True)

In [80]:
train.value_counts('sentiment')

sentiment
0    22542
1    20831
dtype: int64

In [81]:
train.isna().sum()

sentiment      0
review       361
dtype: int64

In [82]:
train.dropna(axis=0, inplace=True)

In [83]:
stopwords = list(STOP_WORDS)
stopwords

['five',
 "'s",
 'again',
 'him',
 'indeed',
 'my',
 'whence',
 'here',
 'will',
 'he',
 'done',
 'than',
 'put',
 'elsewhere',
 '‘ve',
 'do',
 'beforehand',
 'was',
 'eleven',
 'own',
 'we',
 'why',
 'have',
 'now',
 'about',
 'has',
 'same',
 'someone',
 'nine',
 'twenty',
 'anything',
 'from',
 '’m',
 'this',
 'least',
 'only',
 'their',
 'one',
 'enough',
 'whither',
 'all',
 'already',
 'fifty',
 'both',
 'how',
 "n't",
 'n‘t',
 'through',
 'many',
 'few',
 'must',
 'are',
 'whereupon',
 'nothing',
 'make',
 'name',
 'mine',
 'within',
 'several',
 'seeming',
 'very',
 'were',
 'however',
 'therein',
 'thus',
 'those',
 'meanwhile',
 'should',
 'top',
 'wherein',
 'across',
 'seem',
 'itself',
 'afterwards',
 'his',
 'much',
 'regarding',
 'did',
 'sixty',
 'our',
 'together',
 'none',
 'beside',
 'perhaps',
 'no',
 'made',
 'in',
 'by',
 'quite',
 'thru',
 'it',
 'n’t',
 "'ll",
 'well',
 'whose',
 'hundred',
 'eight',
 'nobody',
 'get',
 'without',
 'full',
 'herself',
 'us',
 'p

In [84]:
def clean_text(s):
    split_text = s.split()
    cleaned_text = [word for word in split_text if word not in stopwords]
    final_text = ' '.join(cleaned_text)
    return final_text

In [85]:
text = "So I spent a couple of hours doing something for fun... If you don't know that I'm a huge @ Borderlands fan and Maya is one of my favorite characters, I decided to make a wallpaper for my PC.. Her..."

clean_text(text)

"So I spent couple hours fun... If don't know I'm huge @ Borderlands fan Maya favorite characters, I decided wallpaper PC.. Her..."

In [86]:
train['review'] = train['review'].apply(clean_text)

In [87]:
train.head(25)

Unnamed: 0,sentiment,review
0,1,"I coming borders I kill all,"
1,1,"im getting borderlands kill all,"
2,1,"im coming borderlands murder all,"
3,1,"im getting borderlands 2 murder all,"
4,1,"im getting borderlands murder all,"
5,1,So I spent hours making fun. . . If don't know...
6,1,So I spent couple hours fun... If don't know I...
7,1,So I spent hours fun... If don't know I'm HUGE...
8,1,So I spent hours making fun. . . If don't know...
9,1,2010 So I spent hours making fun. . . If don't...


### Setup sklearn imports

### Create Pipe

# Check Model

### Accuracy

### Run tests on examples