# Importing libraries

In [1]:
import pandas as pd
import numpy as np

# Loading the dataset

In [2]:
# Read the raw tweets from the CSV file; the path is relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer='tweets.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
data.head(n=5)

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1...
1,2,0,Finally a transparant silicon case ^^ Thanks t...
2,3,0,We love this! Would you go? #talk #makememorie...
3,4,0,I'm wired I know I'm George I was made that wa...
4,5,1,What amazing service! Apple won't even talk to...


In [4]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(7920, 3)

# Data Preprocessing

In [6]:
import gensim

In [7]:
# Tokenise every tweet with gensim's simple_preprocess and store the resulting
# token list in a new 'tweet_clean' column next to the raw text.
data['tweet_clean'] = data['tweet'].apply(gensim.utils.simple_preprocess)

In [8]:
# Preview the first five rows again to check the new 'tweet_clean' column.
data.head(n=5)

Unnamed: 0,id,label,tweet,tweet_clean
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1...,"[fingerprint, pregnancy, test, https, goo, gl,..."
1,2,0,Finally a transparant silicon case ^^ Thanks t...,"[finally, transparant, silicon, case, thanks, ..."
2,3,0,We love this! Would you go? #talk #makememorie...,"[we, love, this, would, you, go, talk, makemem..."
3,4,0,I'm wired I know I'm George I was made that wa...,"[wired, know, george, was, made, that, way, ip..."
4,5,1,What amazing service! Apple won't even talk to...,"[what, amazing, service, apple, won, even, tal..."


# Feature Extraction

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [16]:
# TF-IDF vectorizer constructed with no arguments, i.e. the library defaults.
tfidf = TfidfVectorizer()

In [19]:
# Target vector: the 'label' column of the dataset.
y = data['label']

# Feature matrix: each token list is cast to its string representation and
# the vectorizer learns its vocabulary from those strings while transforming.
# NOTE(review): the vectorizer is fit on the whole dataset before the
# train/test split further down, so statistics from the test rows leak into
# the training features. Consider fitting on the training split only.
x = tfidf.fit_transform(data['tweet_clean'].astype(str))

# Splitting data

In [20]:
from sklearn.model_selection import train_test_split

In [21]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Model Selection and Training

In [22]:
from sklearn.ensemble import RandomForestClassifier

In [23]:
# Random forest of 100 trees with a fixed seed for reproducibility.
rfc = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Train on the training split; left as the cell's last expression so the
# fitted estimator is displayed.
rfc.fit(x_train, y_train)

# Evaluation

In [24]:
from sklearn.metrics import accuracy_score

In [25]:
# Predict labels for the held-out rows and report the fraction that match
# the true labels.
y_pred = rfc.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy = ", accuracy)

Accuracy =  0.88510101010101


# Prediction

In [31]:
# Classify a single, previously unseen tweet with the fitted vectorizer and
# the trained random forest.
new_tweet = "I love the newly released Samsung! Its very handy and efficient unlike iPhone."
new_tweet_cleaned = gensim.utils.simple_preprocess(new_tweet)

# transform() treats every element of its input as a separate document.
# Passing the token list directly would vectorize each word as its own
# one-word "tweet", and prediction[0] would then reflect only the first token.
# Wrap the tweet in a one-element list instead, stringified the same way the
# training column was (astype(str) on the token lists), so the model sees a
# single document built from all of the tweet's tokens.
X_new = tfidf.transform([str(new_tweet_cleaned)])
prediction = rfc.predict(X_new)

# NOTE(review): this mapping assumes label 0 means positive and any other
# label means negative -- confirm against the dataset's label definition.
sentiment = "Positive" if prediction[0] == 0 else "Negative"

print("Tweet : ",new_tweet,"\nCleaned : ",new_tweet_cleaned,"\nPredicted : ",sentiment)

Tweet :  I love the newly released Samsung! Its very handy and efficient unlike iPhone. 
Cleaned :  ['love', 'the', 'newly', 'released', 'samsung', 'its', 'very', 'handy', 'and', 'efficient', 'unlike', 'iphone'] 
Predicted :  Positive
