Importing libraries

In [1]:
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
data = pd.read_csv('tweets.csv')

In [3]:
pd.set_option('display.max_colwidth',None)
data.head()

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1MfQV #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone
1,2,0,Finally a transparant silicon case ^^ Thanks to my uncle :) #yay #Sony #Xperia #S #sonyexperias… http://instagram.com/p/YGEt5JC6JM/
2,3,0,We love this! Would you go? #talk #makememories #unplug #relax #iphone #smartphone #wifi #connect... http://fb.me/6N3LsUpCu
3,4,0,I'm wired I know I'm George I was made that way ;) #iphone #cute #daventry #home http://instagr.am/p/Li_5_ujS4k/
4,5,1,What amazing service! Apple won't even talk to me about a question I have unless I pay them $19.95 for their stupid support!


In [4]:
data.drop('id',inplace=True,axis=1)

Preprocessing

In [5]:
def remove_urls(tweet):
    return re.sub(r'http\S+', '', tweet)

# Apply the function to the 'Tweet' column
data['tweet'] = data['tweet'].apply(remove_urls)

In [6]:
data.head()

Unnamed: 0,label,tweet
0,0,#fingerprint #Pregnancy Test #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone
1,0,Finally a transparant silicon case ^^ Thanks to my uncle :) #yay #Sony #Xperia #S #sonyexperias…
2,0,We love this! Would you go? #talk #makememories #unplug #relax #iphone #smartphone #wifi #connect...
3,0,I'm wired I know I'm George I was made that way ;) #iphone #cute #daventry #home
4,1,What amazing service! Apple won't even talk to me about a question I have unless I pay them $19.95 for their stupid support!


In [7]:
import nltk
from nltk.corpus import stopwords
import string

# Download NLTK resources if not already downloaded
nltk.download('punkt')
nltk.download('stopwords')


# Function for preprocessing
def preprocess_text(text):
    # Tokenize the text
    tokens = nltk.word_tokenize(text)

    # Get English stopwords
    stop_words = set(stopwords.words('english'))

    # Remove stopwords and punctuation
    tokens = [token.lower() for token in tokens if token.lower() not in stop_words and token not in string.punctuation]

    # Join tokens back into text
    processed_text = ' '.join(tokens)

    return processed_text

# Apply preprocessing to the 'tweet' column
data['cleaned_tweet'] = [preprocess_text(tweet) for tweet in data['tweet']]

print(data['cleaned_tweet'])


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


0                      fingerprint pregnancy test android apps beautiful cute health igers iphoneonly iphonesia iphone
1                                       finally transparant silicon case ^^ thanks uncle yay sony xperia sonyexperias…
2                                      love would go talk makememories unplug relax iphone smartphone wifi connect ...
3                                                           'm wired know 'm george made way iphone cute daventry home
4                                      amazing service apple wo n't even talk question unless pay 19.95 stupid support
                                                             ...                                                      
7915                                                      live loud lol liveoutloud selfie smile sony music headphones
7916    would like wish amazing day make every minute count tls today iphone accessories news life february 23 2017 0…
7917                               helping lovel

In [8]:
data.head()

Unnamed: 0,label,tweet,cleaned_tweet
0,0,#fingerprint #Pregnancy Test #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone,fingerprint pregnancy test android apps beautiful cute health igers iphoneonly iphonesia iphone
1,0,Finally a transparant silicon case ^^ Thanks to my uncle :) #yay #Sony #Xperia #S #sonyexperias…,finally transparant silicon case ^^ thanks uncle yay sony xperia sonyexperias…
2,0,We love this! Would you go? #talk #makememories #unplug #relax #iphone #smartphone #wifi #connect...,love would go talk makememories unplug relax iphone smartphone wifi connect ...
3,0,I'm wired I know I'm George I was made that way ;) #iphone #cute #daventry #home,'m wired know 'm george made way iphone cute daventry home
4,1,What amazing service! Apple won't even talk to me about a question I have unless I pay them $19.95 for their stupid support!,amazing service apple wo n't even talk question unless pay 19.95 stupid support


Train Test Split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(data['tweet'], data['label'], test_size=0.2, random_state=42)


Feature extraction using CountVectorizer


In [10]:

count_vectorizer = CountVectorizer()
X_train_counts = count_vectorizer.fit_transform(X_train)
X_test_counts = count_vectorizer.transform(X_test)

Model training

In [11]:

svm_classifier = SVC(kernel='linear')  # Linear SVM classifier
svm_classifier.fit(X_train_counts, y_train)

In [12]:
predictions = svm_classifier.predict(X_test_counts)


Evaluate the model

In [13]:

accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.8611111111111112


In [14]:
def sentiment(text):
  text = count_vectorizer.transform(text)
  prediction = svm_classifier.predict(text)
  if prediction == 1:
    print('-ve sentiment')
  else:
    print('+ve sentiment')


In [15]:
sentiment(['i am happy'])

+ve sentiment
