# Twitter Sentiment Analysis

## Importing libraries

In [16]:
import os
import zipfile
import numpy as np
import pandas as pd
from wordcloud import WordCloud, STOPWORDS
import re
import nltk
import string
from gensim.utils import simple_preprocess

## Downloading and reading the dataset from Kaggle

In [2]:
! pip install kaggle==1.5.6
!kaggle datasets download -d kazanova/sentiment140

Downloading sentiment140.zip to c:\Users\andre\Documents\Twitter_Sentiment_Analysis




  0%|          | 0.00/80.9M [00:00<?, ?B/s]
  1%|          | 1.00M/80.9M [00:00<00:43, 1.91MB/s]
  2%|▏         | 2.00M/80.9M [00:01<00:39, 2.08MB/s]
  4%|▎         | 3.00M/80.9M [00:01<00:36, 2.22MB/s]
  5%|▍         | 4.00M/80.9M [00:01<00:36, 2.23MB/s]
  6%|▌         | 5.00M/80.9M [00:02<00:34, 2.29MB/s]
  7%|▋         | 6.00M/80.9M [00:02<00:34, 2.31MB/s]
  9%|▊         | 7.00M/80.9M [00:03<00:33, 2.34MB/s]
 10%|▉         | 8.00M/80.9M [00:03<00:32, 2.37MB/s]
 11%|█         | 9.00M/80.9M [00:04<00:31, 2.40MB/s]
 12%|█▏        | 10.0M/80.9M [00:04<00:31, 2.37MB/s]
 14%|█▎        | 11.0M/80.9M [00:05<00:38, 1.93MB/s]
 15%|█▍        | 12.0M/80.9M [00:05<00:36, 2.00MB/s]
 16%|█▌        | 13.0M/80.9M [00:06<00:33, 2.12MB/s]
 17%|█▋        | 14.0M/80.9M [00:06<00:31, 2.21MB/s]
 19%|█▊        | 15.0M/80.9M [00:07<00:30, 2.28MB/s]
 20%|█▉        | 16.0M/80.9M [00:07<00:29, 2.29MB/s]
 21%|██        | 17.0M/80.9M [00:08<00:29, 2.26MB/s]
 22%|██▏       | 18.0M/80.9M [00:08<00:29, 2.22MB/s]
 

In [3]:
# Names of the downloaded archive and of the CSV file read after extraction.
archive_name = "sentiment140.zip"
csv_name = 'training.1600000.processed.noemoticon.csv'
# Column names are supplied explicitly when reading the CSV.
column_names = ('target', 'id', 'date', 'flag', 'user', 'text')

# Unpack every member of the archive into the current working directory.
with zipfile.ZipFile(archive_name, "r") as zip_ref:
    zip_ref.extractall()

# Load the tweets into a DataFrame using the Latin-1 encoding.
df = pd.read_csv(
    csv_name,
    encoding='Latin-1',
    names=column_names,
)


In [4]:
# Delete the extracted CSV and the downloaded archive now that the data is in
# memory. Each removal is guarded so that re-running this cell (or running it
# when a file is already gone) does not raise FileNotFoundError.
for leftover_file in ('training.1600000.processed.noemoticon.csv', 'sentiment140.zip'):
    if os.path.exists(leftover_file):
        os.remove(leftover_file)

In [5]:
# Preview the first rows of the loaded frame to check that the columns parsed as expected.
df.head()

Unnamed: 0,target,id,date,flag,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [6]:
# Count how many tweets carry each `target` label to check the class balance.
df['target'].value_counts()

0    800000
4    800000
Name: target, dtype: int64

## Data Prep

Removing punctuation

In [11]:
# Translation table that deletes every character in `string.punctuation`.
# Built once so it is not recomputed for every tweet passed to `remove_punc`.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punc(message):
  """Return `message` with all ASCII punctuation characters removed.

  Only the characters listed in `string.punctuation` are dropped; every other
  character (letters, digits, whitespace, non-ASCII symbols) is kept as is.

  Parameters
  ----------
  message : str
      Text to clean.

  Returns
  -------
  str
      The text without punctuation characters.
  """
  # `str.translate` strips the characters in a single pass instead of testing
  # each character against the punctuation string one by one.
  return message.translate(_PUNCTUATION_TABLE)

In [12]:
# Strip punctuation from every tweet and store the result in a new column.
df['text_wo_punctuation'] = df['text'].map(remove_punc)

Removing stopwords

In [15]:
# Fetch the NLTK 'stopwords' package; this must run before the corpus is read below.
nltk.download('stopwords')
# English stop-word list used later by `preprocess` to filter tokens.
stop_words = nltk.corpus.stopwords.words('english')
# Show the first ten entries as a quick sanity check.
stop_words[:10]

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\andre\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]

In [21]:
def preprocess(text):
  """Tokenize `text` and drop stop words and very short tokens.

  The text is tokenized with gensim's `simple_preprocess`; a token is kept
  only if it has at least three characters and is not in the module-level
  `stop_words` list. Returns the kept tokens as a list, in their original order.
  """
  # NOTE(review): `stop_words` is a list, so each membership test is a linear
  # scan; converting it to a set where it is defined would likely be faster.
  kept_tokens = []
  for token in simple_preprocess(text):
    if len(token) < 3 or token in stop_words:
      continue
    kept_tokens.append(token)
  return kept_tokens

In [22]:
# Token lists: punctuation-free text with stop words and short tokens removed.
df['text_wo_punctuation_and_stopwords'] = df['text_wo_punctuation'].map(preprocess)
# Same tokens joined back into a single space-separated string per tweet.
df['text_wo_punctuation_and_stopwords_joined'] = (
    df['text_wo_punctuation_and_stopwords'].map(" ".join)
)

In [23]:
# Preview the frame again, now including the three derived text columns.
df.head()

Unnamed: 0,target,id,date,flag,user,text,text_wo_punctuation,text_wo_punctuation_and_stopwords,text_wo_punctuation_and_stopwords_joined
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",switchfoot httptwitpiccom2y1zl Awww thats a b...,"[switchfoot, httptwitpiccom, awww, thats, bumm...",switchfoot httptwitpiccom awww thats bummer sh...
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...,is upset that he cant update his Facebook by t...,"[upset, cant, update, facebook, texting, might...",upset cant update facebook texting might cry r...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...,Kenichan I dived many times for the ball Manag...,"[kenichan, dived, many, times, ball, managed, ...",kenichan dived many times ball managed save re...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire,my whole body feels itchy and like its on fire,"[whole, body, feels, itchy, like, fire]",whole body feels itchy like fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all....",nationwideclass no its not behaving at all im ...,"[nationwideclass, behaving, mad, cant, see]",nationwideclass behaving mad cant see
