# Preprocessing

In [None]:
!pip install tweet-preprocessor

Collecting tweet-preprocessor
  Downloading tweet_preprocessor-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0


In [None]:
import preprocessor as p
from textblob import TextBlob
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import pandas as pd

In [None]:
data = pd.read_csv('/content/data.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   conversation_id_str      200 non-null    int64 
 1   created_at               200 non-null    object
 2   favorite_count           200 non-null    int64 
 3   full_text                200 non-null    object
 4   id_str                   200 non-null    int64 
 5   image_url                145 non-null    object
 6   in_reply_to_screen_name  21 non-null     object
 7   lang                     200 non-null    object
 8   location                 119 non-null    object
 9   quote_count              200 non-null    int64 
 10  reply_count              200 non-null    int64 
 11  retweet_count            200 non-null    int64 
 12  tweet_url                200 non-null    object
 13  user_id_str              200 non-null    int64 
 14  username                 200 non-null    o

In [None]:
data = data[['username','created_at','full_text']]
data.sample(n=5)

Unnamed: 0,username,created_at,full_text
180,afifwilians,Fri Apr 26 03:32:42 +0000 2024,@BungTowel iya tapi dulu mainnya di tempat kub...
185,Hansunriko,Mon Apr 29 23:04:42 +0000 2024,JAGA SUASANA HARMONIS UNTUK MEMBANGUN MASA DEP...
162,Abrisamalimustf,Fri Apr 26 12:24:44 +0000 2024,Ssst.. bung Towel jangan ampe tau https://t.co...
7,BolaBolaAja,Wed Apr 24 14:57:24 +0000 2024,Hotman Paris : Kalau 1-0 nanti gajinya Shin Ta...
8,AgusBA9,Thu Apr 25 21:19:59 +0000 2024,Bung towel di persilahkan komentarnya https://...


In [None]:
data.isnull().sum()

username      0
created_at    0
full_text     0
dtype: int64

In [None]:
def preprocessing_data(x):
    return p.clean(x)

def tokenize_data(x):
    return p.tokenize(x)

data['tweet_clean'] = data['full_text'].apply(preprocessing_data)
data['tweet_clean'] = data['tweet_clean'].apply(tokenize_data)
data = data.drop_duplicates()

# Translating Data

In [None]:
pip install deep-translator

Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m692.1 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: deep-translator
Successfully installed deep-translator-1.11.4


In [None]:
from deep_translator import GoogleTranslator
translator = GoogleTranslator()

def convert_eng(tweet):
    return translator.translate(tweet,lang_tgt='en')

data['tweet_english'] = data['tweet_clean'].apply(convert_eng)

In [None]:
data.head(10)

Unnamed: 0,username,created_at,full_text,tweet_clean,tweet_english
0,fariznr7_,Thu Apr 25 21:15:37 +0000 2024,bung towel harus sadar saking professionalnya ...,bung towel harus sadar saking professionalnya ...,Dude Towel has to realize how professional STY...
1,lipatanpipi,Thu Apr 25 21:43:54 +0000 2024,Misinya apa si bung towel ini? Gila https://t....,Misinya apa si bung towel ini? Gila,What is this dude towel's mission? Crazy
2,BolaBolaAja,Fri Apr 26 12:03:45 +0000 2024,Bung Towel : Shin Tae Yong harus berterima Kas...,Bung Towel : Shin Tae Yong harus berterima Kas...,Bung Towel: Shin Tae Yong must thank the leagu...
3,MafiaWasit,Thu Apr 25 23:47:13 +0000 2024,Rafael Struick dkk ga boleh jumawa hanya karen...,Rafael Struick dkk ga boleh jumawa hanya karen...,Rafael Struick et al shouldn't be proud just b...
4,idextratime,Wed Apr 24 14:53:21 +0000 2024,KALO 1-0 GAJINYA SHIN TAE-YONG MAKIN TINGGI B...,KALO $NUMBER$ GAJINYA SHIN TAE-YONG MAKIN TING...,IF SHIN TAE-YONG'S SALARY $NUMBER$ IS GETTING ...
5,seterahdeh,Mon Apr 29 16:00:49 +0000 2024,bung towel pas liat indonesia kalah: https://t...,bung towel pas liat indonesia kalah:,"Dude Towel, when he saw Indonesia lose:"
6,ripbotttt,Mon Apr 29 06:25:12 +0000 2024,no one literally no one: bung towel:,no one literally no one: bung towel:,no one literally no one: bung towel:
7,BolaBolaAja,Wed Apr 24 14:57:24 +0000 2024,Hotman Paris : Kalau 1-0 nanti gajinya Shin Ta...,Hotman Paris : Kalau $NUMBER$ nanti gajinya Sh...,Hotman Paris: If $NUMBER$ then Shin Tae Yong's...
8,AgusBA9,Thu Apr 25 21:19:59 +0000 2024,Bung towel di persilahkan komentarnya https://...,Bung towel di persilahkan komentarnya,Dude Towel is welcome to comment
9,kevinpramudya_,Thu Apr 25 18:47:41 +0000 2024,Nobar timnas di Balaikota Semarang yang depan ...,Nobar timnas di Balaikota Semarang yang depan ...,National team performance at Semarang City Hal...


In [None]:
stemmer = PorterStemmer()

def stemming_data(x):
    return stemmer.stem(x)

data['tweet_english'] = data['tweet_english'].apply(stemming_data)

**Labeling Dengan TextBlob**

In [None]:
data_tweet = list(data['tweet_english'])
polaritas = 0

status = []
total_positif = total_negatif = total_netral = total = 0

for i, tweet in enumerate(data_tweet):
    analysis = TextBlob(tweet)
    polaritas += analysis.polarity

    if analysis.sentiment.polarity > 0.0:
        total_positif += 1
        status.append('Positif')
    elif analysis.sentiment.polarity == 0.0:
        total_netral += 1
        status.append('Netral')
    else:
        total_negatif += 1
        status.append('Negatif')

    total += 1

print(f'Hasil Analisis Data:\nPositif = {total_positif}\nNetral = {total_netral}\nNegatif = {total_negatif}')
print(f'\nTotal Data : {total}')

Hasil Analisis Data:
Positif = 90
Netral = 77
Negatif = 33

Total Data : 200


In [None]:
status = pd.DataFrame({'klasifikasi': status})
data['klasifikasi'] = status
data.tail()

Unnamed: 0,username,created_at,full_text,tweet_clean,tweet_english,klasifikasi
195,gateduabelas,Sun Apr 28 02:39:47 +0000 2024,@dhemit_is_back Sudah ada ulasanya di youtube ...,Sudah ada ulasanya di youtube,there's already a review on youtub,Netral
196,vanlith_gl,Mon Apr 29 15:57:26 +0000 2024,Bung Towel bahagia.,Bung Towel bahagia.,dude towel is happy.,Positif
197,OposisiCerdas,Thu Apr 25 23:08:57 +0000 2024,Ernest Prakasa Sentil Bung Towel yang Terus Se...,Ernest Prakasa Sentil Bung Towel yang Terus Se...,ernest prakasa flicks bung towel who keeps att...,Negatif
198,ecaesarp,Fri Apr 26 14:06:33 +0000 2024,@BurhanMuhtadi Dikasih tau rank liga 1 kita 28...,Dikasih tau rank liga kita bung towel kesinggu...,"when we were told our league rank, bro towel w...",Negatif
199,cumabacabaca,Thu Apr 25 03:20:33 +0000 2024,@SiaranBolaLive contohlah semangat bung towel ...,contohlah semangat bung towel dan kpsi dalam h...,example of the spirit of bung towel and kpsi i...,Positif


# Klasifikasi

In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

Preparing Data Test & Train

In [None]:
dataset = data[['tweet_english','klasifikasi']]
dataset = [tuple(x) for x in dataset.to_records(index=False)]

In [None]:
import random

set_positif = []
set_negatif = []
set_netral = []

for n in dataset:
    if(n[1] == 'Positif'):
      set_positif.append(n)
    elif(n[1] == 'Negatif'):
      set_negatif.append(n)
    else:
      set_netral.append(n)

set_positif = random.sample(set_positif, k=int(len(set_positif)/2))
set_negatif = random.sample(set_negatif, k=int(len(set_negatif)/2))
set_netral = random.sample(set_netral, k=int(len(set_netral)/2))

train = set_positif + set_negatif + set_netral

train_set = []

for n in train:
     train_set.append(n)

Training Classifier

In [None]:
from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train_set)
print('Akurasi Test:', cl.accuracy(dataset))

Akurasi Test: 0.73


**Klasifikasi Ulang dengan NBC**

In [None]:
data_tweet = list(data['tweet_english'])
polaritas = 0

status = []
total_positif = total_negatif = total_netral = total = 0

for i, tweet in enumerate(data_tweet):
    analysis = TextBlob(tweet, classifier=cl)

    if analysis.classify() == 'Positif':
        total_positif += 1
    elif analysis.classify() == 'Netral':
        total_netral += 1
    else:
        total_negatif += 1

    status.append(analysis.classify())
    total += 1

print(f'\nHasil Analisis Data:\nPositif = {total_positif}\nNetral = {total_netral}\nNegatif = {total_negatif}')
print(f'\nTotal Data : {total}')


Hasil Analisis Data:
Positif = 66
Netral = 121
Negatif = 13

Total Data : 200


In [None]:
status = pd.DataFrame({'klasifikasi_bayes': status})
data['klasifikasi_bayes'] = status

In [None]:
data.sample(n=10)

Unnamed: 0,username,created_at,full_text,tweet_clean,tweet_english,klasifikasi,klasifikasi_bayes
31,IDGoonerscom,Wed Apr 24 14:49:47 +0000 2024,Benji adalah kita ke bung Towel https://t.co/Q...,Benji adalah kita ke bung Towel,benji is us to the towel dud,Netral,Netral
106,Ustadz_Ahong,Fri Apr 26 08:34:16 +0000 2024,Bung Towel suruh ganti nama aja Nguyen Van Towel.,Bung Towel suruh ganti nama aja Nguyen Van Towel.,bung towel told me to just change the name to ...,Netral,Netral
15,ecaesarp,Fri Apr 26 13:26:06 +0000 2024,STY kritisi mental dan stamina pemain tapi yan...,STY kritisi mental dan stamina pemain tapi yan...,sty criticized the mental and stamina of the p...,Negatif,Negatif
198,ecaesarp,Fri Apr 26 14:06:33 +0000 2024,@BurhanMuhtadi Dikasih tau rank liga 1 kita 28...,Dikasih tau rank liga kita bung towel kesinggu...,"when we were told our league rank, bro towel w...",Negatif,Negatif
126,Bisniscom,Sun Apr 28 02:23:34 +0000 2024,Sosok Bung Towel Pengamat Sepak Bola yang Kera...,Sosok Bung Towel Pengamat Sepak Bola yang Kera...,"the figure of bung towel, a football observer ...",Netral,Netral
138,aan__,Tue Apr 23 09:22:34 +0000 2024,@idextratime bisa ga sih coach sty jd wni aja ...,bisa ga sih coach sty jd wni aja barter ma bun...,can coach sty be a foreigner just barter ma du...,Netral,Netral
19,archievedinoo,Mon Apr 29 17:45:59 +0000 2024,info kakak dipake bung towel https://t.co/by6X...,info kakak dipake bung towel,"bro, bro, use a towel",Netral,Netral
116,quintta1217,Thu Apr 25 21:43:51 +0000 2024,Shin Tae Yong Korea best coach Bung Towel @Bun...,Shin Tae Yong Korea best coach Bung Towel Indo...,shin tae yong korea best coach bung towel indo...,Positif,Positif
23,fachrul01,Thu Apr 25 21:18:31 +0000 2024,Korsel saja bisa di kalahkan saatnya Indonesia...,Korsel saja bisa di kalahkan saatnya Indonesia...,south korea can be beaten when indonesia is th...,Netral,Netral
14,unmagnetism,Thu Apr 25 21:01:24 +0000 2024,tolong seseorang cek bung towel aman ga,tolong seseorang cek bung towel aman ga,please someone check whether the towel is safe...,Positif,Netral
