## Load Dataset

In [3]:
import pandas as pd

In [4]:
# Load the comment dataset from CSV into a DataFrame and preview the first rows.
ikn_df = pd.read_csv('ikn_fb_comment.csv')
ikn_df.head()

Unnamed: 0,id,date,profileId,profileName,text
0,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzg1NjU0MTQ5OD...,2023-05-31T01:58:04.000Z,pfbid0jYSE3NidmjiRHMu5yh43SHNnisKR7EhGFpPotKhV...,Soekma Giri Gerilyanto,"Agustus 2024, Ibu Kota negara pindah ke Kalima..."
1,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjMxODAwNDM3OD...,2023-05-31T04:37:52.000Z,100002315165121,Mogi Mahardhika,Keren Logo dan makna Filosofis yang terkandung...
2,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfOTY5ODI0NjcwNz...,2023-05-31T02:29:00.000Z,pfbid02ewJkERjjYtTX2w94vayhA48eVf9BrxBxwjdvcyh...,Muhammad Husain,Ini terobosan emas buat pulau Kalimantan oleh ...
3,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzk3NzkzODc1MT...,2023-05-31T02:05:51.000Z,pfbid02fSgeCwm7HH9rRLTL4a9FQAsrryES3XKZWxzpFN3...,Agustien Tulong,Keren filosofinya. Semoga dipahami dan diprakt...
4,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjAyMTM0NzQ2MD...,2023-05-31T01:59:47.000Z,pfbid0MzxmwRA2TPjF27CRZbPPbkaUuC4qPBRrgK5wU2y5...,M Taufiq Ash,Terlihat bernuansa kegagahan dan kekokohan seb...


In [5]:
# Inspect column dtypes and non-null counts to spot missing values before cleaning.
ikn_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           500 non-null    object
 1   date         500 non-null    object
 2   profileId    500 non-null    object
 3   profileName  500 non-null    object
 4   text         475 non-null    object
dtypes: object(5)
memory usage: 19.7+ KB


## Cleaning Data

In [6]:
# Remove duplicated rows, then re-check the row count and non-null counts.
ikn_df = ikn_df.drop_duplicates()
ikn_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           500 non-null    object
 1   date         500 non-null    object
 2   profileId    500 non-null    object
 3   profileName  500 non-null    object
 4   text         475 non-null    object
dtypes: object(5)
memory usage: 23.4+ KB


In [7]:
# Drop every row that contains a null value (per the info() output above, only
# the `text` column has nulls), then confirm all columns are fully populated.
ikn_df = ikn_df.dropna()
ikn_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 475 entries, 0 to 499
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           475 non-null    object
 1   date         475 non-null    object
 2   profileId    475 non-null    object
 3   profileName  475 non-null    object
 4   text         475 non-null    object
dtypes: object(5)
memory usage: 22.3+ KB


## Preprocess Data

In [8]:
# Install Sastrawi
!pip install Sastrawi

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
# import library
import re
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

In [10]:
# preprocess indonesian text data
# Lazily-built resources shared by every preprocess_text call (filled on first use).
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the (stop-word set, stemmer) pair, building both on first use only."""
    if not _PREPROCESS_RESOURCES:
        # Build into locals first so a failure never leaves the cache half-filled.
        stop_words = frozenset(stopwords.words('indonesian'))
        stemmer = StemmerFactory().create_stemmer()
        _PREPROCESS_RESOURCES.update(stop_words=stop_words, stemmer=stemmer)
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['stemmer']


def preprocess_text(text):
    """Normalize one Indonesian comment into a cleaned, stemmed string.

    Steps, in order: strip URLs, strip digits, delete ASCII punctuation,
    lowercase, tokenize with NLTK, drop Indonesian stop words, stem each
    remaining token with Sastrawi, and re-join the tokens with single spaces.

    Args:
        text: Raw comment text. Any non-string value (e.g. None or NaN from a
            missing cell) is treated as an empty comment.

    Returns:
        The cleaned text as a single space-separated string; '' when the input
        is not a string or nothing survives the cleaning.

    Note:
        Relies on NLTK's tokenizer data and its 'stopwords' corpus being
        available locally; this notebook does not download them itself.
    """
    # Missing values would otherwise crash inside re.sub; treat them as empty.
    if not isinstance(text, str):
        return ''

    # remove url
    text = re.sub(r"http\S+", "", text)
    # remove digits
    text = re.sub(r"\d+", "", text)
    # remove punctuation (characters are deleted, not replaced by a space, so
    # words joined only by punctuation are merged into one token)
    text = text.translate(str.maketrans("", "", string.punctuation))
    # case folding
    text = text.lower()
    # tokenization
    tokens = word_tokenize(text)

    # The stop-word set and stemmer are identical for every row, so reuse one
    # shared instance instead of rebuilding both on each call.
    stop_words, stemmer = _get_preprocess_resources()
    # remove stopwords, then stem what is left
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]

    # join, trim leading/trailing whitespace, collapse runs of whitespace
    text = ' '.join(tokens)
    return re.sub(r'\s+', ' ', text.strip())


In [11]:
# Run preprocess_text on every raw comment and store the result in a new
# `clean_text` column, then preview the first rows.
ikn_df['clean_text'] = ikn_df['text'].apply(preprocess_text)
ikn_df.head()

Unnamed: 0,id,date,profileId,profileName,text,clean_text
0,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzg1NjU0MTQ5OD...,2023-05-31T01:58:04.000Z,pfbid0jYSE3NidmjiRHMu5yh43SHNnisKR7EhGFpPotKhV...,Soekma Giri Gerilyanto,"Agustus 2024, Ibu Kota negara pindah ke Kalima...",agustus kota negara pindah kalimantan
1,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjMxODAwNDM3OD...,2023-05-31T04:37:52.000Z,100002315165121,Mogi Mahardhika,Keren Logo dan makna Filosofis yang terkandung...,keren logo makna filosofis kandung dalam moga ...
2,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfOTY5ODI0NjcwNz...,2023-05-31T02:29:00.000Z,pfbid02ewJkERjjYtTX2w94vayhA48eVf9BrxBxwjdvcyh...,Muhammad Husain,Ini terobosan emas buat pulau Kalimantan oleh ...,terobos emas pulau kalimantan presiden mari du...
3,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzk3NzkzODc1MT...,2023-05-31T02:05:51.000Z,pfbid02fSgeCwm7HH9rRLTL4a9FQAsrryES3XKZWxzpFN3...,Agustien Tulong,Keren filosofinya. Semoga dipahami dan diprakt...,keren filosofi moga paham praktek usak lingkun...
4,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjAyMTM0NzQ2MD...,2023-05-31T01:59:47.000Z,pfbid0MzxmwRA2TPjF27CRZbPPbkaUuC4qPBRrgK5wU2y5...,M Taufiq Ash,Terlihat bernuansa kegagahan dan kekokohan seb...,nuansa gagah kokoh raja negara yg


In [12]:
# Keep only rows whose cleaned text is longer than 3 characters, i.e. drop rows
# with 3 characters or fewer (.str.len() counts characters, not words).
ikn_df = ikn_df[ikn_df['clean_text'].str.len() > 3]
ikn_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 443 entries, 0 to 499
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           443 non-null    object
 1   date         443 non-null    object
 2   profileId    443 non-null    object
 3   profileName  443 non-null    object
 4   text         443 non-null    object
 5   clean_text   443 non-null    object
dtypes: object(6)
memory usage: 24.2+ KB


## Sentiment Analysis

In [13]:
!pip install textblob

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
# import library
from textblob import TextBlob
from googletrans import Translator

In [21]:
# Sanity check on a single cleaned comment (row label 1) before labelling the
# whole dataset. `.loc` makes the label-based lookup explicit.
sample_text = ikn_df['clean_text'].loc[1]
# TextBlob.translate() is deprecated and no longer present in newer TextBlob
# releases, so translate with googletrans instead (the library that
# sentiment_analysis also uses), keeping the source language pinned to 'id'.
translated = Translator().translate(sample_text, src='id', dest='en')
analysis = TextBlob(translated.text)
# Read the polarity once and map its sign to a label.
polarity = analysis.sentiment.polarity
if polarity > 0:
    print('positive')
elif polarity == 0:
    print('neutral')
else:
    print('negative')
# Show the translated text that was actually scored.
print(analysis)

positive
Cool Logo The philosophical meaning of the biological in I hope Allah SWT will run smoothly


In [24]:
# Single googletrans Translator instance shared by every sentiment_analysis call.
translator = Translator()

# sentiment analysis
def sentiment_analysis(text):
    """Classify Indonesian text as 'positive', 'neutral' or 'negative'.

    The text is translated to English with the module-level `translator`
    and scored with TextBlob; the sign of the polarity decides the label.

    Args:
        text: Indonesian text to classify (here: the cleaned comment).

    Returns:
        'positive' if the polarity is above 0, 'neutral' if it is exactly 0,
        otherwise 'negative'.
    """
    # Pin the source language instead of relying on auto-detection: the input
    # is known to be Indonesian, and the single-row check earlier in the
    # notebook pins it the same way.
    translation = translator.translate(text, src='id', dest='en')

    # Score the English translation and map the polarity sign to a label.
    polarity = TextBlob(translation.text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    elif polarity == 0:
        return 'neutral'
    else:
        return 'negative'

In [25]:
# Label every cleaned comment and store the result in a new `sentiment` column.
# Each row triggers one translator.translate call inside sentiment_analysis.
ikn_df['sentiment'] = ikn_df['clean_text'].apply(sentiment_analysis)
ikn_df.head()

Unnamed: 0,id,date,profileId,profileName,text,clean_text,sentiment
0,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzg1NjU0MTQ5OD...,2023-05-31T01:58:04.000Z,pfbid0jYSE3NidmjiRHMu5yh43SHNnisKR7EhGFpPotKhV...,Soekma Giri Gerilyanto,"Agustus 2024, Ibu Kota negara pindah ke Kalima...",agustus kota negara pindah kalimantan,neutral
1,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjMxODAwNDM3OD...,2023-05-31T04:37:52.000Z,100002315165121,Mogi Mahardhika,Keren Logo dan makna Filosofis yang terkandung...,keren logo makna filosofis kandung dalam moga ...,positive
2,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfOTY5ODI0NjcwNz...,2023-05-31T02:29:00.000Z,pfbid02ewJkERjjYtTX2w94vayhA48eVf9BrxBxwjdvcyh...,Muhammad Husain,Ini terobosan emas buat pulau Kalimantan oleh ...,terobos emas pulau kalimantan presiden mari du...,positive
3,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfNzk3NzkzODc1MT...,2023-05-31T02:05:51.000Z,pfbid02fSgeCwm7HH9rRLTL4a9FQAsrryES3XKZWxzpFN3...,Agustien Tulong,Keren filosofinya. Semoga dipahami dan diprakt...,keren filosofi moga paham praktek usak lingkun...,positive
4,Y29tbWVudDo4MTkwNjA1Mzk1NzYwNDJfMjAyMTM0NzQ2MD...,2023-05-31T01:59:47.000Z,pfbid0MzxmwRA2TPjF27CRZbPPbkaUuC4qPBRrgK5wU2y5...,M Taufiq Ash,Terlihat bernuansa kegagahan dan kekokohan seb...,nuansa gagah kokoh raja negara yg,positive


In [26]:
# Count how many comments received each sentiment label.
ikn_df['sentiment'].value_counts()

positive    234
neutral     183
negative     26
Name: sentiment, dtype: int64

In [28]:
# Show the original text of the first comment that was labelled negative.
ikn_df.loc[ikn_df['sentiment'] == 'negative', 'text'].iloc[0]

'Kenapa tidak logo batang sawit dibuat ya,, itu lebih masa depan. Walaupun harga anjlok, saya tidak tau darimana salahnya kebijakan kah atau mafia lagi. Atau sibuk urus partai..'

In [None]:
# export dataset
# ikn_df.to_csv('dataset_sentiment_analysis_ikn2.csv', index=False)