In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

In [2]:
# Load the raw sentiment dataset from CSV and show its (rows, columns) shape.
# NOTE(review): the path looks like a Colab-mounted Drive folder — confirm it
# exists in the environment where this notebook is run.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Celerates MSIB/ALL Dataset/sentiment_analysis.csv"
)
df.shape

(499, 7)

In [3]:
# Peek at the first five rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram


In [4]:
# Look at the last five rows as well, to confirm the end of the file loaded cleanly.
df.tail()

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
494,2015,10,18,night,"According to , a quarter of families under six...",negative,Twitter
495,2021,2,25,morning,the plan to not spend money is not going well,negative,Instagram
496,2022,5,30,noon,uploading all my bamboozle pictures of facebook,neutral,Facebook
497,2018,8,10,night,congratulations ! you guys finish a month ear...,positive,Twitter
498,2019,3,25,morning,"actually, I wish I was back in Tahoe. I miss...",negative,Instagram


In [5]:
# Summarise each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 499 entries, 0 to 498
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Year           499 non-null    int64 
 1   Month          499 non-null    int64 
 2   Day            499 non-null    int64 
 3   Time of Tweet  499 non-null    object
 4   text           499 non-null    object
 5   sentiment      499 non-null    object
 6   Platform       499 non-null    object
dtypes: int64(3), object(4)
memory usage: 27.4+ KB


In [6]:
# Count missing values in each column.
df.isnull().sum()

Unnamed: 0,0
Year,0
Month,0
Day,0
Time of Tweet,0
text,0
sentiment,0
Platform,0


In [7]:
# Class balance: number of rows for each sentiment label.
df["sentiment"].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
neutral,199
positive,166
negative,134


In [8]:
# Assemble one datetime column, 'Date', from the separate Year / Month / Day
# columns (pd.to_datetime accepts a frame with exactly these component names).
df['Date'] = pd.to_datetime(df.loc[:, ['Year', 'Month', 'Day']])

In [9]:
# 'Date' now carries the same information, so remove the three source columns.
# Note: this cell rebinds `df`; re-running it without re-running the cells
# above raises KeyError because the columns are already gone.
df = df.drop(['Year', 'Month', 'Day'], axis=1)

In [10]:
# Check the result: Year/Month/Day should be gone and 'Date' should be present.
df.head()

Unnamed: 0,Time of Tweet,text,sentiment,Platform,Date
0,morning,What a great day!!! Looks like dream.,positive,Twitter,2018-08-18
1,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook,2018-08-18
2,night,Don't angry me,negative,Facebook,2017-08-18
3,morning,We attend in the class just for listening teac...,negative,Facebook,2022-06-08
4,noon,"Those who want to go, let them go",negative,Instagram,2022-06-08


In [11]:
# Clock time ('HH:MM:SS') assigned to each coarse 'Time of Tweet' label.
time_mapping = dict(
    morning='06:00:00',
    noon='12:00:00',
    night='18:00:00',
)

In [12]:
# Translate each 'Time of Tweet' label into its clock time ('HH:MM:SS').
# Labels that are not keys of time_mapping become NaN in this column.
df['Structured Time'] = df['Time of Tweet'].map(time_mapping)

# Build the full timestamp by adding the clock time to 'Date' as a timedelta.
# This avoids formatting 'Date' as text and re-parsing the concatenated string
# (which depends on datetime format inference). Rows whose label was not
# mapped end up as NaT, the same outcome as the string-based version.
df['Datetime'] = df['Date'] + pd.to_timedelta(df['Structured Time'])

In [13]:
# Remove the raw time-of-day label and the intermediate clock-time column.
df = df.drop(['Time of Tweet', 'Structured Time'], axis=1)

In [14]:
# Check the result: 'Time of Tweet' should be gone and 'Datetime' should be present.
df.head()

Unnamed: 0,text,sentiment,Platform,Date,Datetime
0,What a great day!!! Looks like dream.,positive,Twitter,2018-08-18,2018-08-18 06:00:00
1,"I feel sorry, I miss you here in the sea beach",positive,Facebook,2018-08-18,2018-08-18 12:00:00
2,Don't angry me,negative,Facebook,2017-08-18,2017-08-18 18:00:00
3,We attend in the class just for listening teac...,negative,Facebook,2022-06-08,2022-06-08 06:00:00
4,"Those who want to go, let them go",negative,Instagram,2022-06-08,2022-06-08 12:00:00


In [15]:
# Remove the combined timestamp column, leaving only 'Date'.
# NOTE(review): 'Datetime' was built a few cells earlier and 'Time of Tweet'
# has already been dropped, so after this cell no time-of-day information
# remains in df — confirm that is intended.
df = df.drop(columns=["Datetime"])

In [16]:
# Check the result: 'Datetime' should no longer be listed.
df.head()

Unnamed: 0,text,sentiment,Platform,Date
0,What a great day!!! Looks like dream.,positive,Twitter,2018-08-18
1,"I feel sorry, I miss you here in the sea beach",positive,Facebook,2018-08-18
2,Don't angry me,negative,Facebook,2017-08-18
3,We attend in the class just for listening teac...,negative,Facebook,2022-06-08
4,"Those who want to go, let them go",negative,Instagram,2022-06-08


In [17]:
# Cleaning pass 1

def clean_text1(text):
    """Remove double quotes, commas and newline characters from ``text``.

    Apostrophes (single quotes) are left in place, so contractions such as
    "don't" are unchanged.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every ``"``, ``,`` and newline character deleted
        (nothing is inserted in their place).
    """
    # NOTE(review): this class used to be spelled '[''"",,,]'. Python joins the
    # adjacent literals '[' and '"",,,]', so it only ever matched double quotes
    # and commas; it is written out explicitly here. Confirm that apostrophes
    # are really meant to be kept.
    text = re.sub(r'[",]', '', text)
    text = text.replace('\n', '')
    return text

# `cleaned1` is kept as a plain alias of clean_text1 (it used to be a lambda
# that only forwarded its argument) in case other cells refer to it.
cleaned1 = clean_text1

# Apply the first cleaning pass to every row. Series.apply already returns a
# Series aligned with df, so it is assigned directly instead of being wrapped
# in a one-column DataFrame first.
df['text'] = df['text'].apply(cleaned1)
df.head()

Unnamed: 0,text,sentiment,Platform,Date
0,What a great day!!! Looks like dream.,positive,Twitter,2018-08-18
1,I feel sorry I miss you here in the sea beach,positive,Facebook,2018-08-18
2,Don't angry me,negative,Facebook,2017-08-18
3,We attend in the class just for listening teac...,negative,Facebook,2022-06-08
4,Those who want to go let them go,negative,Instagram,2022-06-08


In [18]:
# Cleaning pass 2

def clean_text2(text):
    """Lowercase ``text`` and strip bracketed spans and digit-bearing tokens.

    Steps, in order:
      1. Convert to lowercase.
      2. Delete every ``[...]`` span (shortest match; ``.`` does not match a
         newline, so a span cannot cross a line break).
      3. Delete every run of word characters that contains at least one digit.

    Surrounding whitespace is not collapsed, so removed spans can leave
    consecutive spaces behind.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    text = text.lower()
    # Raw strings keep the regex backslashes intact without relying on Python
    # passing through unrecognised escapes such as '\[' or '\w' (which emits a
    # DeprecationWarning / SyntaxWarning on recent Python versions).
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

# `cleaned2` is kept as a plain alias of clean_text2 (it used to be a lambda
# that only forwarded its argument) in case other cells refer to it.
cleaned2 = clean_text2

# Apply the second cleaning pass to every row. Series.apply already returns a
# Series aligned with df, so it is assigned directly instead of being wrapped
# in a one-column DataFrame first.
df['text'] = df['text'].apply(cleaned2)
df.head()

Unnamed: 0,text,sentiment,Platform,Date
0,what a great day!!! looks like dream.,positive,Twitter,2018-08-18
1,i feel sorry i miss you here in the sea beach,positive,Facebook,2018-08-18
2,don't angry me,negative,Facebook,2017-08-18
3,we attend in the class just for listening teac...,negative,Facebook,2022-06-08
4,those who want to go let them go,negative,Instagram,2022-06-08


In [19]:
# Write the processed dataset to CSV (row index omitted) and echo the path.
# NOTE(review): the earlier comment described this as saving the dataset "with
# new labels", but the cells visible here only reshape the date columns and
# clean the text; the sentiment labels are not modified — confirm the intent.
labeled_file_path = '/content/drive/MyDrive/Celerates MSIB/ALL Dataset/Perbaikan1.csv'
df.to_csv(path_or_buf=labeled_file_path, index=False)
labeled_file_path

'/content/drive/MyDrive/Celerates MSIB/ALL Dataset/Perbaikan1.csv'