# Topic Modeling Using LSA 

### 1. Import Library

In [1]:
import pandas as pd
from googleapiclient.discovery import build

### 2. Fungsi untuk crawling komentar

In [2]:
def video_comments(video_id):
	# empty list for storing reply
	replies = []

	# creating youtube resource object
	youtube = build('youtube', 'v3', developerKey=api_key)

	# retrieve youtube video results
	video_response = youtube.commentThreads().list(part='snippet,replies', videoId=video_id).execute()

	# iterate video response
	while video_response:
		
		# extracting required info
		# from each result object
		for item in video_response['items']:
			
			# Extracting comments ()
			published = item['snippet']['topLevelComment']['snippet']['publishedAt']
			user = item['snippet']['topLevelComment']['snippet']['authorDisplayName']

			# Extracting comments
			comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
			likeCount = item['snippet']['topLevelComment']['snippet']['likeCount']

			replies.append([published, user, comment, likeCount])
			
			# counting number of reply of comment
			replycount = item['snippet']['totalReplyCount']

			# if reply is there
			if replycount>0:
				# iterate through all reply
				for reply in item['replies']['comments']:
					
					# Extract reply
					published = reply['snippet']['publishedAt']
					user = reply['snippet']['authorDisplayName']
					repl = reply['snippet']['textDisplay']
					likeCount = reply['snippet']['likeCount']
					
					# Store reply is list
					#replies.append(reply)
					replies.append([published, user, repl, likeCount])

			# print comment with list of reply
			#print(comment, replies, end = '\n\n')

			# empty reply list
			#replies = []

		# Again repeat
		if 'nextPageToken' in video_response:
			video_response = youtube.commentThreads().list(
					part = 'snippet,replies',
					pageToken = video_response['nextPageToken'], 
					videoId = video_id
				).execute()
		else:
			break
	#endwhile
	return replies


### 3. Jalankan Proses Crawling

In [3]:
# isikan dengan api key Anda
api_key = 'AIzaSyDqDchwF4wWk70AMD53NroGqew30s2IgEw'

# Enter video id
# contoh url video = https://www.youtube.com/watch?v=eTuUctD4STs
video_id = "eTuUctD4STs" #isikan dengan kode / ID video

# Call function
comments = video_comments(video_id)

comments

[['2023-05-09T11:00:53Z',
  'Baim GunturNugroho',
  'Saatx bpk prabowo pimpin indonesia 2024-2029',
  1],
 ['2023-05-08T04:22:33Z',
  'barbosa imran',
  'Prabowo udah tua udah baun tanah, ganjar ngak punya prestasi, anis terbukti membawa perubahan jakarta',
  0],
 ['2023-05-05T23:31:08Z',
  'Asep Hidayat',
  'prabowo prabowo 😂😂😂 padahal yang demen ngejelek jelekin anies baswedan itu si andre rosiade 😂😂😂 kaga jelass emang 😂😂😂😂',
  0],
 ['2023-05-05T14:02:44Z',
  'Ilham Nugraha',
  'UNTUK 2024 KAMI TETAP MEMILIH RI 1 ANIS RASYID BASWEDAN...<br>MOHON MAAF UNTUK PAK PRABOWO KAMI SANGAT KECEWA......<br>INGAT SUARA ASLI MU 70% PINDAH KE ANIES ...',
  0],
 ['2023-05-05T13:09:46Z', 'Afgan Osama', 'Sdh pernh', 0],
 ['2023-05-05T10:28:41Z',
  'Wastmo Wasto',
  'Pk prabowo pnya hati mulia dan ikhlas',
  0],
 ['2023-05-05T09:38:39Z',
  'Mohamad Kamir',
  'Bravo ganjar, prabowo and anies 🇮🇩🇮🇩🇮🇩',
  0],
 ['2023-05-05T09:26:06Z',
  'Wenny 28',
  'rakyat pingin bbm listrik sebako harga murah dan korup

### 4. Ubah Hasil Crawling ke Dataframe

In [4]:
df = pd.DataFrame(comments, columns=['publishedAt', 'authorDisplayName', 'textDisplay', 'likeCount'])
df

Unnamed: 0,publishedAt,authorDisplayName,textDisplay,likeCount
0,2023-05-09T11:00:53Z,Baim GunturNugroho,Saatx bpk prabowo pimpin indonesia 2024-2029,1
1,2023-05-08T04:22:33Z,barbosa imran,"Prabowo udah tua udah baun tanah, ganjar ngak ...",0
2,2023-05-05T23:31:08Z,Asep Hidayat,prabowo prabowo 😂😂😂 padahal yang demen ngejele...,0
3,2023-05-05T14:02:44Z,Ilham Nugraha,UNTUK 2024 KAMI TETAP MEMILIH RI 1 ANIS RASYID...,0
4,2023-05-05T13:09:46Z,Afgan Osama,Sdh pernh,0
5,2023-05-05T10:28:41Z,Wastmo Wasto,Pk prabowo pnya hati mulia dan ikhlas,0
6,2023-05-05T09:38:39Z,Mohamad Kamir,"Bravo ganjar, prabowo and anies 🇮🇩🇮🇩🇮🇩",0
7,2023-05-05T09:26:06Z,Wenny 28,rakyat pingin bbm listrik sebako harga murah d...,0
8,2023-05-05T09:13:30Z,Hari Romadon,Ya klo Ganjar iya klo anes no anes jdi Mentri ...,0
9,2023-05-05T09:12:13Z,Tomo Utomo,Memangnya Anis bisa apa..mimpin DKI aja dpat w...,0


## Preprocessing Data



In [5]:
import pandas as pd
import re
import numpy as np

### 1. Case Folding

In [6]:
#Insialisai variable yang akan dilakukan case folding dengan fungsi lower()
lower_case_komentar = df['textDisplay'].str.lower()

#Menampilkan data yang telah dilakukan case folding
data_lower_case = pd.DataFrame(lower_case_komentar)
data_lower_case

Unnamed: 0,textDisplay
0,saatx bpk prabowo pimpin indonesia 2024-2029
1,"prabowo udah tua udah baun tanah, ganjar ngak ..."
2,prabowo prabowo 😂😂😂 padahal yang demen ngejele...
3,untuk 2024 kami tetap memilih ri 1 anis rasyid...
4,sdh pernh
5,pk prabowo pnya hati mulia dan ikhlas
6,"bravo ganjar, prabowo and anies 🇮🇩🇮🇩🇮🇩"
7,rakyat pingin bbm listrik sebako harga murah d...
8,ya klo ganjar iya klo anes no anes jdi mentri ...
9,memangnya anis bisa apa..mimpin dki aja dpat w...


### 2. Cleansing

In [7]:
#Insialisasi dataset clean
clean =[]

#Melakukan perulangan sepanjang data case folding
for i in range (len(lower_case_komentar)): 
  clean_tag  = re.sub("@[A-Za-z0-9_]+","", lower_case_komentar[i]) #clenasing mention
  clean_hashtag = re.sub("#[A-Za-z0-9_]+","", clean_tag) #clenasing hashtag 
  clean_https = re.sub(r'http\S+', '', clean_hashtag) #cleansing url link
  clean_symbols = re.sub("[^a-zA-Zï ]+"," ", clean_https) #cleansing character

  #Memasukkan hasil clean kedalam array kosong yang di inisialisasi sebelumnya
  clean.append(clean_symbols)


#Menampilkan ke dalam bentuk dataframe  
clean_result = pd.DataFrame(clean,columns=['Cleansing Komentar'])
clean_result 

Unnamed: 0,Cleansing Komentar
0,saatx bpk prabowo pimpin indonesia
1,prabowo udah tua udah baun tanah ganjar ngak ...
2,prabowo prabowo padahal yang demen ngejelek ...
3,untuk kami tetap memilih ri anis rasyid ba...
4,sdh pernh
5,pk prabowo pnya hati mulia dan ikhlas
6,bravo ganjar prabowo and anies
7,rakyat pingin bbm listrik sebako harga murah d...
8,ya klo ganjar iya klo anes no anes jdi mentri ...
9,memangnya anis bisa apa mimpin dki aja dpat wa...


### 3. Deteksi kata tidak baku (Slank Words)

In [8]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [9]:
# Membuat kamus slang words dan kata Indonesia yang benar
slang_dict = {"@": "di", "abis": "habis", "ad": "ada", "adlh": "adalah", "afaik": "as far as i know", "ahaha": "haha", "aj": "saja", "ajep-ajep": "dunia gemerlap", "ak": "saya", "akika": "aku", "akkoh": "aku", "akuwh": "aku", "alay": "norak", "alow": "halo", "ambilin": "ambilkan", "ancur": "hancur", "anjrit": "anjing", "anter": "antar", "ap2": "apa-apa", "apasih": "apa sih", "apes": "sial", "aps": "apa", "aq": "saya", "aquwh": "aku", "asbun": "asal bunyi", "aseekk": "asyik", "asekk": "asyik", "asem": "asam", "aspal": "asli tetapi palsu", "astul": "asal tulis", "ato": "atau", "au ah": "tidak mau tahu", "awak": "saya", "ay": "sayang", "ayank": "sayang", "b4": "sebelum", "bakalan": "akan", "bandes": "bantuan desa", "bangedh": "banget", "banpol": "bantuan polisi", "banpur": "bantuan tempur", "basbang": "basi", "bcanda": "bercanda", "bdg": "bandung", "begajulan": "nakal", "beliin": "belikan", "bencong": "banci", "bentar": "sebentar", "ber3": "bertiga", "beresin": "membereskan", "bete": "bosan", "beud": "banget", "bg": "abang", "bgmn": "bagaimana", "bgt": "banget", "bijimane": "bagaimana", "bintal": "bimbingan mental", "bkl": "akan", "bknnya": "bukannya", "blegug": "bodoh", "blh": "boleh", "bln": "bulan", "blum": "belum", "bnci": "benci", "bnran": "yang benar", "bodor": "lucu", "bokap": "ayah", "boker": "buang air besar", "bokis": "bohong", "boljug": "boleh juga", "bonek": "bocah nekat", "boyeh": "boleh", "br": "baru", "brg": "bareng", "bro": "saudara laki-laki", "bru": "baru", "bs": "bisa", "bsen": "bosan", "bt": "buat", "btw": "ngomong-ngomong", "buaya": "tidak setia", "bubbu": "tidur", "bubu": "tidur", "bumil": "ibu hamil", "bw": "bawa", "bwt": "buat", "byk": "banyak", "byrin": "bayarkan", "cabal": "sabar", "cadas": "keren", "calo": "makelar", "can": "belum", "capcus": "pergi", "caper": "cari perhatian", "ce": "cewek", "cekal": "cegah tangkal", "cemen": "penakut", "cengengesan": "tertawa", "cepet": "cepat", "cew": "cewek", "chuyunk": "sayang", "cimeng": "ganja", "cipika cipiki": "cium pipi kanan cium pipi kiri", "ciyh": "sih", "ckepp": "cakep", "ckp": "cakep", "cmiiw": "correct me if i'm wrong", "cmpur": "campur", "cong": "banci", "conlok": "cinta lokasi", "cowwyy": "maaf", "cp": "siapa", "cpe": "capek", "cppe": "capek", "cucok": "cocok", "cuex": "cuek", "cumi": "Cuma miscall", "cups": "culun", "curanmor": "pencurian kendaraan bermotor", "curcol": "curahan hati colongan", "cwek": "cewek", "cyin": "cinta", "d": "di", "dah": "deh", "dapet": "dapat", "de": "adik", "dek": "adik", "demen": "suka", "deyh": "deh", "dgn": "dengan", "diancurin": "dihancurkan", "dimaafin": "dimaafkan", "dimintak": "diminta", "disono": "di sana", "dket": "dekat", "dkk": "dan kawan-kawan", "dll": "dan lain-lain", "dlu": "dulu", "dngn": "dengan", "dodol": "bodoh", "doku": "uang", "dongs": "dong", "dpt": "dapat", "dri": "dari", "drmn": "darimana", "drtd": "dari tadi", "dst": "dan seterusnya", "dtg": "datang", "duh": "aduh", "duren": "durian", "ed": "edisi", "egp": "emang gue pikirin", "eke": "aku", "elu": "kamu", "emangnya": "memangnya", "emng": "memang", "endak": "tidak", "enggak": "tidak", "envy": "iri", "ex": "mantan", "fax": "facsimile", "fifo": "first in first out", "folbek": "follow back", "fyi": "sebagai informasi", "gaada": "tidak ada uang", "gag": "tidak", "gaje": "tidak jelas", "gak papa": "tidak apa-apa", "gan": "juragan", "gaptek": "gagap teknologi", "gatek": "gagap teknologi", "gawe": "kerja", "gbs": "tidak bisa", "gebetan": "orang yang disuka", "geje": "tidak jelas", "gepeng": "gelandangan dan pengemis", "ghiy": "lagi", "gile": "gila", "gimana": "bagaimana", "gino": "gigi nongol", "githu": "gitu", "gj": "tidak jelas", "gmana": "bagaimana", "gn": "begini", "goblok": "bodoh", "golput": "golongan putih", "gowes": "mengayuh sepeda", "gpny": "tidak punya", "gr": "gede rasa", "gretongan": "gratisan", "gtau": "tidak tahu", "gua": "saya", "guoblok": "goblok", "gw": "saya", "ha": "tertawa", "haha": "tertawa", "hallow": "halo", "hankam": "pertahanan dan keamanan", "hehe": "he", "helo": "halo", "hey": "hai", "hlm": "halaman", "hny": "hanya", "hoax": "isu bohong", "hr": "hari", "hrus": "harus", "hubdar": "perhubungan darat", "huff": "mengeluh", "hum": "rumah", "humz": "rumah", "ilang": "hilang", "ilfil": "tidak suka", "imho": "in my humble opinion", "imoetz": "imut", "item": "hitam", "itungan": "hitungan", "iye": "iya", "ja": "saja", "jadiin": "jadi", "jaim": "jaga image", "jayus": "tidak lucu", "jdi": "jadi", "jem": "jam", "jga": "juga", "jgnkan": "jangankan", "jir": "anjing", "jln": "jalan", "jomblo": "tidak punya pacar", "jubir": "juru bicara", "jutek": "galak", "k": "ke", "kab": "kabupaten", "kabor": "kabur", "kacrut": "kacau", "kadiv": "kepala divisi", "kagak": "tidak", "kalo": "kalau", "kampret": "sialan", "kamtibmas": "keamanan dan ketertiban masyarakat", "kamuwh": "kamu", "kanwil": "kantor wilayah", "karna": "karena", "kasubbag": "kepala subbagian", "katrok": "kampungan", "kayanya": "kayaknya", "kbr": "kabar", "kdu": "harus", "kec": "kecamatan", "kejurnas": "kejuaraan nasional", "kekeuh": "keras kepala", "kel": "kelurahan", "kemaren": "kemarin", "kepengen": "mau", "kepingin": "mau", "kepsek": "kepala sekolah", "kesbang": "kesatuan bangsa", "kesra": "kesejahteraan rakyat", "ketrima": "diterima", "kgiatan": "kegiatan", "kibul": "bohong", "kimpoi": "kawin", "kl": "kalau", "klianz": "kalian", "kloter": "kelompok terbang", "klw": "kalau", "km": "kamu", "kmps": "kampus", "kmrn": "kemarin", "knal": "kenal", "knp": "kenapa", "kodya": "kota madya", "komdis": "komisi disiplin", "komsov": "komunis sovyet", "kongkow": "kumpul bareng teman-teman", "kopdar": "kopi darat", "korup": "korupsi", "kpn": "kapan", "krenz": "keren", "krm": "kirim", "kt": "kita", "ktmu": "ketemu", "ktr": "kantor", "kuper": "kurang pergaulan", "kw": "imitasi", "kyk": "seperti", "la": "lah", "lam": "salam", "lamp": "lampiran", "lanud": "landasan udara", "latgab": "latihan gabungan", "lebay": "berlebihan", "leh": "boleh", "lelet": "lambat", "lemot": "lambat", "lgi": "lagi", "lgsg": "langsung", "liat": "lihat", "litbang": "penelitian dan pengembangan", "lmyn": "lumayan", "lo": "kamu", "loe": "kamu", "lola": "lambat berfikir", "louph": "cinta", "low": "kalau", "lp": "lupa", "luber": "langsung, umum, bebas, dan rahasia", "luchuw": "lucu", "lum": "belum", "luthu": "lucu", "lwn": "lawan", "maacih": "terima kasih", "mabal": "bolos", "macem": "macam", "macih": "masih", "maem": "makan", "magabut": "makan gaji buta", "maho": "homo", "mak jang": "kaget", "maksain": "memaksa", "malem": "malam", "mam": "makan", "maneh": "kamu", "maniez": "manis", "mao": "mau", "masukin": "masukkan", "melu": "ikut", "mepet": "dekat sekali", "mgu": "minggu", "migas": "minyak dan gas bumi", "mikol": "minuman beralkohol", "miras": "minuman keras", "mlah": "malah", "mngkn": "mungkin", "mo": "mau", "mokad": "mati", "moso": "masa", "mpe": "sampai", "msk": "masuk", "mslh": "masalah", "mt": "makan teman", "mubes": "musyawarah besar", "mulu": "melulu", "mumpung": "selagi", "munas": "musyawarah nasional", "muntaber": "muntah dan berak", "musti": "mesti", "muupz": "maaf", "mw": "now watching", "n": "dan", "nanam": "menanam", "nanya": "bertanya", "napa": "kenapa", "napi": "narapidana", "napza": "narkotika, alkohol, psikotropika, dan zat adiktif ", "narkoba": "narkotika, psikotropika, dan obat terlarang", "nasgor": "nasi goreng", "nda": "tidak", "ndiri": "sendiri", "ne": "ini", "nekolin": "neokolonialisme", "nembak": "menyatakan cinta", "ngabuburit": "menunggu berbuka puasa", "ngaku": "mengaku", "ngambil": "mengambil", "nganggur": "tidak punya pekerjaan", "ngapah": "kenapa", "ngaret": "terlambat", "ngasih": "memberikan", "ngebandel": "berbuat bandel", "ngegosip": "bergosip", "ngeklaim": "mengklaim", "ngeksis": "menjadi eksis", "ngeles": "berkilah", "ngelidur": "menggigau", "ngerampok": "merampok", "ngga": "tidak", "ngibul": "berbohong", "ngiler": "mau", "ngiri": "iri", "ngisiin": "mengisikan", "ngmng": "bicara", "ngomong": "bicara", "ngubek2": "mencari-cari", "ngurus": "mengurus", "nie": "ini", "nih": "ini", "niyh": "nih", "nmr": "nomor", "nntn": "nonton", "nobar": "nonton bareng", "np": "now playing", "ntar": "nanti", "ntn": "nonton", "numpuk": "bertumpuk", "nutupin": "menutupi", "nyari": "mencari", "nyekar": "menyekar", "nyicil": "mencicil", "nyoblos": "mencoblos", "nyokap": "ibu", "ogah": "tidak mau", "ol": "online", "ongkir": "ongkos kirim", "oot": "out of topic", "org2": "orang-orang", "ortu": "orang tua", "otda": "otonomi daerah", "otw": "on the way, sedang di jalan", "pacal": "pacar", "pake": "pakai", "pala": "kepala", "pansus": "panitia khusus", "parpol": "partai politik", "pasutri": "pasangan suami istri", "pd": "pada", "pede": "percaya diri", "pelatnas": "pemusatan latihan nasional", "pemda": "pemerintah daerah", "pemkot": "pemerintah kota", "pemred": "pemimpin redaksi", "penjas": "pendidikan jasmani", "perda": "peraturan daerah", "perhatiin": "perhatikan", "pesenan": "pesanan", "pgang": "pegang", "pi": "tapi", "pilkada": "pemilihan kepala daerah", "pisan": "sangat", "pk": "penjahat kelamin", "plg": "paling", "pmrnth": "pemerintah", "polantas": "polisi lalu lintas", "ponpes": "pondok pesantren", "pp": "pulang pergi", "prg": "pergi", "prnh": "pernah", "psen": "pesan", "pst": "pasti", "pswt": "pesawat", "pw": "posisi nyaman", "qmu": "kamu", "rakor": "rapat koordinasi", "ranmor": "kendaraan bermotor", "re": "reply", "ref": "referensi", "rehab": "rehabilitasi", "rempong": "sulit", "repp": "balas", "restik": "reserse narkotika", "rhs": "rahasia", "rmh": "rumah", "ru": "baru", "ruko": "rumah toko", "rusunawa": "rumah susun sewa", "ruz": "terus", "saia": "saya", "salting": "salah tingkah", "sampe": "sampai", "samsek": "sama sekali", "sapose": "siapa", "satpam": "satuan pengamanan", "sbb": "sebagai berikut", "sbh": "sebuah", "sbnrny": "sebenarnya", "scr": "secara", "sdgkn": "sedangkan", "sdkt": "sedikit", "se7": "setuju", "sebelas dua belas": "mirip", "sembako": "sembilan bahan pokok", "sempet": "sempat", "sendratari": "seni drama tari", "sgt": "sangat", "shg": "sehingga", "siech": "sih", "sikon": "situasi dan kondisi", "sinetron": "sinema elektronik", "siramin": "siramkan", "sj": "saja", "skalian": "sekalian", "sklh": "sekolah", "skt": "sakit", "slesai": "selesai", "sll": "selalu", "slma": "selama", "slsai": "selesai", "smpt": "sempat", "smw": "semua", "sndiri": "sendiri", "soljum": "sholat jumat", "songong": "sombong", "sory": "maaf", "sosek": "sosial-ekonomi", "sotoy": "sok tahu", "spa": "siapa", "sppa": "siapa", "spt": "seperti", "srtfkt": "sertifikat", "stiap": "setiap", "stlh": "setelah", "suk": "masuk", "sumpek": "sempit", "syg": "sayang", "t4": "tempat", "tajir": "kaya", "tau": "tahu", "taw": "tahu", "td": "tadi", "tdk": "tidak", "teh": "kakak perempuan", "telat": "terlambat", "telmi": "telat berpikir", "temen": "teman", "tengil": "menyebalkan", "tepar": "terkapar", "tggu": "tunggu", "tgu": "tunggu", "thankz": "terima kasih", "thn": "tahun", "tilang": "bukti pelanggaran", "tipiwan": "TvOne", "tks": "terima kasih", "tlp": "telepon", "tls": "tulis", "tmbah": "tambah", "tmen2": "teman-teman", "tmpah": "tumpah", "tmpt": "tempat", "tngu": "tunggu", "tnyta": "ternyata", "tokai": "tai", "toserba": "toko serba ada", "tpi": "tapi", "trdhulu": "terdahulu", "trima": "terima kasih", "trm": "terima", "trs": "terus", "trutama": "terutama", "ts": "penulis", "tst": "tahu sama tahu", "ttg": "tentang", "tuch": "tuh", "tuir": "tua", "tw": "tahu", "u": "kamu", "ud": "sudah", "udah": "sudah", "ujg": "ujung", "ul": "ulangan", "unyu": "lucu", "uplot": "unggah", "urang": "saya", "usah": "perlu", "utk": "untuk", "valas": "valuta asing", "w/": "dengan", "wadir": "wakil direktur", "wamil": "wajib militer", "warkop": "warung kopi", "warteg": "warung tegal", "wat": "buat", "wkt": "waktu", "wtf": "what the fuck", "xixixi": "tertawa", "ya": "iya", "yap": "iya", "yaudah": "ya sudah", "yawdah": "ya sudah", "yg": "yang", "yl": "yang lain", "yo": "iya", "yowes": "ya sudah", "yup": "iya", "7an": "tujuan", "ababil": "abg labil", "acc": "accord", "adlah": "adalah", "adoh": "aduh", "aha": "tertawa", "aing": "saya", "aja": "saja", "ajj": "saja", "aka": "dikenal juga sebagai", "akko": "aku", "akku": "aku", "akyu": "aku", "aljasa": "asal jadi saja", "ama": "sama", "ambl": "ambil", "anjir": "anjing", "ank": "anak", "ap": "apa", "apaan": "apa", "ape": "apa", "aplot": "unggah", "apva": "apa", "aqu": "aku", "asap": "sesegera mungkin", "aseek": "asyik", "asek": "asyik", "aseknya": "asyiknya", "asoy": "asyik", "astrojim": "astagfirullahaladzim", "ath": "kalau begitu", "atuh": "kalau begitu", "ava": "avatar", "aws": "awas", "ayang": "sayang", "ayok": "ayo", "bacot": "banyak bicara", "bales": "balas", "bangdes": "pembangunan desa", "bangkotan": "tua", "banpres": "bantuan presiden", "bansarkas": "bantuan sarana kesehatan", "bazis": "badan amal, zakat, infak, dan sedekah", "bcoz": "karena", "beb": "sayang", "bejibun": "banyak", "belom": "belum", "bener": "benar", "ber2": "berdua", "berdikari": "berdiri di atas kaki sendiri", "bet": "banget", "beti": "beda tipis", "beut": "banget", "bgd": "banget", "bgs": "bagus", "bhubu": "tidur", "bimbuluh": "bimbingan dan penyuluhan", "bisi": "kalau-kalau", "bkn": "bukan", "bl": "beli", "blg": "bilang", "blm": "belum", "bls": "balas", "bnchi": "benci", "bngung": "bingung", "bnyk": "banyak", "bohay": "badan aduhai", "bokep": "porno", "bokin": "pacar", "bole": "boleh", "bolot": "bodoh", "bonyok": "ayah ibu", "bpk": "bapak", "brb": "segera kembali", "brngkt": "berangkat", "brp": "berapa", "brur": "saudara laki-laki", "bsa": "bisa", "bsk": "besok", "bu_bu": "tidur", "bubarin": "bubarkan", "buber": "buka bersama", "bujubune": "luar biasa", "buser": "buru sergap", "bwhn": "bawahan", "byar": "bayar", "byr": "bayar", "c8": "chat", "cabut": "pergi", "caem": "cakep", "cama-cama": "sama-sama", "cangcut": "celana dalam", "cape": "capek", "caur": "jelek", "cekak": "tidak ada uang", "cekidot": "coba lihat", "cemplungin": "cemplungkan", "ceper": "pendek", "ceu": "kakak perempuan", "cewe": "cewek", "cibuk": "sibuk", "cin": "cinta", "ciye": "cie", "ckck": "ck", "clbk": "cinta lama bersemi kembali", "cmpr": "campur", "cnenk": "senang", "congor": "mulut", "cow": "cowok", "coz": "karena", "cpa": "siapa", "gokil": "gila", "gombal": "suka merayu", "gpl": "tidak pakai lama", "gpp": "tidak apa-apa", "gretong": "gratis", "gt": "begitu", "gtw": "tidak tahu", "gue": "saya", "guys": "teman-teman", "gws": "cepat sembuh", "haghaghag": "tertawa", "hakhak": "tertawa", "handak": "bahan peledak", "hansip": "pertahanan sipil", "hellow": "halo", "helow": "halo", "hi": "hai", "hlng": "hilang", "hnya": "hanya", "houm": "rumah", "hrs": "harus", "hubad": "hubungan angkatan darat", "hubla": "perhubungan laut", "huft": "mengeluh", "humas": "hubungan masyarakat", "idk": "saya tidak tahu", "ilfeel": "tidak suka", "imba": "jago sekali", "imoet": "imut", "info": "informasi", "itung": "hitung", "isengin": "bercanda", "iyala": "iya lah", "iyo": "iya", "jablay": "jarang dibelai", "jadul": "jaman dulu", "jancuk": "anjing", "jd": "jadi", "jdikan": "jadikan", "jg": "juga", "jgn": "jangan", "jijay": "jijik", "jkt": "jakarta", "jnj": "janji", "jth": "jatuh", "jurdil": "jujur adil", "jwb": "jawab", "ka": "kakak", "kabag": "kepala bagian", "kacian": "kasihan", "kadit": "kepala direktorat", "kaga": "tidak", "kaka": "kakak", "kamtib": "keamanan dan ketertiban", "kamuh": "kamu", "kamyu": "kamu", "kapt": "kapten", "kasat": "kepala satuan", "kasubbid": "kepala subbidang", "kau": "kamu", "kbar": "kabar", "kcian": "kasihan", "keburu": "terlanjur", "kedubes": "kedutaan besar", "kek": "seperti", "keknya": "kayaknya", "keliatan": "kelihatan", "keneh": "masih", "kepikiran": "terpikirkan", "kepo": "mau tahu urusan orang", "kere": "tidak punya uang", "kesian": "kasihan", "ketauan": "ketahuan", "keukeuh": "keras kepala", "khan": "kan", "kibus": "kaki busuk", "kk": "kakak", "klian": "kalian", "klo": "kalau", "kluarga": "keluarga", "klwrga": "keluarga", "kmari": "kemari", "kmpus": "kampus", "kn": "kan", "knl": "kenal", "knpa": "kenapa", "kog": "kok", "kompi": "komputer", "komtiong": "komunis Tiongkok", "konjen": "konsulat jenderal", "koq": "kok", "kpd": "kepada", "kptsan": "keputusan", "krik": "garing", "krn": "karena", "ktauan": "ketahuan", "ktny": "katanya", "kudu": "harus", "kuq": "kok", "ky": "seperti", "kykny": "kayanya", "laka": "kecelakaan", "lambreta": "lambat", "lansia": "lanjut usia", "lapas": "lembaga pemasyarakatan", "lbur": "libur", "lekong": "laki-laki", "lg": "lagi", "lgkp": "lengkap", "lht": "lihat", "linmas": "perlindungan masyarakat", "lmyan": "lumayan", "lngkp": "lengkap", "loch": "loh", "lol": "tertawa", "lom": "belum", "loupz": "cinta", "lowh": "kamu", "lu": "kamu", "luchu": "lucu", "luff": "cinta", "luph": "cinta", "lw": "kamu", "lwt": "lewat", "maaciw": "terima kasih", "mabes": "markas besar", "macem-macem": "macam-macam", "madesu": "masa depan suram", "maen": "main", "mahatma": "maju sehat bersama", "mak": "ibu", "makasih": "terima kasih", "malah": "bahkan", "malu2in": "memalukan", "mamz": "makan", "manies": "manis", "mantep": "mantap", "markus": "makelar kasus", "mba": "mbak", "mending": "lebih baik", "mgkn": "mungkin", "mhn": "mohon", "miker": "minuman keras", "milis": "mailing list", "mksd": "maksud", "mls": "malas", "mnt": "minta", "moge": "motor gede", "mokat": "mati", "mosok": "masa", "msh": "masih", "mskpn": "meskipun", "msng2": "masing-masing", "muahal": "mahal", "muker": "musyawarah kerja", "mumet": "pusing", "muna": "munafik", "munaslub": "musyawarah nasional luar biasa", "musda": "musyawarah daerah", "muup": "maaf", "muuv": "maaf", "nal": "kenal", "nangis": "menangis", "naon": "apa", "napol": "narapidana politik", "naq": "anak", "narsis": "bangga pada diri sendiri", "nax": "anak", "ndak": "tidak", "ndut": "gendut", "nekolim": "neokolonialisme", "nelfon": "menelepon", "ngabis2in": "menghabiskan", "ngakak": "tertawa", "ngambek": "marah", "ngampus": "pergi ke kampus", "ngantri": "mengantri", "ngapain": "sedang apa", "ngaruh": "berpengaruh", "ngawur": "berbicara sembarangan", "ngeceng": "kumpul bareng-bareng", "ngeh": "sadar", "ngekos": "tinggal di kos", "ngelamar": "melamar", "ngeliat": "melihat", "ngemeng": "bicara terus-terusan", "ngerti": "mengerti", "nggak": "tidak", "ngikut": "ikut", "nginep": "menginap", "ngisi": "mengisi", "ngmg": "bicara", "ngocol": "lucu", "ngomongin": "membicarakan", "ngumpul": "berkumpul", "ni": "ini", "nyasar": "tersesat", "nyariin": "mencari", "nyiapin": "mempersiapkan", "nyiram": "menyiram", "nyok": "ayo", "o/": "oleh", "ok": "ok", "priksa": "periksa", "pro": "profesional", "psn": "pesan", "psti": "pasti", "puanas": "panas", "qmo": "kamu", "qt": "kita", "rame": "ramai", "raskin": "rakyat miskin", "red": "redaksi", "reg": "register", "rejeki": "rezeki", "renstra": "rencana strategis", "reskrim": "reserse kriminal", "sni": "sini", "somse": "sombong sekali", "sorry": "maaf", "sosbud": "sosial-budaya", "sospol": "sosial-politik", "sowry": "maaf", "spd": "sepeda", "sprti": "seperti", "spy": "supaya", "stelah": "setelah", "subbag": "subbagian", "sumbangin": "sumbangkan", "sy": "saya", "syp": "siapa", "tabanas": "tabungan pembangunan nasional", "tar": "nanti", "taun": "tahun", "tawh": "tahu", "tdi": "tadi", "te2p": "tetap", "tekor": "rugi", "telkom": "telekomunikasi", "telp": "telepon", "temen2": "teman-teman", "tengok": "menjenguk", "terbitin": "terbitkan", "tgl": "tanggal", "thanks": "terima kasih", "thd": "terhadap", "thx": "terima kasih", "tipi": "TV", "tkg": "tukang", "tll": "terlalu", "tlpn": "telepon", "tman": "teman", "tmbh": "tambah", "tmn2": "teman-teman", "tmph": "tumpah", "tnda": "tanda", "tnh": "tanah", "togel": "toto gelap", "tp": "tapi", "tq": "terima kasih", "trgntg": "tergantung", "trims": "terima kasih", "cb": "coba", "y": "ya", "munfik": "munafik", "reklamuk": "reklamasi", "sma": "sama", "tren": "trend", "ngehe": "kesal", "mz": "mas", "analisise": "analisis", "sadaar": "sadar", "sept": "september", "nmenarik": "menarik", "zonk": "bodoh", "rights": "benar", "simiskin": "miskin", "ngumpet": "sembunyi", "hardcore": "keras", "akhirx": "akhirnya", "solve": "solusi", "watuk": "batuk", "ngebully": "intimidasi", "masy": "masyarakat", "still": "masih", "tauk": "tahu", "mbual": "bual", "tioghoa": "tionghoa", "ngentotin": "senggama", "kentot": "senggama", "faktakta": "fakta", "sohib": "teman", "rubahnn": "rubah", "trlalu": "terlalu", "nyela": "cela", "heters": "pembenci", "nyembah": "sembah", "most": "paling", "ikon": "lambang", "light": "terang", "pndukung": "pendukung", "setting": "atur", "seting": "akting", "next": "lanjut", "waspadalah": "waspada", "gantengsaya": "ganteng", "parte": "partai", "nyerang": "serang", "nipu": "tipu", "ktipu": "tipu", "jentelmen": "berani", "buangbuang": "buang", "tsangka": "tersangka", "kurng": "kurang", "ista": "nista", "less": "kurang", "koar": "teriak", "paranoid": "takut", "problem": "masalah", "tahi": "kotoran", "tirani": "tiran", "tilep": "tilap", "happy": "bahagia", "tak": "tidak", "penertiban": "tertib", "uasai": "kuasa", "mnolak": "tolak", "trending": "trend", "taik": "tahi", "wkwkkw": "tertawa", "ahokncc": "ahok", "istaa": "nista", "benarjujur": "jujur", "mgkin": "mungkin","sdh":"sudah","pernh":"pernah","pnya":"punya", "syulitttt":"sulit", "jelass":"jelas","mu":"kamu", "and":"dan","pingin":"ingin", "kenerjanya":"kenerjanya","tmb":"tambah","dpat":"dapat","ttp":"tetap","lm":"lama", "majuterus":"maju terus","idonesia":"indonesia","menugumu":"menunggumu", "eloe":"kamu", "rja":"kerja", "so":"pasti","anes":"anies"}

In [10]:
# Membuat fungsi untuk mengubah slang words menjadi kata Indonesia yang benar,
def replace_slang_words(text):
    words = nltk.word_tokenize(text.lower())
    words_filtered = [word for word in words if word not in stopwords.words('indonesian')]
    for i in range(len(words_filtered)):
        if words_filtered[i] in slang_dict:
            words_filtered[i] = slang_dict[words_filtered[i]]
    return ' '.join(words_filtered)

# Memasukan Kata yang telah di clean ke dalam fungsi deteksi slang words
slang_words=[]
for i in range(len(clean)):
  slang = replace_slang_words(clean[i])
  slang_words.append(slang)

data_slang = pd.DataFrame(slang_words, columns=["Slang Word Corection"])
data_slang

Unnamed: 0,Slang Word Corection
0,saatx bapak prabowo pimpin indonesia
1,prabowo sudah tua sudah baun tanah ganjar ngak...
2,prabowo prabowo suka ngejelek jelekin anies ba...
3,memilih ri anis rasyid baswedan baru mohon maa...
4,sudah pernah
5,penjahat kelamin prabowo punya hati mulia ikhlas
6,bravo ganjar prabowo dan anies
7,rakyat ingin bbm listrik sebako harga murah ko...
8,iya kalau ganjar iya kalau anies no anies jadi...
9,memangnya anis mimpin dki saja dapat warisan a...


### 4. Steaming 

#### Instalasi Library Sastrawi

In [11]:
pip install Sastrawi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


In [12]:
#Import Library Sastrawi
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

#Membuat fungsi steaming dengan library
factory = StemmerFactory()
steaming = factory.create_stemmer()

#Melakukan perulangan untuk memasukan kata kedalam fungsi steaming
hasil_steaming = []
for i in range (len(slang_words)):
  stem = steaming.stem(slang_words[i])
  hasil_steaming.append(stem)

#Menampilkan data hasil steaming kedalam dataframe
data_steaming = pd.DataFrame(hasil_steaming, columns=["Steaming"])
data_steaming

Unnamed: 0,Steaming
0,saatx bapak prabowo pimpin indonesia
1,prabowo sudah tua sudah baun tanah ganjar ngak...
2,prabowo prabowo suka ngejelek jelekin anies ba...
3,pilih ri anis rasyid baswedan baru mohon maaf ...
4,sudah pernah
5,jahat kelamin prabowo punya hati mulia ikhlas
6,bravo ganjar prabowo dan anies
7,rakyat ingin bbm listrik bako harga murah koru...
8,iya kalau ganjar iya kalau anies no anies jadi...
9,memang anis mimpin dki saja dapat waris ahok a...


###5. Tokenizing dan Stop Words

In [13]:
#Import Libarary sastrawi stop words
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

#Inisialisasi fungsi stop words
stop_factory = StopWordRemoverFactory()

words = []

#Membuat perulangan untuk memasukkan dataset ke dalam tekonisasi dan list stopwords
for i in range (len(hasil_steaming)):

  #Inisialisai fungsi tokenisasi dan stopword
  tokens = word_tokenize(hasil_steaming[i])
  more_stopword = ['dengan', 'ia','bahwa','oleh','aalysis','aam','kunci']
  data = stop_factory.get_stop_words()+more_stopword
  stopword = stop_factory.create_stop_word_remover()

  #Melakukan removed kata
  removed = []
  for t in tokens:
      if t not in data:
          removed.append(t)
  
  #Memasukkan hasil removed kedalem variable words
  words.append(removed)
  print(removed)

['saatx', 'bapak', 'prabowo', 'pimpin', 'indonesia']
['prabowo', 'tua', 'baun', 'tanah', 'ganjar', 'ngak', 'prestasi', 'anis', 'bukti', 'bawa', 'ubah', 'jakarta']
['prabowo', 'prabowo', 'suka', 'ngejelek', 'jelekin', 'anies', 'baswedan', 'si', 'andre', 'rosiade', 'jelas', 'emang']
['pilih', 'ri', 'anis', 'rasyid', 'baswedan', 'baru', 'mohon', 'maaf', 'prabowo', 'kecewa', 'baru', 'suara', 'asli', 'kamu', 'pindah', 'anies']
['pernah']
['jahat', 'kelamin', 'prabowo', 'punya', 'hati', 'mulia', 'ikhlas']
['bravo', 'ganjar', 'prabowo', 'anies']
['rakyat', 'bbm', 'listrik', 'bako', 'harga', 'murah', 'koruptor', 'miskin', 'hukum', 'umur', 'hidup']
['iya', 'kalau', 'ganjar', 'iya', 'kalau', 'anies', 'no', 'anies', 'jadi', 'tri', 'kerja', 'jadi', 'gubenur', 'kenerjanya', 'tambah', 'bobrok', 'lebih', 'bayar', 'iya', 'kalau', 'yohanes']
['memang', 'anis', 'mimpin', 'dki', 'waris', 'ahok', 'anis', 'antek', 'radikal']
['capres', 'demokrasi', 'sehat', 'cinta', 'nkri', 'beliau', 'jujur', 'kaya', 'raya

#### Melakukan penggabungan kata dari hasil tokenisasi

In [14]:
gabung=[]
#Membuat perulangan untuk menggabungkan kata
for i in range(len(words)):
  joinkata = ' '.join(words[i])
  gabung.append(joinkata)

result = pd.DataFrame(gabung, columns=['Join Kata'])
result

Unnamed: 0,Join Kata
0,saatx bapak prabowo pimpin indonesia
1,prabowo tua baun tanah ganjar ngak prestasi an...
2,prabowo prabowo suka ngejelek jelekin anies ba...
3,pilih ri anis rasyid baswedan baru mohon maaf ...
4,pernah
5,jahat kelamin prabowo punya hati mulia ikhlas
6,bravo ganjar prabowo anies
7,rakyat bbm listrik bako harga murah koruptor m...
8,iya kalau ganjar iya kalau anies no anies jadi...
9,memang anis mimpin dki waris ahok anis antek r...




## Melakukan ekstraksi fitur dengan TF-IDF

In [15]:
#Import fungsi tf-idf
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer

In [16]:
# CountVectorizer
countvectorizer = CountVectorizer(analyzer= 'word', stop_words='english')
count_wm = countvectorizer.fit_transform(gabung)
count_tokens = countvectorizer.get_feature_names_out()
df_countvect = pd.DataFrame(data = count_wm.toarray(),columns = count_tokens)
print("Count Vectorizer\n")
df_countvect

Count Vectorizer



Unnamed: 0,ahok,anak,andre,anies,anis,antek,asli,bako,bang,bangga,...,terus,tetap,tri,tua,tunggu,ubah,umur,utang,waris,yohanes
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
2,0,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
8,0,0,0,2,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,1
9,1,0,0,0,2,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [17]:
# TfidfVectorizer 
tfidfvectorizer = TfidfVectorizer(analyzer='word',stop_words= 'english')
tfidf_wm = tfidfvectorizer.fit_transform(gabung)
tfidf_tokens = tfidfvectorizer.get_feature_names_out()
df_tfidfvect = pd.DataFrame(data = tfidf_wm.toarray(),columns = tfidf_tokens)
print("\nTF-IDF Vectorizer\n")
df_tfidfvect



TF-IDF Vectorizer



Unnamed: 0,ahok,anak,andre,anies,anis,antek,asli,bako,bang,bangga,...,terus,tetap,tri,tua,tunggu,ubah,umur,utang,waris,yohanes
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.249797,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.310089,0.0,0.310089,0.0,0.0,0.0,0.0
2,0.0,0.0,0.309691,0.230092,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.194016,0.210361,0.0,0.261135,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.514124,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.304495,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.304495,0.0,0.0,0.0
8,0.0,0.0,0.0,0.268258,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.18053,0.0,0.0,0.0,0.0,0.0,0.0,0.18053
9,0.32282,0.0,0.0,0.0,0.520106,0.32282,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32282,0.0


## Topic Modeling menggunakan Singular Value Decompotion

In [18]:
from sklearn.decomposition import TruncatedSVD
# SVD represent documents and terms in vectors 
svd_model = TruncatedSVD(n_components=2, algorithm='randomized', n_iter=100, random_state=122)
lsa = svd_model.fit_transform(tfidf_wm)

In [19]:
pd.options.display.float_format = '{:,.16f}'.format
topic_encoded_df = pd.DataFrame(lsa, columns = ["topic_1", "topic_2"])
topic_encoded_df["documents"] = result
display(topic_encoded_df[["documents", "topic_1", "topic_2"]])

Unnamed: 0,documents,topic_1,topic_2
0,saatx bapak prabowo pimpin indonesia,0.2807086334538081,-0.2145918681092961
1,prabowo tua baun tanah ganjar ngak prestasi an...,0.3170714844500547,0.1502695266389585
2,prabowo prabowo suka ngejelek jelekin anies ba...,0.3924646679069024,-0.0695803411760915
3,pilih ri anis rasyid baswedan baru mohon maaf ...,0.2961820772020904,-0.0232033292620493
4,pernah,0.0,-1e-16
5,jahat kelamin prabowo punya hati mulia ikhlas,0.3888085933194474,-0.4655215895844184
6,bravo ganjar prabowo anies,0.7256819666718362,0.4578440053392835
7,rakyat bbm listrik bako harga murah koruptor m...,0.0286849688291715,-0.0599638246134637
8,iya kalau ganjar iya kalau anies no anies jadi...,0.2455469719693733,0.1906758922863045
9,memang anis mimpin dki waris ahok anis antek r...,0.0640686851498153,0.024543079089289


In [20]:
encoding_matrix = pd.DataFrame(svd_model.components_, index = ["topic_1","topic_2"], columns = (tfidf_tokens)).T
encoding_matrix

Unnamed: 0,topic_1,topic_2
ahok,0.0096253920703229,0.0046825468351582
anak,0.0016800698927125,-0.0003643572808694
andre,0.0565641860147366,-0.0127352652953357
anies,0.2730540344299273,0.1572239920123032
anis,0.0813638033603413,0.0268440269889983
...,...,...
ubah,0.0457568160602203,0.0275391159793209
umur,0.0040648731014779,-0.0107910175988629
utang,0.0046049868096495,0.0000717052518164
waris,0.0096253920703230,0.0046825468351583
