# Scraping Twitter Data Using the "snscrape" Library
## Scrape tweets from the web and export them to CSV

### Import modules / packages

In [1]:
import csv
import os

import pandas as pd
import snscrape.modules.twitter as sntwitter
from pandas_profiling import ProfileReport
from tqdm.notebook import tqdm

### Scrape the Data

In [2]:
# Search parameters: keywords, date window, language code, and the maximum
# number of tweets to collect.
search_words = "Harga (Telur OR Telor)"
date_since = '2021-09-01'
date_until = '2021-10-15'
lang = 'id'
tweets_count = 15000

# Full query string: the keywords followed by the retweet filter and the
# since/until/lang search operators.
search = f"{search_words} -filter:retweets since:{date_since} until:{date_until} lang:{lang}"

# Accumulator for the scraped rows; filled by the scraping loop.
tweets_temp = []

# Echo the effective search settings so they are recorded with the cell output.
print(
    f"Keyword : {search_words}",
    f"Dari Tanggal : {date_since}",
    f"Sampai Tanggal : {date_until}",
    f"Bahasa (unicode) : {lang}",
    sep="\n",
)

# Use TwitterSearchScraper to run the query and collect one row per tweet
# (date, id, text, username), wrapped in tqdm to show a progress bar.
scraper = sntwitter.TwitterSearchScraper(search)
progress = tqdm(scraper.get_items(), desc="Proses Scrapping Data", total=tweets_count)
for i, tweet in enumerate(progress):
    # `i` is zero-based, so `i >= tweets_count` stops after exactly
    # `tweets_count` tweets have been appended (a `>` check would keep one extra).
    if i >= tweets_count:
        break
    tweets_temp.append([tweet.date, tweet.id, tweet.content, tweet.user.username])

# Build a DataFrame from the collected rows; column order matches the row layout above.
df_tweets = pd.DataFrame(tweets_temp, columns=['Datetime', 'Tweet Id', 'Tweet', 'Username'])

Keyword : Harga (Telur OR Telor)
Dari Tanggal : 2021-09-01
Sampai Tanggal : 2021-10-15
Bahasa (unicode) : id


Proses Scrapping Data:   0%|          | 0/15000 [00:00<?, ?it/s]

### Show the Scrape Results (as a pandas DataFrame)

In [3]:
# Bare expression as the last line of the cell, so the notebook renders the DataFrame.
df_tweets

Unnamed: 0,Datetime,Tweet Id,Tweet,Username
0,2021-10-14 23:06:49+00:00,1448787405549367308,@awesomecemok @Zlhlmi____ JB...harga ayam rm8....,RezanHassan5401
1,2021-10-14 18:11:53+00:00,1448713183842091008,Berdikari serap telur dari peternak untuk stab...,ElshintaDotcom
2,2021-10-14 17:43:33+00:00,1448706053613375504,Penyebab Harga Telur Makin Anjlok Versi Pedaga...,AlbertSolo2
3,2021-10-14 17:38:05+00:00,1448704679001542661,Turunnya permintaan masyarakat akan telur ayam...,AlbertSolo2
4,2021-10-14 17:31:27+00:00,1448703009714675742,"Tapi tidak bagi peternak ayam petelur, mereka ...",AlbertSolo2
...,...,...,...,...
2874,2021-09-01 05:15:52+00:00,1432935215442415620,"Blitar – Harga Telur Ayam Anjlok, Para Peterna...",MadutvT
2875,2021-09-01 04:26:33+00:00,1432922802244308997,"@susipudjiastuti titip sendal dulu, sebelum ba...",HafizSP2
2876,2021-09-01 03:03:19+00:00,1432901857756778500,@fxmario Dari kecil suka mooncake dgn isian te...,seretransel
2877,2021-09-01 02:48:40+00:00,1432898170283913218,"Harga Telur Anjlok, Peternak Di Jateng Babak B...",NAmiyanto


### Remove duplicates (if any) and export the data to CSV

In [4]:
# Remove rows whose tweet text repeats an earlier row, report the counts,
# and export the de-duplicated data to CSV.
bruto = len(df_tweets)

# drop_duplicates returns a new DataFrame rather than modifying df_tweets,
# so the result has to be captured; df_tweets keeps the raw scrape.
df_tweets_unique = df_tweets.drop_duplicates(subset=['Tweet'])

print(f"Dataset dibuang (Karena duplikat) : {bruto - len(df_tweets_unique)} data")
print(f"Dataset masuk : {len(df_tweets_unique)} data")

# Make sure the output directory exists; to_csv fails if it is missing.
os.makedirs('data', exist_ok=True)
# QUOTE_ALL wraps every field in quotes.
df_tweets_unique.to_csv('data/tweets.csv', index=False, quoting=csv.QUOTE_ALL)

Dataset dibuang (Karena duplikat) : 0 data
Dataset masuk : 2879 data


### Verify the Exported CSV

In [5]:
# Read the exported file back into a DataFrame and display it to check the export.
df2 = pd.read_csv(filepath_or_buffer="data/tweets.csv")
df2

Unnamed: 0,Datetime,Tweet Id,Tweet,Username
0,2021-10-14 23:06:49+00:00,1448787405549367308,@awesomecemok @Zlhlmi____ JB...harga ayam rm8....,RezanHassan5401
1,2021-10-14 18:11:53+00:00,1448713183842091008,Berdikari serap telur dari peternak untuk stab...,ElshintaDotcom
2,2021-10-14 17:43:33+00:00,1448706053613375504,Penyebab Harga Telur Makin Anjlok Versi Pedaga...,AlbertSolo2
3,2021-10-14 17:38:05+00:00,1448704679001542661,Turunnya permintaan masyarakat akan telur ayam...,AlbertSolo2
4,2021-10-14 17:31:27+00:00,1448703009714675742,"Tapi tidak bagi peternak ayam petelur, mereka ...",AlbertSolo2
...,...,...,...,...
2874,2021-09-01 05:15:52+00:00,1432935215442415620,"Blitar – Harga Telur Ayam Anjlok, Para Peterna...",MadutvT
2875,2021-09-01 04:26:33+00:00,1432922802244308997,"@susipudjiastuti titip sendal dulu, sebelum ba...",HafizSP2
2876,2021-09-01 03:03:19+00:00,1432901857756778500,@fxmario Dari kecil suka mooncake dgn isian te...,seretransel
2877,2021-09-01 02:48:40+00:00,1432898170283913218,"Harga Telur Anjlok, Peternak Di Jateng Babak B...",NAmiyanto


In [6]:
# Build a profiling report for the reloaded CSV data and embed it in the notebook.
report_html_options = {'style': {'full_width': False, 'theme': 'flatly'}}
profile_report = ProfileReport(df2, title='Profiling hasil grabber', html=report_html_options)
profile_report.to_notebook_iframe()