In [None]:
# 1. Import library
import os
import pandas as pd
import tweepy
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from getpass import getpass

print("Imports OK")

In [None]:
# 2. Input credentials (recommended: getpass)
# Prompt for every secret interactively so that nothing is hard-coded in the
# notebook; the prompts are issued in the order listed below.
consumer_key, consumer_secret, access_token, access_token_secret, bearer_token = (
    getpass("CONSUMER_KEY: "),
    getpass("CONSUMER_SECRET: "),
    getpass("ACCESS_TOKEN: "),
    getpass("ACCESS_TOKEN_SECRET: "),
    getpass("BEARER_TOKEN: "),
)

# Optional convenience: mirror the values into this process's environment so
# later cells can read them back with os.environ.
os.environ.update(
    CONSUMER_KEY=consumer_key,
    CONSUMER_SECRET=consumer_secret,
    ACCESS_TOKEN=access_token,
    ACCESS_TOKEN_SECRET=access_token_secret,
    BEARER_TOKEN=bearer_token,
)
print("Credentials loaded to runtime (not saved to disk).")

In [None]:
# 3. OAuth1 (v1.1)
# Read the user-context credentials back from the process environment.
consumer_key = os.getenv("CONSUMER_KEY")
consumer_secret = os.getenv("CONSUMER_SECRET")
access_token = os.getenv("ACCESS_TOKEN")
access_token_secret = os.getenv("ACCESS_TOKEN_SECRET")

# Best-effort check: any failure is reported and the notebook keeps running.
try:
    auth = tweepy.OAuth1UserHandler(
        consumer_key,
        consumer_secret,
        access_token=access_token,
        access_token_secret=access_token_secret,
    )
    api = tweepy.API(auth)
    api.verify_credentials()
    print("OAuth1 auth OK")
except Exception as err:
    print("OAuth1 error:", err)

In [None]:
# 4. Initialize Client v2 (Bearer)
# The bearer token is read back from the process environment; `client` is the
# handle the search cell uses.
bearer_token = os.getenv("BEARER_TOKEN")

try:
    client = tweepy.Client(bearer_token=bearer_token)
    print("Client v2 initialized")
except Exception as err:
    # Report the problem instead of aborting the notebook run.
    print("Client init error:", err)

In [None]:
# 5. Search tweets
# Hashtags and keywords to match, restricted to Indonesian-language tweets
# and excluding retweets.
query = (
    '(#Whoosh OR #KCIC OR #KeretaCepat OR "Whoosh" OR "KCIC" OR "kereta cepat") '
    'lang:id -is:retweet'
)

try:
    response = client.search_recent_tweets(
        query=query,
        max_results=100,
        tweet_fields=["created_at", "author_id", "public_metrics"],
    )
    # Fall back to an empty list when the response carries no data.
    tweets = getattr(response, "data", None) or []
    print("Found tweets:", len(tweets))
except Exception as err:
    # Report the failure and continue with an empty result set.
    print("Search error:", err)
    tweets = []

In [None]:
# 6. Convert to DataFrame
# Flatten each tweet object into one record; missing attributes fall back to
# None / "" / 0 so a partially populated tweet never raises here.
data = []
for t in tweets:
    pm = getattr(t, "public_metrics", None) or {}
    data.append({
        "date": getattr(t, "created_at", None),
        "author_id": getattr(t, "author_id", None),
        "text": getattr(t, "text", ""),
        "likes": pm.get("like_count", 0),
        "retweets": pm.get("retweet_count", 0),
        "replies": pm.get("reply_count", 0),
    })

# Name the columns explicitly: with an empty `tweets` list (no hits, or the
# search failed) a bare pd.DataFrame([]) has no columns at all, and the later
# df["text"] lookup would raise KeyError.
df = pd.DataFrame(
    data,
    columns=["date", "author_id", "text", "likes", "retweets", "replies"],
)
df

In [None]:
# 7. Load sentiment model (optional)
# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_name = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the classifier, move it to the chosen device and switch to eval mode.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)
model.eval()
print("Model loaded on", device)

In [None]:
# 8. Sentiment function
# Class index -> label name.
# NOTE(review): assumes the model's output classes are ordered
# negative / neutral / positive - confirm against the model config.
label_map = {0: "negative", 1: "neutral", 2: "positive"}


def detect_sentiment(text):
    """Classify a single text and return ``(label, confidence)``.

    Args:
        text: The text to classify. Anything that is not a ``str``, or a
            string that is empty after stripping whitespace, is not sent to
            the model.

    Returns:
        A tuple ``(label, confidence)``. ``label`` is one of the values of
        ``label_map`` and ``confidence`` is the softmax probability of the
        winning class as a Python float. Blank or non-string input yields
        ``("neutral", 0.0)``.
    """
    is_blank = not isinstance(text, str) or not text.strip()
    if is_blank:
        return "neutral", 0.0

    # Tokenize (truncated to at most 128 tokens) and move the tensors to the
    # device the model lives on.
    encoded = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=128,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Single input, so take row 0 of the per-class probabilities.
    probabilities = F.softmax(logits, dim=1)[0].cpu().numpy()
    best = int(probabilities.argmax())
    return label_map[best], float(probabilities[best])

In [None]:
# 9. Apply sentiment (optional)
# Score every tweet text in order and collect the label and its confidence
# into two parallel lists, then attach them as new columns.
sentiments, confs = [], []
for label, score in map(detect_sentiment, df["text"]):
    sentiments.append(label)
    confs.append(score)

df["sentiment"] = sentiments
df["confidence"] = confs
df


In [None]:
# Count how many tweets fall under each sentiment label
sentiment_counts = df["sentiment"].value_counts()
print("Jumlah setiap sentimen:", sentiment_counts, sep="\n")

# Share of each sentiment label, expressed as a percentage
sentiment_proportion = df["sentiment"].value_counts(normalize=True).mul(100)
print("\nProporsi setiap sentimen (%):", sentiment_proportion.round(2), sep="\n")

In [None]:
# 10. WordCloud - Positive example
# Collect the text of every tweet labelled "positive".
texts_pos = df["text"][df["sentiment"] == "positive"].tolist()

if not texts_pos:
    print("No positive tweets")
else:
    # Build one cloud from all positive texts joined by spaces.
    wc = WordCloud(
        width=900,
        height=450,
        background_color="white",
    ).generate(" ".join(texts_pos))
    plt.figure(figsize=(12, 6))
    plt.imshow(wc)
    plt.axis("off")
    plt.title("Positive")
    plt.show()


In [None]:
# WordCloud - Neutral
# Collect the text of every tweet labelled "neutral".
texts_neu = df["text"][df["sentiment"] == "neutral"].tolist()

if not texts_neu:
    print("No neutral tweets")
else:
    # Build one cloud from all neutral texts joined by spaces.
    wc = WordCloud(
        width=900,
        height=450,
        background_color="white",
    ).generate(" ".join(texts_neu))
    plt.figure(figsize=(12, 6))
    plt.imshow(wc)
    plt.axis("off")
    plt.title("WordCloud - Neutral Tweets")
    plt.show()

In [None]:
# WordCloud - Negative
# Collect the text of every tweet labelled "negative".
texts_neg = df["text"][df["sentiment"] == "negative"].tolist()

if not texts_neg:
    print("No negative tweets")
else:
    # Build one cloud from all negative texts joined by spaces.
    wc = WordCloud(
        width=900,
        height=450,
        background_color="white",
    ).generate(" ".join(texts_neg))
    plt.figure(figsize=(12, 6))
    plt.imshow(wc)
    plt.axis("off")
    plt.title("WordCloud - Negative Tweets")
    plt.show()

In [None]:
# Save as TXT (TSV format, tab-separated)
output_file = "tweets_with_sentiment.txt"
df.to_csv(output_file, sep="\t", index=False)
print("Saved to:", output_file)

# The google.colab package only exists inside Google Colab. Guard the import
# so that, when run elsewhere, this cell does not fail with ImportError after
# the file has already been written.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; skipping browser download of:", output_file)
else:
    files.download(output_file)

In [None]:
# Save TXT as a neatly aligned plain-text table
txt_output = "tweets_pretty_table.txt"

# Write the rendered table as UTF-8 text.
with open(txt_output, "w", encoding="utf-8") as f:
    f.write(df.to_string())

print("Saved:", txt_output)

# The google.colab package only exists inside Google Colab. Guard the import
# so that, when run elsewhere, this cell does not fail with ImportError after
# the file has already been written.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; skipping browser download of:", txt_output)
else:
    files.download(txt_output)