<a href="https://colab.research.google.com/github/Afifahnabila25/TelcoSent-ID-Indonesian-Telecommunication-Sentiment-Analyzer/blob/main/TelcoSent_ID_Indonesian_Telecommunication_Sentiment_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install Sastrawi -q

import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Build the two Sastrawi factories first, then derive from them the shared
# stopword remover and stemmer instances used by the preprocessing step.
stopword_factory = StopWordRemoverFactory()
stemmer_factory = StemmerFactory()

stopword_remover = stopword_factory.create_stop_word_remover()
stemmer = stemmer_factory.create_stemmer()

# Noise tokens removed before any other processing: URLs, the literal
# "<user_mention>" placeholder (matched in lowercase because the text is
# lowercased first), raw @handles, and hashtags.
_NOISE_PATTERN = re.compile(r"http\S+|www\S+|<user_mention>|@\w+|#\w+")

# Everything that is neither a lowercase ASCII letter nor whitespace
# (digits, punctuation, emoji, ...).
_NON_LETTER_PATTERN = re.compile(r"[^a-z\s]")


def clean_text(text):
    """
    Normalize a raw piece of text for vectorization.

    Steps, in order:
      1. lowercase the text;
      2. replace links, mentions and hashtags with a space;
      3. replace every remaining non-letter character with a space;
      4. collapse all whitespace runs into single spaces;
      5. remove stopwords with the module-level ``stopword_remover``;
      6. stem the result with the module-level ``stemmer``.

    Noise is replaced by a space rather than deleted so that words separated
    only by punctuation (e.g. ``"putus-putus"`` or ``"sinyal,jelek"``) stay
    separate tokens instead of being fused into one unknown word.

    Args:
        text: The raw text. Non-string values (``None``, pandas ``NaN``, ...)
            are treated as missing text.

    Returns:
        The cleaned text as returned by ``stemmer.stem``; an empty string
        when the input is not a string or when nothing but noise was present.
    """
    # Missing values from a DataFrame column arrive as None/float NaN.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    text = _NOISE_PATTERN.sub(" ", text)
    text = _NON_LETTER_PATTERN.sub(" ", text)

    # Collapse tabs/newlines/multiple spaces so the stopword remover and the
    # stemmer always receive plain single-space separated words.
    text = " ".join(text.split())
    if not text:
        return ""

    text = stopword_remover.remove(text)
    text = stemmer.stem(text)
    return text

def _train_and_evaluate(df):
    """Train the TF-IDF + Multinomial Naive Bayes pipeline and print metrics.

    Args:
        df: DataFrame with a ``cleaned_text`` column (preprocessed text) and
            a ``Sentiment`` column (labels).

    Returns:
        A ``(vectorizer, model)`` tuple: the fitted ``TfidfVectorizer`` and
        the fitted ``MultinomialNB`` classifier.
    """
    texts = df['cleaned_text']
    labels = df['Sentiment']

    # Split BEFORE vectorizing: the vocabulary and IDF weights must be learned
    # from the training texts only. Fitting the vectorizer on the full corpus
    # leaks test-set statistics into the features and biases the evaluation.
    text_train, text_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    # Feature extraction using N-Grams (unigrams + bigrams)
    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    X_train = vectorizer.fit_transform(text_train)
    X_test = vectorizer.transform(text_test)

    # Model Training
    model = MultinomialNB()
    model.fit(X_train, y_train)

    # Evaluation on the held-out 20%
    y_pred = model.predict(X_test)
    print("\n--- Model Evaluation ---")
    print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%\n")
    print(classification_report(y_test, y_pred, zero_division=0))

    return vectorizer, model


def _run_cli(vectorizer, model):
    """Run the interactive prompt that classifies user-entered text.

    The loop ends when the user types ``exit`` or ``quit`` (case-insensitive,
    surrounding whitespace ignored), or when stdin is closed / interrupted.

    Args:
        vectorizer: Fitted vectorizer used to transform the cleaned input.
        model: Fitted classifier whose ``predict`` output is printed.
    """
    print("\n" + "-"*55)
    print("[INFO] Starting interactive sentiment analysis CLI.")
    print("       Type 'exit' to quit.")
    print("-"*55)
    print("💡 EXAMPLE INPUTS (Telecommunication Context):")
    print("   [+] Positive: 'jaringan internet hari ini sangat lancar'")
    print("   [-] Negative: 'kecewa banget sinyal putus-putus terus'")
    print("-"*55 + "\n")

    while True:
        try:
            user_input = input("Enter text to analyze: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C at the prompt: leave without a traceback.
            print("\nExiting program.")
            break

        command = user_input.strip()
        if not command:
            continue

        if command.lower() in ('exit', 'quit'):
            print("Exiting program.")
            break

        # Clean text and predict
        processed_input = clean_text(user_input)
        if not processed_input.strip():
            # An empty document becomes an all-zero vector, so the classifier
            # could only answer from its class priors; say so instead.
            print("Prediction : N/A (no usable words left after cleaning)\n")
            continue

        input_vector = vectorizer.transform([processed_input])
        prediction = model.predict(input_vector)[0]

        # Print only the final prediction; str() keeps this working even if
        # the labels are not plain Python strings.
        print(f"Prediction : {str(prediction).upper()}\n")


def main():
    """Download the dataset, train and evaluate the model, then start the CLI.

    Reads the CSV from ``dataset_url``, cleans the ``Text Tweet`` column with
    ``clean_text``, trains/evaluates the classifier on the ``Sentiment``
    labels, and finally hands control to the interactive prompt.
    """
    print("[INFO] Fetching and processing dataset...")
    dataset_url = "https://raw.githubusercontent.com/rizalespe/Dataset-Sentimen-Analisis-Bahasa-Indonesia/master/dataset_tweet_sentiment_cellular_service_provider.csv"
    df = pd.read_csv(dataset_url)

    # Rows lacking either the text or the label cannot be used for training.
    # .copy() gives an independent frame so adding a column below is safe.
    df = df.dropna(subset=['Text Tweet', 'Sentiment']).copy()

    print("[INFO] Applying text preprocessing (this may take a moment)...")
    df['cleaned_text'] = df['Text Tweet'].apply(clean_text)

    vectorizer, model = _train_and_evaluate(df)
    _run_cli(vectorizer, model)

# Run the full pipeline only when this file is executed directly; importing
# it as a module must not trigger the download, training, or the CLI prompt.
if __name__ == "__main__":
    main()