In [None]:
# Colab-only: open the file-upload dialog and keep whatever it returns in `uploaded`.
from google.colab import files
uploaded = files.upload()

In [None]:
import pandas as pd

# Load the reviews CSV. The filename is hard-coded, so replace it with the
# name of the file you actually uploaded.
df = pd.read_csv("Internet Service Provider.csv")
# Preview the first rows to confirm the file parsed as expected.
df.head()

In [None]:
df.columns

In [None]:
reviews_df = df.copy()

In [None]:
reviews_df.drop_duplicates(subset=['ID', 'number_reviews', 'Today', 'review_text', 'likes'], inplace=True)

In [None]:
print("Before:", df.shape)
print("After:", reviews_df.shape)

In [None]:
import re

def clean_review(text):
    """Normalise one raw review string before it is scored.

    Steps, in order: discard every non-ASCII character, collapse runs of
    whitespace, remove an "Edited Review ...:" marker, and delete stray
    single-letter tokens.

    Args:
        text: Raw review. Any non-string value (e.g. NaN) is treated as missing.

    Returns:
        The cleaned review, or "" when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""

    # Round-trip through Latin-1 -> UTF-8 with errors ignored so mojibake byte
    # sequences are decoded (or dropped) before the ASCII filter below.
    text = text.encode('latin1', errors='ignore').decode('utf-8', errors='ignore')

    # Remove every remaining non-ASCII character.
    text = re.sub(r'[^\x00-\x7F]+', '', text)

    # Collapse whitespace so the marker pattern below sees single spaces.
    text = re.sub(r'\s+', ' ', text).strip()

    # Strip the "Edited Review ...:" marker (case-insensitive, up to the first colon).
    text = re.sub(r'Edited Review.*?:', '', text, flags=re.IGNORECASE)

    # Delete isolated single letters, but leave a letter that touches an
    # apostrophe alone: a plain \b...\b match would turn "don't" into "don'"
    # and "I'm" into "'", destroying negations the sentiment model relies on.
    text = re.sub(r"(?<![\w'])[a-zA-Z](?![\w'])", '', text)

    # The removals above can leave doubled or edge spaces; tidy them once.
    return re.sub(r'\s+', ' ', text).strip()


In [None]:
reviews_df["clean_text"] = reviews_df["review_text"].apply(clean_review)

In [None]:
# Show full text in each cell
pd.set_option('display.max_colwidth', None)

# If also working with large rows or many columns, you can increase this too:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

In [None]:
reviews_df.head()

In [None]:
# Discard the raw text, let the cleaned column take over the 'review_text'
# name, and lower-case the 'ID' column name.
reviews_df = (
    reviews_df
    .drop(columns=["review_text"])
    .rename(columns={"clean_text": "review_text", "ID": "id"})
)

In [None]:
# Column dtypes and non-null counts after cleaning and renaming.
reviews_df.info()

In [None]:
import torch
# 0 when CUDA is available, otherwise -1.
# NOTE(review): no later cell shown here reads `device`; the pipeline cell
# recomputes the same expression inline.
device=0 if torch.cuda.is_available() else -1
# Check GPU status

torch.cuda.is_available()

In [None]:
# Preview the frame before the sentiment model is applied.
reviews_df.head()

In [None]:
from transformers import pipeline
import pandas as pd
import torch

# ✅ STEP 3: Load the sentiment analysis pipeline
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    tokenizer="nlptown/bert-base-multilingual-uncased-sentiment",
    device=0 if torch.cuda.is_available() else -1
)

In [None]:
review = """While the internet speed during the day is generally good and streaming works well, the frequent disconnections at night, combined with the unresponsive customer support, make the overall experience very frustrating."""
result = sentiment_pipeline(review)
print(result)

In [None]:
# ✅ STEP 5: Apply the sentiment pipeline in batches
def predict_sentiment_batch(reviews, batch_size=32, classifier=None):
    """Score reviews with the sentiment pipeline, one slice at a time.

    Args:
        reviews: Iterable of review strings (pandas Series, list, tuple, ...).
        batch_size: Number of reviews handed to the classifier per call. It is
            also forwarded to the classifier so the pipeline batches its
            forward passes instead of scoring the slice one item at a time.
        classifier: Callable that accepts ``(texts, truncation=..., batch_size=...)``
            and returns one ``{"label": ..., "score": ...}`` dict per text.
            Defaults to the notebook-level ``sentiment_pipeline``.

    Returns:
        A list of ``(label, score)`` tuples in the same order as ``reviews``.
    """
    if classifier is None:
        classifier = sentiment_pipeline

    # Materialise once so any iterable works, not only objects with .tolist().
    texts = list(reviews)

    results = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        # truncation=True caps each review at the model's maximum input
        # length; without it a single over-long review aborts the whole run.
        outputs = classifier(chunk, truncation=True, batch_size=batch_size)
        results.extend((out["label"], out["score"]) for out in outputs)
    return results

# Run sentiment predictions
sentiments = predict_sentiment_batch(reviews_df["review_text"])

In [None]:

# ✅ STEP 6: Add results to DataFrame
reviews_df["predicted_score"] = [label for label, _ in sentiments]
reviews_df["confidence_score"] = [score for _, score in sentiments]

In [None]:
# ✅ STEP 7: Map star labels to sentiment classes
def map_star_to_sentiment(label):
    """Collapse a star-rating label such as "4 stars" into a sentiment class.

    Args:
        label: Rating label whose first whitespace-separated token is the
            star count.

    Returns:
        "Negative" for 2 stars or fewer, "Neutral" for exactly 3, "Positive"
        for anything higher, and "Unknown" when the label cannot be parsed.
    """
    try:
        star = int(label.split()[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Not a string (e.g. NaN/None), blank, or no leading integer. Only
        # parsing failures are caught so that KeyboardInterrupt and other
        # unrelated errors still propagate.
        return "Unknown"

    if star <= 2:
        return "Negative"
    if star == 3:
        return "Neutral"
    return "Positive"

reviews_df["sentiment"] = reviews_df["predicted_score"].apply(map_star_to_sentiment)

In [None]:
reviews_df.head()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Number of reviews per predicted star label, ordered by label.
star_counts = reviews_df['predicted_score'].value_counts().sort_index()
ax = star_counts.plot.bar(title='Count of Reviews by Stars', figsize=(10, 5))
ax.set_xlabel('Review Stars')
plt.show()

In [None]:
# Final schema check before exporting.
reviews_df.info()

In [None]:

# ✅ STEP 8: Save to CSV
# index=False leaves the frame's index out of the file.
reviews_df.to_csv("wifi_reviews_sentiment.csv", index=False)
print("Sentiment analysis complete. Results saved to 'wifi_reviews_sentiment.csv'")

In [None]:
# Colab-only: hand the exported CSV to the Colab download helper.
from google.colab import files
files.download("wifi_reviews_sentiment.csv")


In [None]:
!pip install wordcloud nltk --quiet

In [None]:
import pandas as pd
import nltk
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import string
import re

# Download the NLTK stopword corpus, then import its reader.
nltk.download('stopwords')
from nltk.corpus import stopwords

In [None]:
# Reload the scored reviews from the CSV written by the export cell.
sentiment = pd.read_csv("wifi_reviews_sentiment.csv")
# Preview the reloaded frame.
sentiment.head()

In [None]:
_ENGLISH_STOPWORDS = None  # filled on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stopwords as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_text(text, stop_words=None):
    """Normalise a review for word-frequency analysis.

    Lower-cases the text, removes digits and ASCII punctuation, splits on
    whitespace and drops stopwords.

    Args:
        text: Review text. ``None`` and float NaN are treated as missing.
        stop_words: Optional collection of lower-case words to drop. Defaults
            to NLTK's English stopword list.

    Returns:
        The surviving tokens joined by single spaces ("" for missing input).
    """
    # Missing values must not be stringified: str(nan) would add the token
    # "nan" (and str(None) the token "none") to the word cloud.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    if stop_words is None:
        # Cached lookup: re-reading the stopword list for every token (as a
        # call inside the filter would) dominates the runtime.
        stop_words = _english_stopwords()

    lowered = str(text).lower()
    without_digits = re.sub(r'\d+', '', lowered)
    without_punct = without_digits.translate(str.maketrans('', '', string.punctuation))
    return " ".join(word for word in without_punct.split() if word not in stop_words)

# Clean the text stored in `sentiment` itself. The raw `df` still contains the
# duplicate rows and carries a different index, so assigning from it would
# pair each row with another review's text.
sentiment['cleaned_text'] = sentiment['review_text'].apply(clean_text)

In [None]:
# Combine all cleaned reviews into a single string
all_text = " ".join(sentiment['cleaned_text'].dropna())

# Generate word cloud
wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='viridis').generate(all_text)

# Display it
plt.figure(figsize=(15, 7))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title("Most Frequent Words in Reviews", fontsize=18)
plt.show()

In [None]:
wordcloud.to_file("reviews_wordcloud.png")
files.download("reviews_wordcloud.png")