In [33]:
#AI feedback analyzer
#Analyze customer feedback using Hugging Face Transformers (for sentiment) and KMeans (for topics).
#Outputs percentage sentiment distribution and top discussion topics.


In [34]:
#install disp
!pip install pandas transformers torch fastapi uvicorn scikit-learn
import pandas as pd
from transformers import pipeline
from collections import Counter
import json



In [35]:
#example smaller fake data fro testing
data={
    "feedback": ["I love the new design, it's so clean",
                 "The app keep crashing after the last update",
                 "Customer support waas super helpful",
                 "Price is too high for the features offered",
                 "The software is too clumsy and inefficient when used with multiple accounts",
                 "The very first login page is not well designed"
                  ]
    }
df=pd.DataFrame(data)
df.to_csv("feedback.csv", index=False)

In [36]:
#loading the pretrained sentiment analysis model
sentiment_model=pipeline("sentiment-analysis")

#looping through the model's results
def analysis_feedback(feedback_list):
  results=sentiment_model(feedback_list) #feed the feedback list to the model

  sentiments=[r["label"] for r in results] # extract only the sentiment labels

  counts=Counter(sentiments) #count how many positive and negative labels we have

  total=sum(counts.values()) #sum it up

  sentiment_distribution={k: round(v/total*100,2) for k, v in counts.items()} #change to percentage

  summary = (
        f"Feedback analyzed: {len(feedback_list)} items. "
        f"Positive: {sentiment_distribution.get('POSITIVE', 0)}%, "
        f"Negative: {sentiment_distribution.get('NEGATIVE', 0)}%."
    )

  return {
      "sentiment_distribution":sentiment_distribution,
      "summary": summary
  }
# run the function

feedback = df["feedback"].tolist()
results=analysis_feedback(feedback)
print(json.dumps(results, indent=2))

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


{
  "sentiment_distribution": {
    "POSITIVE": 33.33,
    "NEGATIVE": 66.67
  },
  "summary": "Feedback analyzed: 6 items. Positive: 33.33%, Negative: 66.67%."
}


In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans


In [38]:
def extract_topics(feedback_list, n_clusters=3):
  #convert text into numerical vectors based on word importance
  vectorizer=TfidfVectorizer(stop_words='english')
  X=vectorizer.fit_transform(feedback_list)

  km=KMeans(n_clusters=n_clusters, random_state=42) #group similar feedback
  km.fit(X)

  order_centroids=km.cluster_centers_.argsort()[:, ::-1]
  terms=vectorizer.get_feature_names_out()

  topics=[]

  for i in range(n_clusters):
    top_terms = [terms[ind] for ind in order_centroids[i, :5]]
    topics.append(", ".join(top_terms))

  return topics
topics = extract_topics(feedback)
results["topics"] = topics
print(json.dumps(results, indent=2))

{
  "sentiment_distribution": {
    "POSITIVE": 33.33,
    "NEGATIVE": 66.67
  },
  "summary": "Feedback analyzed: 6 items. Positive: 33.33%, Negative: 66.67%.",
  "topics": [
    "update, crashing, app, offered, price",
    "page, designed, login, waas, used",
    "new, design, clean, love, super"
  ]
}


In [39]:
from google.colab import files

uploaded=files.upload()

Saving clean_feedback.csv to clean_feedback (1).csv


In [40]:
import os

def load_feedback_file(file_path):
  ext=os.path.splitext(file_path)[1].lower()

  if ext==".csv":
    df=pd.read_csv(file_path)
    if 'feedback' not in df.columns:
      raise ValueError("CSV file must have a column named 'feedback' column.")
    feedback_list=df['feedback'].dropna().astype(str).tolist()
  elif ext==".txt":
    with open(file_path, 'r', encoding='utf-8') as f:
      feedback_list=[line.strip() for line in f if line.strip()]
  else:
    raise ValueError("Unsupported file type. Please upload a .txt or .csv file.")

  return feedback_list

In [41]:
file_path=list(uploaded.keys())[0]
feedback=load_feedback_file(file_path)

print(f"Loaded {len(feedback)} feedback entries.")
print("first 10 examples:")
for f in feedback[:10]:
  print("-", f)

Loaded 96 feedback entries.
first 10 examples:
- I love this product!
- The service was terrible.
- This movie is amazing!
- I'm so disappointed with their customer support.
- Just had the best meal of my life!
- The quality of this product is subpar.
- I can't stop listening to this song. It's incredible!
- Their website is so user-friendly. Love it!
- I loved the movie! It was fantastic!
- The customer service was terrible.


In [42]:
from transformers import pipeline
from collections import Counter
import pandas as pd
import json

# 1. Load your cleaned CSV
df = pd.read_csv(file_path)

# 2. Convert the 'feedback' column to a list of text entries
feedback_list = df["feedback"].dropna().astype(str).tolist()

# 3. Load a pretrained sentiment analysis model from Hugging Face
sentiment_model = pipeline("sentiment-analysis")

# 4. Define the analysis function
def analyze_feedback(feedback_list):
    # Run the model on each feedback text
    results = sentiment_model(feedback_list)

    # Extract only the sentiment labels ('POSITIVE', 'NEGATIVE', etc.)
    sentiments = [r["label"] for r in results]

    # Count how many of each label we got
    counts = Counter(sentiments)

    # Total number of feedback items
    total = sum(counts.values())

    # Convert counts into percentages (rounded to two decimals)
    sentiment_distribution = {
        k: round(v / total * 100, 2) for k, v in counts.items()
    }

    # Create a readable summary sentence
    summary = (
        f"Feedback analyzed: {len(feedback_list)} items. "
        f"Positive: {sentiment_distribution.get('POSITIVE', 0)}%, "
        f"Negative: {sentiment_distribution.get('NEGATIVE', 0)}%."
    )

    # Return everything neatly in a dictionary
    return {
        "sentiment_distribution": sentiment_distribution,
        "summary": summary
    }

# 5. Run the function on your CSV data
results = analyze_feedback(feedback_list)

# 6. Print the output in a pretty JSON format
print(json.dumps(results, indent=2))


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


{
  "sentiment_distribution": {
    "POSITIVE": 54.17,
    "NEGATIVE": 45.83
  },
  "summary": "Feedback analyzed: 96 items. Positive: 54.17%, Negative: 45.83%."
}


In [43]:
# Create requirements.txt
with open("requirements.txt", "w") as f:
    f.write("transformers\npandas\nscikit-learn\n")

# Create README.md
readme_text = """
# AI Feedback Analyzer

An end-to-end sentiment and topic analysis system built with Hugging Face Transformers and Scikit-learn.

## Features
- Analyzes sentiment of customer feedback (positive/negative percentages)
- Extracts discussion topics using K-Means clustering
- Works with CSV or text files
- Outputs structured JSON summaries

## How to Run
1. Upload your feedback file (CSV or TXT) to Colab.
2. Run the notebook.
3. View sentiment results and top discussion topics.

## Example Output
```json
{
  "sentiment_distribution": {"POSITIVE": 63.33, "NEGATIVE": 36.67},
  "summary": "Feedback analyzed: 30 items. Positive: 63.33%, Negative: 36.67%.",
  "topics": ["price, expensive, plan", "design, clean, interface", "crash, bug, update"]
}
"""

with open("README.md", "w") as f:
    f.write(readme_text)


In [44]:
from google.colab import files
files.download("requirements.txt")
files.download("README.md")
files.download("report.json")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>