AI-Powered Sentiment Analysis and Entity Recognition for Customer Reviews

In [3]:
#Import Required Libraries
import os
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import matplotlib.pyplot as plt

In [None]:
# Azure Cognitive Services credentials.
# Each value is read from the environment variable of the same name so real
# secrets never have to be written into the notebook; when a variable is not
# set, the original placeholder string is kept as the fallback value.
azure_key = os.environ.get('TEXT_ANALYTICS_KEY', 'TEXT_ANALYTICS_KEY')
azure_endpoint = os.environ.get('TEXT_ANALYTICS_ENDPOINT', 'TEXT_ANALYTICS_ENDPOINT')
translator_key = os.environ.get('TRANSLATOR_API_KEY', 'TRANSLATOR_API_KEY')
translator_endpoint = os.environ.get('TRANSLATOR_ENDPOINT', 'TRANSLATOR_ENDPOINT')

In [None]:
# Build the Azure Text Analytics client
def authenticate_client():
    """Return a TextAnalyticsClient built from the notebook credentials.

    Uses the module-level ``azure_key`` (wrapped in an AzureKeyCredential)
    and ``azure_endpoint`` values.
    """
    return TextAnalyticsClient(
        endpoint=azure_endpoint,
        credential=AzureKeyCredential(azure_key),
    )

# Shared client instance; passed to the sentiment, entity and key-phrase calls below.
text_analytics_client = authenticate_client()

In [None]:
#Data Preprocessing
# Fetch the NLTK resources used by preprocess_text: the tokenizer models
# behind word_tokenize and the stop-word lists.
nltk.download('punkt')
# NLTK 3.8.2+ loads its tokenizer models from 'punkt_tab' rather than the
# pickled 'punkt' package, so fetch both to work across NLTK versions.
nltk.download('punkt_tab')
nltk.download('stopwords')

# ISO 639-1 codes mapped to the NLTK stop-word corpus name they select.
_STOPWORD_LANGUAGES = {
    'en': 'english',
    'fr': 'french',
    'de': 'german',
    'es': 'spanish',
    'it': 'italian',
    'pt': 'portuguese',
    'nl': 'dutch',
}


def preprocess_text(text, language='en'):
    """Normalise a review and strip stop words.

    The text is lower-cased, digit runs and punctuation are removed, the
    result is tokenised with ``word_tokenize`` and stop words are dropped.

    Args:
        text: Raw review text.
        language: Language of ``text``, given as a two-letter code
            (``'en'``, ``'fr'``, ...) or as an NLTK corpus name
            (``'english'``, ``'french'``, ...). Values that are not
            recognised fall back to English, which is what every call
            used before this parameter was honoured.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # Resolve the stop-word corpus; previously `language` was ignored and
    # English stop words were applied to every input.
    requested = str(language).lower()
    if requested in _STOPWORD_LANGUAGES:
        stopword_corpus = _STOPWORD_LANGUAGES[requested]
    elif requested in _STOPWORD_LANGUAGES.values():
        stopword_corpus = requested
    else:
        stopword_corpus = 'english'

    text = text.lower()
    text = re.sub(r'\d+', '', text)      # drop digit runs
    text = re.sub(r'[^\w\s]', '', text)  # drop punctuation (keeps word chars and whitespace)
    words = word_tokenize(text)
    stop_words = set(stopwords.words(stopword_corpus))
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)
# Example: clean a sample review; cleaned_review is reused by the analysis cells below.
review = "This is an awesome product! Totally worth the price."
cleaned_review = preprocess_text(review)
print(cleaned_review)

In [None]:
#Language Translation
import requests

def translate_text(text, target_language='en', region='your-region', timeout=10):
    """Translate ``text`` through the Translator REST API (api-version 3.0).

    Args:
        text: Text to translate.
        target_language: Language code sent as the ``to`` query parameter.
        region: Value sent in the ``Ocp-Apim-Subscription-Region`` header.
            Defaults to the placeholder the notebook originally hard-coded.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The translated text of the first translation in the response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': translator_key,
        'Ocp-Apim-Subscription-Region': region,
        'Content-Type': 'application/json'
    }
    body = [{'text': text}]
    # Let requests build the query string so the language code is URL-encoded.
    params = {'api-version': '3.0', 'to': target_language}
    # Tolerate an endpoint configured with a trailing slash.
    translation_url = f"{translator_endpoint.rstrip('/')}/translate"
    response = requests.post(
        translation_url,
        params=params,
        headers=headers,
        json=body,
        timeout=timeout,
    )
    # Surface API failures (bad key, wrong region, ...) as an HTTP error
    # instead of an opaque KeyError when indexing the error payload below.
    response.raise_for_status()
    translation = response.json()[0]['translations'][0]['text']
    return translation
# Example: translate a French review into English and print the result.
translated_review = translate_text("C'est un produit incroyable", target_language='en')
print(translated_review)

In [None]:
#Sentiment Analysis
def sentiment_analysis(client, text):
    """Run sentiment analysis on a single text.

    Sends ``text`` as a one-document batch to ``client.analyze_sentiment``
    and returns a dict with the overall ``sentiment`` label plus the
    ``positive``, ``neutral`` and ``negative`` confidence scores of the
    first (only) result.
    """
    result = client.analyze_sentiment(documents=[text])[0]
    summary = {"sentiment": result.sentiment}
    scores = result.confidence_scores
    for label in ("positive", "neutral", "negative"):
        summary[label] = getattr(scores, label)
    return summary
# Score the cleaned sample review; sentiment_result is stored in the database further down.
sentiment_result = sentiment_analysis(text_analytics_client, cleaned_review)
print(sentiment_result)

In [None]:
def entity_recognition(client, text):
    """Return ``(text, category)`` pairs for the entities found in ``text``.

    ``text`` is sent as a one-document batch to ``client.recognize_entities``
    and the entities of the first (only) result are collected in order.
    """
    result = client.recognize_entities(documents=[text])[0]
    pairs = []
    for found in result.entities:
        pairs.append((found.text, found.category))
    return pairs

def key_phrase_extraction(client, text):
    """Return the key phrases extracted from ``text``.

    ``text`` is sent as a one-document batch to ``client.extract_key_phrases``
    and the ``key_phrases`` of the first (only) result are returned.
    """
    results = client.extract_key_phrases(documents=[text])
    first_result = results[0]
    return first_result.key_phrases

# Run entity recognition and key-phrase extraction on the cleaned sample review;
# both results are stored in the database further down.
entities = entity_recognition(text_analytics_client, cleaned_review)
key_phrases = key_phrase_extraction(text_analytics_client, cleaned_review)

print("Entities:", entities)
print("Key Phrases:", key_phrases)

In [None]:
#Storing Data in Database
import sqlite3

# Open (or create) the SQLite file 'reviews.db'; conn and c are shared by
# insert_review and visualize_sentiment_distribution below.
conn = sqlite3.connect('reviews.db')
c = conn.cursor()

# Create the reviews table on first run; a no-op when it already exists.
c.execute('''CREATE TABLE IF NOT EXISTS reviews
             (review_text TEXT, sentiment TEXT, positive REAL, neutral REAL, negative REAL, entities TEXT, key_phrases TEXT)''')

def insert_review(review, sentiment_result, entities, key_phrases):
    """Insert one analysed review into the ``reviews`` table and commit.

    ``sentiment_result`` must provide the ``sentiment``, ``positive``,
    ``neutral`` and ``negative`` keys; ``entities`` and ``key_phrases`` are
    stored as their ``str()`` representation. Uses the module-level cursor
    ``c`` and connection ``conn``.
    """
    row = (
        review,
        sentiment_result['sentiment'],
        sentiment_result['positive'],
        sentiment_result['neutral'],
        sentiment_result['negative'],
        str(entities),
        str(key_phrases),
    )
    c.execute("INSERT INTO reviews (review_text, sentiment, positive, neutral, negative, entities, key_phrases) VALUES (?, ?, ?, ?, ?, ?, ?)", row)
    conn.commit()

# Persist the analysed sample review (cleaned text plus its analysis results).
insert_review(cleaned_review, sentiment_result, entities, key_phrases)


In [None]:
#Visualization with Matplotlib
def visualize_sentiment_distribution():
    """Show a bar chart of the number of stored reviews per sentiment label.

    Reads the per-sentiment counts from the ``reviews`` table through the
    module-level cursor ``c`` and displays the chart with matplotlib.
    """
    # Colour is chosen by label, not by bar position: the query does not tie
    # row order to a particular sentiment, so a positional colour list could
    # paint e.g. the 'negative' bar green.
    sentiment_colors = {
        'positive': 'green',
        'neutral': 'yellow',
        'negative': 'red',
        'mixed': 'orange',
    }

    c.execute("SELECT sentiment, COUNT(*) FROM reviews GROUP BY sentiment")
    data = c.fetchall()

    sentiments = [row[0] for row in data]
    counts = [row[1] for row in data]
    # Labels outside the known set still get a bar, drawn in a neutral gray.
    bar_colors = [sentiment_colors.get(label, 'gray') for label in sentiments]

    plt.bar(sentiments, counts, color=bar_colors)
    plt.xlabel('Sentiment')
    plt.ylabel('Number of Reviews')
    plt.title('Sentiment Distribution')
    plt.show()

# Render the chart for the reviews stored so far.
visualize_sentiment_distribution()