In [3]:
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
from googletrans import Translator
from nltk import NaiveBayesClassifier, DecisionTreeClassifier, MaxentClassifier
from nltk.classify import apply_features
from nltk.classify.util import accuracy

# Function to preprocess the input text
def preprocess(text):
    stop_words = set(stopwords.words("english"))
    translator = Translator(service_urls=['translate.google.co.in'])
    
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    text = translator.translate(text, src="hi", dest="en").text
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = " ".join([word for word in text.split() if word.lower() not in stop_words])
    
    lemmatizer = WordNetLemmatizer()
    text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])
    
    return text

# Function to extract features from the preprocessed text
def extract_features(text, adjectives):
    return {word: True for word in text.split() if word in adjectives}

# Function to train multiple classifiers
def train_classifiers(pos_data, neg_data, adjectives):
    classifiers = [
        (NaiveBayesClassifier, "NaiveBayesClassifier"),
        (DecisionTreeClassifier, "DecisionTreeClassifier"),
        (MaxentClassifier, "MaxentClassifier")
    ]
    
    results = []
    
    for classifier_type, classifier_name in classifiers:
        pos_features = [(extract_features(preprocess(text), adjectives), 'pos') for text in pos_data]
        neg_features = [(extract_features(preprocess(text), adjectives), 'neg') for text in neg_data]
        
        train_data = pos_features + neg_features
        classifier = classifier_type.train(train_data)
        
        acc = accuracy(classifier, train_data)
        results.append((classifier_name, acc, classifier))
    
    return results

# Example usage
positive_data = ["This app is great!", "I love the new features.", "Amazing experience."]
negative_data = ["The app crashes frequently.", "Disappointed with the update.", "Worst app ever."]

adjectives = ["good", "bad", "amazing", "worst"]  # Add your list of adjectives

classifiers_results = train_classifiers(positive_data, negative_data, adjectives)

# Test the classifiers with sample text
test_text = "The latest update is fantastic!"

for classifier_name, acc, classifier in classifiers_results:
    # Preprocess the test text
    preprocessed_text = preprocess(test_text)

    # Extract features from the preprocessed text
    test_features = extract_features(preprocessed_text, adjectives)

    # Classify the sentiment using the trained classifier
    predicted_sentiment = classifier.classify(test_features)

    # Print the result
    print(f"{classifier_name} - Accuracy: {acc:.2f}, Predicted Sentiment: {predicted_sentiment}")


  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -0.69315        0.500
             2          -0.69315        0.500
             3          -0.69315        0.500
             4          -0.69315        0.500
             5          -0.69315        0.500
             6          -0.69315        0.500
             7          -0.69315        0.500
             8          -0.69315        0.500
             9          -0.69315        0.500
            10          -0.69315        0.500
            11          -0.69315        0.500
            12          -0.69315        0.500
            13          -0.69315        0.500
            14          -0.69315        0.500
            15          -0.69315        0.500
            16          -0.69315        0.500
            17          -0.69315        0.500
            18          -0.69315        0.500
            19          -0.69315        0.500
 

In [7]:
import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    'comments': [
    "Ghatiya app hai.",
    "service user-friendly nahi hai.",
    "Bohot badiya app hai",
    "The latest update is disappointing."
    ]
})

# Create new columns to store the results
classifiers_names = [classifier_name for classifier_name, _, _ in classifiers_results]
df[classifiers_names + ['accuracy', 'preprocessed_comment']] = ""

# Test the classifiers with the comments column in the DataFrame
for index, row in df.iterrows():
    comment = row['comments']
    
    results_for_comment = []

    for classifier_name, acc, classifier in classifiers_results:
        # Preprocess the comment
        preprocessed_comment = preprocess(comment)

        # Extract features from the preprocessed comment
        comment_features = extract_features(preprocessed_comment, adjectives)

        # Classify the sentiment using the trained classifier
        predicted_sentiment = classifier.classify(comment_features)

        # Store the results in a tuple
        results_for_comment.append((classifier_name, acc, predicted_sentiment))

    # Sort the results by accuracy in descending order
    results_for_comment.sort(key=lambda x: x[1], reverse=True)

    # Store the results in the DataFrame columns
    for result in results_for_comment:
        classifier_name, acc, predicted_sentiment = result
        df.at[index, classifier_name] = predicted_sentiment

    # Store the highest accuracy result in the DataFrame columns
    df.at[index, 'accuracy'] = results_for_comment[0][1]
    df.at[index, 'preprocessed_comment'] = preprocessed_comment

# Display the updated DataFrame
df

Unnamed: 0,comments,NaiveBayesClassifier,DecisionTreeClassifier,MaxentClassifier,accuracy,preprocessed_comment
0,Ghatiya app hai.,pos,pos,pos,0.625,cheap app
1,service user-friendly nahi hai.,pos,pos,pos,0.625,Service userfriendly
2,Bohot badiya app hai,neg,neg,neg,0.625,good app
3,The latest update is disappointing.,pos,pos,pos,0.625,latest update disappointing


In [14]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from googletrans import Translator
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Function to preprocess the input text
def preprocess(text):
    stop_words = set(stopwords.words("english"))
    translator = Translator(service_urls=['translate.google.co.in'])
    
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    text = translator.translate(text, src="hi", dest="en").text
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = " ".join([word for word in text.split() if word.lower() not in stop_words])
    
    lemmatizer = WordNetLemmatizer()
    text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])
    
    return text

# Function to translate Hinglish comments to English
def translate_to_english(comment):
    translator = Translator(service_urls=['translate.google.co.in'])
    return translator.translate(comment, src="hi", dest="en").text

# Sample DataFrame with Hinglish comments
df = pd.DataFrame({
    'comments': [
        "Bekar app hai.",
        "Aur better ho sakta tha",
        "Bohot badiya app hai",
        "The latest update is disappointing.",
        "Fraud app banaya hai "
    ]
})

# Create new columns to store the results
df['translated_comment'] = ""
df['sentiment_score'] = ""

# Initialize SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

# Translate Hinglish comments to English and analyze sentiment
for index, row in df.iterrows():
    hinglish_comment = row['comments']

    # Translate Hinglish comment to English
    english_comment = translate_to_english(hinglish_comment)
    df.at[index, 'translated_comment'] = english_comment

    # Preprocess the translated comment
    preprocessed_comment = preprocess(english_comment)

    # Analyze sentiment using SentimentIntensityAnalyzer
    sentiment_score = sia.polarity_scores(preprocessed_comment)['compound']

    # Store the sentiment score in the DataFrame column
    df.at[index, 'sentiment_score'] = sentiment_score

# Display the updated DataFrame
df


Unnamed: 0,comments,translated_comment,sentiment_score
0,Bekar app hai.,Is a useless app.,-0.4215
1,Aur better ho sakta tha,And could be better,0.4404
2,Bohot badiya app hai,It is very good app,0.4404
3,The latest update is disappointing.,The latest update is disappointing.,-0.4939
4,Fraud app banaya hai,Fraud app is created,-0.4215


In [17]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from googletrans import Translator
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Function to preprocess the input text
def preprocess(text):
    stop_words = set(stopwords.words("english"))
    translator = Translator(service_urls=['translate.google.co.in'])
    
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
    text = translator.translate(text, src="hi", dest="en").text
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = " ".join([word for word in text.split() if word.lower() not in stop_words])
    
    lemmatizer = WordNetLemmatizer()
    text = " ".join([lemmatizer.lemmatize(word) for word in text.split()])
    
    return text

# Function to translate Hinglish comments to English
def translate_to_english(comment):
    translator = Translator(service_urls=['translate.google.co.in'])
    return translator.translate(comment, src="hi", dest="en").text

# Function to categorize sentiment as positive, negative, or neutral
def categorize_sentiment(score):
    if score >= 0.05:
        return 'positive'
    elif score <= -0.05:
        return 'negative'
    else:
        return 'neutral'

# Sample DataFrame with Hinglish comments
df = pd.DataFrame({
    'comments': [
        "Bekar app hai.",
        "Aur better ho sakta tha",
        "Bohot badiya app hai",
        "The latest update is disappointing.",
        "Fraud app banaya hai "
    ]
})

# Create new columns to store the results
df['translated_comment'] = ""
df['sentiment_score'] = ""
df['sentiment'] = ""

# Initialize SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

# Translate Hinglish comments to English and analyze sentiment
for index, row in df.iterrows():
    hinglish_comment = row['comments']

    # Translate Hinglish comment to English
    english_comment = translate_to_english(hinglish_comment)
    df.at[index, 'translated_comment'] = english_comment

    # Preprocess the translated comment
    preprocessed_comment = preprocess(english_comment)

    # Analyze sentiment using SentimentIntensityAnalyzer
    sentiment_score = sia.polarity_scores(preprocessed_comment)['compound']
    df.at[index, 'sentiment_score'] = sentiment_score

    # Categorize sentiment
    sentiment_category = categorize_sentiment(sentiment_score)
    df.at[index, 'sentiment'] = sentiment_category

# Display the updated DataFrame
df


Unnamed: 0,comments,translated_comment,sentiment_score,sentiment
0,Bekar app hai.,Is a useless app.,-0.4215,negative
1,Aur better ho sakta tha,And could be better,0.4404,positive
2,Bohot badiya app hai,It is very good app,0.4404,positive
3,The latest update is disappointing.,The latest update is disappointing.,-0.4939,negative
4,Fraud app banaya hai,Fraud app is created,-0.4215,negative
