In [None]:
!pip install googletrans==4.0.0-rc1
!pip install microsofttranslator

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2023.1.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m

In [None]:
import pandas as pd
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from googletrans import Translator

# Download NLTK resources
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the data packages that
# word_tokenize and pos_tag load on recent NLTK releases (3.9+); the legacy
# names are still fetched so older installations keep working.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'sentiwordnet',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

def calculate_tweet_sentiment(tweet_content):
    """Score a text with SentiWordNet and return positive minus negative.

    The text is tokenized and POS-tagged. Each token is lemmatized and looked
    up in SentiWordNet, and the scores of the first synset returned for it
    are accumulated.

    Args:
        tweet_content: The text to score (expected to be English).

    Returns:
        The sum of positive scores minus the sum of negative scores; 0 when
        no token has a SentiWordNet entry.
    """
    pos_score = 0
    neg_score = 0

    lemmatizer = WordNetLemmatizer()

    for word, treebank_tag in pos_tag(word_tokenize(tweet_content)):
        wordnet_tag = get_wordnet_pos(treebank_tag)
        # WordNetLemmatizer assumes a noun unless a POS is supplied, which
        # mangles other word classes (e.g. the verb "was" becomes "wa").
        # Pass the mapped tag whenever there is one.
        if wordnet_tag is None:
            lemma = lemmatizer.lemmatize(word)
        else:
            lemma = lemmatizer.lemmatize(word, pos=wordnet_tag)

        senti_synsets = list(swn.senti_synsets(lemma, wordnet_tag))
        if senti_synsets:
            # Only the first synset contributes to the score.
            first_synset = senti_synsets[0]
            pos_score += first_synset.pos_score()
            neg_score += first_synset.neg_score()

    return pos_score - neg_score

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS letter.

    Returns 'a', 'v', 'n' or 'r' when the tag starts with J, V, N or R
    respectively, and None for every other tag.
    """
    prefix_to_wordnet = (
        ('J', 'a'),  # Adjective
        ('V', 'v'),  # Verb
        ('N', 'n'),  # Noun
        ('R', 'r'),  # Adverb
    )
    for prefix, wordnet_letter in prefix_to_wordnet:
        if treebank_tag.startswith(prefix):
            return wordnet_letter
    return None


# Load the dataset: an Excel workbook that must contain a 'Comments' column
file_path = '/content/September 2023 PB.xlsx'  # Replace this with your file path
df = pd.read_excel(file_path)


# googletrans client shared by every preprocess_translate call
translator = Translator()

def preprocess_translate(comment):
    """Normalize a comment and translate it to English when needed.

    The text is lower-cased and reduced to letters and whitespace. Text that
    is pure ASCII after that step is treated as already English and returned
    without calling the translator; anything else is sent to the
    module-level googletrans ``translator``.

    Args:
        comment: One cell of the 'Comments' column.

    Returns:
        str: The normalized (and, if needed, translated) text. Non-string
        input is returned as ``str(comment)``; on a translation failure the
        normalized, untranslated text is returned.
    """
    if not isinstance(comment, str):
        # Non-text cells (e.g. NaN for an empty spreadsheet cell) have no
        # .lower(); fall back to their string form instead of raising.
        return str(comment)

    comment = comment.lower()
    comment = ''.join(c for c in comment if c.isalpha() or c.isspace())

    # Heuristic language check: only letters and whitespace remain, so an
    # all-ASCII string is Latin-script text and is assumed to be English.
    # (Comparing the lower-cased text with its stripped form would only
    # detect surrounding whitespace, not the language.) Empty text also
    # returns here, which avoids a pointless API call.
    if comment.isascii():
        return comment

    try:
        return translator.translate(comment, src='auto', dest='en').text
    except Exception as e:
        print(f"Translation error: {e}")
        return comment  # Return the original text if translation fails

# Normalize every comment and translate the non-English ones
df['Translated_Comments'] = df['Comments'].apply(preprocess_translate)


def get_pos_tags(comment):
    """Tokenize a comment and return its list of (token, POS tag) pairs."""
    return pos_tag(word_tokenize(comment))


# Store the POS-tagged tokens of each translated comment in 'POS_Tags'
df['POS_Tags'] = df['Translated_Comments'].apply(get_pos_tags)

# Score each translated comment with SentiWordNet
df['Senti_Score'] = df['Translated_Comments'].apply(calculate_tweet_sentiment)

# Apply labels based on sentiment score using SentiWordNet scores
def get_sentiment_label(score, pos_threshold=0.25, neg_threshold=-0.25):
    """Turn a numeric sentiment score into one of five text labels.

    Args:
        score: Sentiment score (positive minus negative).
        pos_threshold: Scores strictly above this are 'strongly positive'.
        neg_threshold: Scores strictly below this are 'strongly negative'.

    Returns:
        str: 'strongly positive', 'weakly positive', 'neutral',
        'weakly negative' or 'strongly negative'. A score of exactly 0 (or
        NaN, for which every comparison is false) is 'neutral'.
    """
    if score > pos_threshold:
        return 'strongly positive'
    if score > 0:
        return 'weakly positive'
    if score < neg_threshold:
        return 'strongly negative'
    # Scores in [neg_threshold, 0) are weakly negative. The condition must be
    # `score < 0`; testing `score >= 0` here can never be true at this point.
    if score < 0:
        return 'weakly negative'
    return 'neutral'

# Label every comment from its sentiment score (new 'Labels' column)
df['Labels'] = df['Senti_Score'].apply(get_sentiment_label)

# Preview the first rows of the enriched DataFrame
print(df.head())

# Build the output path: '/content/Updated_' + the input's bare filename
file_name = file_path.rsplit('/', 1)[-1]  # drop the directory part
updated_file_name = f'/content/Updated_{file_name}'

# Write the enriched DataFrame to the new Excel file
df.to_excel(updated_file_name, index=False)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


        Date       Time                                           Comments  \
0 2023-09-01  15:55:08Z  LG TV's are the biggest piece of junk I've eve...   
1 2023-09-01  14:06:05Z                          Sent from my Huawei phone   
2 2023-09-01  23:37:30Z  Lived in ksa my entire childhood currently 20 ...   
3 2023-09-02  11:45:28Z  You just need to buy # one general#. Any how, ...   
4 2023-09-02  02:26:34Z  You all people are right due to technology boo...   

                                 Translated_Comments  \
0  lg tvs are the biggest piece of junk ive ever ...   
1                          sent from my huawei phone   
2  lived in ksa my entire childhood currently  mo...   
3  you just need to buy  one general any how disa...   
4  you all people are right due to technology boo...   

                                            POS_Tags  Senti_Score  \
0  [(lg, NN), (tvs, NNS), (are, VBP), (the, DT), ...        0.500   
1  [(sent, NN), (from, IN), (my, PRP$), (huawei, ...    

#-------------------------------------------------------------------------------------------------------------------------

# Use this section if you hit an error while translating the 'Comments' column

In [None]:
import pandas as pd
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from googletrans import Translator

# Download NLTK resources
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the data packages that
# word_tokenize and pos_tag load on recent NLTK releases (3.9+); the legacy
# names are still fetched so older installations keep working.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'sentiwordnet',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

def calculate_tweet_sentiment(tweet_content):
    """Score a text with SentiWordNet and return positive minus negative.

    The text is tokenized and POS-tagged. Each token is lemmatized and looked
    up in SentiWordNet, and the scores of the first synset returned for it
    are accumulated.

    Args:
        tweet_content: The text to score (expected to be English).

    Returns:
        The sum of positive scores minus the sum of negative scores; 0 when
        no token has a SentiWordNet entry.
    """
    pos_score = 0
    neg_score = 0

    lemmatizer = WordNetLemmatizer()

    for word, treebank_tag in pos_tag(word_tokenize(tweet_content)):
        wordnet_tag = get_wordnet_pos(treebank_tag)
        # WordNetLemmatizer assumes a noun unless a POS is supplied, which
        # mangles other word classes (e.g. the verb "was" becomes "wa").
        # Pass the mapped tag whenever there is one.
        if wordnet_tag is None:
            lemma = lemmatizer.lemmatize(word)
        else:
            lemma = lemmatizer.lemmatize(word, pos=wordnet_tag)

        senti_synsets = list(swn.senti_synsets(lemma, wordnet_tag))
        if senti_synsets:
            # Only the first synset contributes to the score.
            first_synset = senti_synsets[0]
            pos_score += first_synset.pos_score()
            neg_score += first_synset.neg_score()

    return pos_score - neg_score

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS letter.

    Returns 'a', 'v', 'n' or 'r' when the tag starts with J, V, N or R
    respectively, and None for every other tag.
    """
    prefix_to_wordnet = (
        ('J', 'a'),  # Adjective
        ('V', 'v'),  # Verb
        ('N', 'n'),  # Noun
        ('R', 'r'),  # Adverb
    )
    for prefix, wordnet_letter in prefix_to_wordnet:
        if treebank_tag.startswith(prefix):
            return wordnet_letter
    return None


# Load the dataset: an Excel workbook that must contain a 'Comments' column
file_path = '/content/Jan 2023 PF.xlsx'  # Replace this with your file path
df = pd.read_excel(file_path)


# googletrans client shared by every preprocess_translate call
translator = Translator()

def preprocess_translate(comment):
    """Normalize a comment and translate it from Urdu to English when needed.

    The text is lower-cased and reduced to letters and whitespace. Text that
    is pure ASCII after that step is treated as already English and returned
    without calling the translator; anything else is sent to the
    module-level googletrans ``translator`` with Urdu as the source language.

    Args:
        comment: One cell of the 'Comments' column.

    Returns:
        str: The normalized (and, if needed, translated) text. Non-string
        input is returned as ``str(comment)``; on a translation failure the
        normalized, untranslated text is returned.
    """
    if not isinstance(comment, str):
        return str(comment)  # Convert non-string data to string

    comment = comment.lower()
    comment = ''.join(c for c in comment if c.isalpha() or c.isspace())

    # Heuristic language check: only letters and whitespace remain, so an
    # all-ASCII string is Latin-script text and is assumed to be English.
    # (Comparing the lower-cased text with its stripped form would only
    # detect surrounding whitespace, not the language.) Empty text also
    # returns here, which avoids a pointless API call.
    if comment.isascii():
        return comment

    try:
        # Translate from Urdu to English
        return translator.translate(comment, src='ur', dest='en').text
    except Exception as e:
        print(f"Translation error: {e}")
        return comment  # Return the original text if translation fails


# Normalize every comment and translate the non-English ones
df['Translated_Comments'] = df['Comments'].apply(preprocess_translate)


def get_pos_tags(comment):
    """Tokenize a comment and return its list of (token, POS tag) pairs."""
    return pos_tag(word_tokenize(comment))


# Store the POS-tagged tokens of each translated comment in 'POS_Tags'
df['POS_Tags'] = df['Translated_Comments'].apply(get_pos_tags)

# Score each translated comment with SentiWordNet
df['Senti_Score'] = df['Translated_Comments'].apply(calculate_tweet_sentiment)

# Apply labels based on sentiment score using SentiWordNet scores
def get_sentiment_label(score, pos_threshold=0.25, neg_threshold=-0.25):
    """Turn a numeric sentiment score into one of five text labels.

    Args:
        score: Sentiment score (positive minus negative).
        pos_threshold: Scores strictly above this are 'strongly positive'.
        neg_threshold: Scores strictly below this are 'strongly negative'.

    Returns:
        str: 'strongly positive', 'weakly positive', 'neutral',
        'weakly negative' or 'strongly negative'. A score of exactly 0 (or
        NaN, for which every comparison is false) is 'neutral'.
    """
    if score > pos_threshold:
        return 'strongly positive'
    if score > 0:
        return 'weakly positive'
    if score < neg_threshold:
        return 'strongly negative'
    # Scores in [neg_threshold, 0) are weakly negative. The condition must be
    # `score < 0`; testing `score >= 0` here can never be true at this point.
    if score < 0:
        return 'weakly negative'
    return 'neutral'

# Label every comment from its sentiment score (new 'Labels' column)
df['Labels'] = df['Senti_Score'].apply(get_sentiment_label)

# Preview the first rows of the enriched DataFrame
print(df.head())

# Build the output path: '/content/Updated_' + the input's bare filename
file_name = file_path.rsplit('/', 1)[-1]  # drop the directory part
updated_file_name = f'/content/Updated_{file_name}'

# Write the enriched DataFrame to the new Excel file
df.to_excel(updated_file_name, index=False)

#Microsoft Translator

In [None]:
import pandas as pd
from microsofttranslator import Translator
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag

# Download NLTK resources
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the data packages that
# word_tokenize and pos_tag load on recent NLTK releases (3.9+); the legacy
# names are still fetched so older installations keep working.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'sentiwordnet',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

def calculate_tweet_sentiment(tweet_content):
    """Score a text with SentiWordNet and return positive minus negative.

    The text is tokenized and POS-tagged. Each token is lemmatized and looked
    up in SentiWordNet, and the scores of the first synset returned for it
    are accumulated.

    Args:
        tweet_content: The text to score (expected to be English).

    Returns:
        The sum of positive scores minus the sum of negative scores; 0 when
        no token has a SentiWordNet entry.
    """
    pos_score = 0
    neg_score = 0

    lemmatizer = WordNetLemmatizer()

    for word, treebank_tag in pos_tag(word_tokenize(tweet_content)):
        wordnet_tag = get_wordnet_pos(treebank_tag)
        # WordNetLemmatizer assumes a noun unless a POS is supplied, which
        # mangles other word classes (e.g. the verb "was" becomes "wa").
        # Pass the mapped tag whenever there is one.
        if wordnet_tag is None:
            lemma = lemmatizer.lemmatize(word)
        else:
            lemma = lemmatizer.lemmatize(word, pos=wordnet_tag)

        senti_synsets = list(swn.senti_synsets(lemma, wordnet_tag))
        if senti_synsets:
            # Only the first synset contributes to the score.
            first_synset = senti_synsets[0]
            pos_score += first_synset.pos_score()
            neg_score += first_synset.neg_score()

    return pos_score - neg_score

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS letter.

    Returns 'a', 'v', 'n' or 'r' when the tag starts with J, V, N or R
    respectively, and None for every other tag.
    """
    prefix_to_wordnet = (
        ('J', 'a'),  # Adjective
        ('V', 'v'),  # Verb
        ('N', 'n'),  # Noun
        ('R', 'r'),  # Adverb
    )
    for prefix, wordnet_letter in prefix_to_wordnet:
        if treebank_tag.startswith(prefix):
            return wordnet_letter
    return None


# Load the dataset: an Excel workbook that must contain a 'Comments' column
file_path = '/content/September 2023 JB.xlsx'  # Replace this with your file path
df = pd.read_excel(file_path)

# Initialize the Microsoft Translator client.
# NOTE: both arguments are placeholders and must be replaced with real
# credentials; prefer loading them from the environment over committing them.
translator = Translator('YOUR_CLIENT_ID', 'YOUR_CLIENT_SECRET')

# Define the translation function
def preprocess_translate(comment):
    """Normalize a comment and translate it to English when needed.

    The text is lower-cased and reduced to letters and whitespace. Text that
    is pure ASCII after that step is treated as already English and returned
    without calling the translator; anything else is sent to the
    module-level Microsoft ``translator``.

    Args:
        comment: One cell of the 'Comments' column.

    Returns:
        str: The normalized (and, if needed, translated) text. Non-string
        input is returned as ``str(comment)``; on a translation failure the
        normalized, untranslated text is returned.
    """
    if not isinstance(comment, str):
        return str(comment)  # Convert non-string data to string

    comment = comment.lower()
    comment = ''.join(c for c in comment if c.isalpha() or c.isspace())

    # Heuristic language check: only letters and whitespace remain, so an
    # all-ASCII string is Latin-script text and is assumed to be English.
    # (Comparing the lower-cased text with its stripped form would only
    # detect surrounding whitespace, not the language.) Empty text also
    # returns here, which avoids a pointless API call.
    if comment.isascii():
        return comment

    try:
        # microsofttranslator's translate() takes the target language as its
        # second positional argument (translate(text, to_lang, from_lang=None))
        # and detects the source language when none is given. It accepts no
        # `lang_from` / `lang_to` keywords; passing them raises TypeError.
        return translator.translate(comment, 'en')
    except Exception as e:
        print(f"Translation error: {e}")
        return comment  # Return the original text if translation fails


# Normalize every comment and translate the non-English ones
df['Translated_Comments'] = df['Comments'].apply(preprocess_translate)


def get_pos_tags(comment):
    """Tokenize a comment and return its list of (token, POS tag) pairs."""
    return pos_tag(word_tokenize(comment))


# Store the POS-tagged tokens of each translated comment in 'POS_Tags'
df['POS_Tags'] = df['Translated_Comments'].apply(get_pos_tags)

# Score each translated comment with SentiWordNet
df['Senti_Score'] = df['Translated_Comments'].apply(calculate_tweet_sentiment)

# Apply labels based on sentiment score using SentiWordNet scores
def get_sentiment_label(score, pos_threshold=0.25, neg_threshold=-0.25):
    """Turn a numeric sentiment score into one of five text labels.

    Args:
        score: Sentiment score (positive minus negative).
        pos_threshold: Scores strictly above this are 'strongly positive'.
        neg_threshold: Scores strictly below this are 'strongly negative'.

    Returns:
        str: 'strongly positive', 'weakly positive', 'neutral',
        'weakly negative' or 'strongly negative'. A score of exactly 0 (or
        NaN, for which every comparison is false) is 'neutral'.
    """
    if score > pos_threshold:
        return 'strongly positive'
    if score > 0:
        return 'weakly positive'
    if score < neg_threshold:
        return 'strongly negative'
    # Scores in [neg_threshold, 0) are weakly negative. The condition must be
    # `score < 0`; testing `score >= 0` here can never be true at this point.
    if score < 0:
        return 'weakly negative'
    return 'neutral'

# Label every comment from its sentiment score (new 'Labels' column)
df['Labels'] = df['Senti_Score'].apply(get_sentiment_label)

# Preview the first rows of the enriched DataFrame
print(df.head())

# Build the output path: '/content/Updated_' + the input's bare filename
file_name = file_path.rsplit('/', 1)[-1]  # drop the directory part
updated_file_name = f'/content/Updated_{file_name}'

# Write the enriched DataFrame to the new Excel file
df.to_excel(updated_file_name, index=False)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Translation error: Translator.translate() got an unexpected keyword argument 'lang_from'
Translation error: Translator.translate() got an unexpected keyword argument 'lang_from'
Translation error: Translator.translate() got an unexpected keyword argument 'lang_from'
Translation error: Translator.translate() got an unexpected keyword argument 'lang_from'
Translation error: Translator.translate() got an unexpected keyword argument 'lang_from'
        Date           Time  \
0 2023-09-10  12:21:57+0000   
1 2023-09-10  07:43:42+0000   
2 2023-09-11  11:23:00+0000   
3 2023-09-12  11:57:34+0000   
4 2023-09-13  11:10:55+0000   

                                            Comments  \
0  ال کلاس فور ملازمينو حکومت نه غوښته کړي چي دوي...   
1  پنجاب میں سارے لوگ ڈیوٹی پر ہے کام کر رہا ہے ا...   
2  پاکستان کی ترقی کے لیے موجودہ صوبوں کی تقسیم  ...   
3  پشاور کالج کے اساتذہ کا پروموشن بورڈ کے خلاف ا...   
4  پی ڈی اے ایمپلائز اینڈ ورکرز یونین کا مطالبات ...   

                               

# DeepL Translator


In [None]:
import pandas as pd
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
import requests

# Download NLTK resources
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the data packages that
# word_tokenize and pos_tag load on recent NLTK releases (3.9+); the legacy
# names are still fetched so older installations keep working.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'sentiwordnet',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

# Function to perform translation using DeepL API
def deepl_translate(text, api_key):
    """Translate text to English through the DeepL REST API.

    Args:
        text: The text to translate.
        api_key: DeepL authentication key.

    Returns:
        str: The translated text, or ``text`` unchanged when DeepL answers
        with a non-200 status (the status code is printed).

    Raises:
        requests.RequestException: On network failures or when the request
        times out.
    """
    # Free-plan keys carry the ':fx' suffix and are only accepted by the
    # api-free host; the paid host rejects them with HTTP 403.
    host = "api-free.deepl.com" if str(api_key).endswith(":fx") else "api.deepl.com"
    url = f"https://{host}/v2/translate"

    # DeepL documents header-based authentication; keeping the key out of the
    # form body also keeps it out of logged request payloads.
    headers = {'Authorization': f'DeepL-Auth-Key {api_key}'}
    params = {
        'text': text,
        'target_lang': 'EN'
    }

    # A timeout stops one stalled request from hanging the whole run.
    response = requests.post(url, headers=headers, data=params, timeout=30)
    if response.status_code == 200:
        return response.json()['translations'][0]['text']

    print(f"Translation error: {response.status_code}")
    return text

# Function to preprocess and translate comments
def preprocess_translate(comment):
    """Normalize a comment and translate it to English with DeepL when needed.

    The text is lower-cased and reduced to letters and whitespace. Text that
    is pure ASCII after that step is treated as already English and returned
    without calling DeepL; anything else is passed to ``deepl_translate``.

    The API key is read from the ``DEEPL_API_KEY`` environment variable and
    falls back to the 'YOUR_API_KEY' placeholder when the variable is unset.

    Args:
        comment: One cell of the 'Comments' column.

    Returns:
        str: The normalized (and, if needed, translated) text. Non-string
        input is returned as ``str(comment)``; on a translation failure the
        normalized, untranslated text is returned.
    """
    import os  # local import: only this function needs it

    if not isinstance(comment, str):
        return str(comment)

    comment = comment.lower()
    comment = ''.join(c for c in comment if c.isalpha() or c.isspace())

    # Heuristic language check: only letters and whitespace remain, so an
    # all-ASCII string is Latin-script text and is assumed to be English.
    # (Comparing the lower-cased text with its stripped form would only
    # detect surrounding whitespace, not the language.) Empty text also
    # returns here, which avoids a pointless API call.
    if comment.isascii():
        return comment

    try:
        api_key = os.environ.get('DEEPL_API_KEY', 'YOUR_API_KEY')
        return deepl_translate(comment, api_key)
    except Exception as e:
        print(f"Translation error: {e}")
        return comment

# Function to calculate sentiment score
def calculate_tweet_sentiment(tweet_content):
    """Score a text with SentiWordNet and return positive minus negative.

    The text is tokenized and POS-tagged. Each token is lemmatized and looked
    up in SentiWordNet, and the scores of the first synset returned for it
    are accumulated.

    Args:
        tweet_content: The text to score (expected to be English).

    Returns:
        The sum of positive scores minus the sum of negative scores; 0 when
        no token has a SentiWordNet entry.
    """
    pos_score = 0
    neg_score = 0

    lemmatizer = WordNetLemmatizer()

    for word, treebank_tag in pos_tag(word_tokenize(tweet_content)):
        wordnet_tag = get_wordnet_pos(treebank_tag)
        # WordNetLemmatizer assumes a noun unless a POS is supplied, which
        # mangles other word classes (e.g. the verb "was" becomes "wa").
        # Pass the mapped tag whenever there is one.
        if wordnet_tag is None:
            lemma = lemmatizer.lemmatize(word)
        else:
            lemma = lemmatizer.lemmatize(word, pos=wordnet_tag)

        senti_synsets = list(swn.senti_synsets(lemma, wordnet_tag))
        if senti_synsets:
            # Only the first synset contributes to the score.
            first_synset = senti_synsets[0]
            pos_score += first_synset.pos_score()
            neg_score += first_synset.neg_score()

    return pos_score - neg_score

# Function to get WordNet POS tags
def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS letter.

    Returns 'a', 'v', 'n' or 'r' when the tag starts with J, V, N or R
    respectively, and None for every other tag.
    """
    prefix_to_wordnet = (
        ('J', 'a'),  # Adjective
        ('V', 'v'),  # Verb
        ('N', 'n'),  # Noun
        ('R', 'r'),  # Adverb
    )
    for prefix, wordnet_letter in prefix_to_wordnet:
        if treebank_tag.startswith(prefix):
            return wordnet_letter
    return None

# Load the dataset: an Excel workbook that must contain a 'Comments' column
file_path = '/content/September 2023 JB.xlsx'  # Replace with your file path
df = pd.read_excel(file_path)

# Normalize every comment and translate the non-English ones; the result is
# stored in the new 'Translated_Comments' column
df['Translated_Comments'] = df['Comments'].apply(preprocess_translate)

# Score each translated comment with SentiWordNet (new 'Senti_Score' column)
df['Senti_Score'] = df['Translated_Comments'].apply(calculate_tweet_sentiment)

# Apply labels based on sentiment score using SentiWordNet scores
def get_sentiment_label(score, pos_threshold=0.25, neg_threshold=-0.25):
    """Turn a numeric sentiment score into one of five text labels.

    Args:
        score: Sentiment score (positive minus negative).
        pos_threshold: Scores strictly above this are 'strongly positive'.
        neg_threshold: Scores strictly below this are 'strongly negative'.

    Returns:
        str: 'strongly positive', 'weakly positive', 'neutral',
        'weakly negative' or 'strongly negative'. A score of exactly 0 (or
        NaN, for which every comparison is false) is 'neutral'.
    """
    if score > pos_threshold:
        return 'strongly positive'
    if score > 0:
        return 'weakly positive'
    if score < neg_threshold:
        return 'strongly negative'
    # Scores in [neg_threshold, 0) are weakly negative. The condition must be
    # `score < 0`; testing `score >= 0` here can never be true at this point.
    if score < 0:
        return 'weakly negative'
    return 'neutral'

# Label every comment from its sentiment score (new 'Labels' column)
df['Labels'] = df['Senti_Score'].apply(get_sentiment_label)

# Preview the first rows of the enriched DataFrame
print(df.head())

# Build the output path: '/content/Updated_' + the input's bare filename
file_name = file_path.rsplit('/', 1)[-1]  # drop the directory part
updated_file_name = f'/content/Updated_{file_name}'

# Write the enriched DataFrame to the new Excel file
df.to_excel(updated_file_name, index=False)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Translation error: 403
Translation error: 403
Translation error: 403
Translation error: 403
Translation error: 403
        Date           Time  \
0 2023-09-10  12:21:57+0000   
1 2023-09-10  07:43:42+0000   
2 2023-09-11  11:23:00+0000   
3 2023-09-12  11:57:34+0000   
4 2023-09-13  11:10:55+0000   

                                            Comments  \
0  ال کلاس فور ملازمينو حکومت نه غوښته کړي چي دوي...   
1  پنجاب میں سارے لوگ ڈیوٹی پر ہے کام کر رہا ہے ا...   
2  پاکستان کی ترقی کے لیے موجودہ صوبوں کی تقسیم  ...   
3  پشاور کالج کے اساتذہ کا پروموشن بورڈ کے خلاف ا...   
4  پی ڈی اے ایمپلائز اینڈ ورکرز یونین کا مطالبات ...   

                                 Translated_Comments  Senti_Score  \
0  ال کلاس فور ملازمينو حکومت نه غوښته کړي چي دوي...          0.0   
1  پنجاب میں سارے لوگ ڈیوٹی پر ہے کام کر رہا ہے ا...          0.0   
2  پاکستان کی ترقی کے لیے موجودہ صوبوں کی تقسیم  ...          1.5   
3  پشاور کالج کے اساتذہ کا پروموشن بورڈ کے خلاف ا...          0.0   
4  پی ڈی اے ایمپ