In [None]:
!pip install pandas spacy vaderSentiment
!python -m spacy download en_core_web_sm
!python -m nltk.downloader punkt
!python -m nltk.downloader stopwords


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import pandas as pd
import ast
import spacy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# spaCy pipeline used for aspect term extraction
nlp = spacy.load('en_core_web_sm')

# Input CSV locations: the annotated SemEval set and the Yelp reviews
TRAIN_CSV_PATH = '/content/SemEval.csv'
TEST_CSV_PATH = '/content/Yelp_dataset.csv'

train_data = pd.read_csv(TRAIN_CSV_PATH)
test_data = pd.read_csv(TEST_CSV_PATH)

def extract_labels(aspect_terms_str):
    """Parse a serialized aspect-term annotation into (term, polarity) pairs.

    Args:
        aspect_terms_str: String containing a Python literal list of dicts,
            each with 'term' and 'polarity' keys. Non-string values (such as
            the NaN pandas produces for an empty CSV cell, or None) and blank
            strings are treated as "no annotation".

    Returns:
        A list of (term, polarity) tuples, or None when there are no aspect
        terms, so the row can be removed with dropna().

    Raises:
        ValueError, SyntaxError: if a non-blank string is not a valid Python
            literal.
        KeyError: if an entry lacks the 'term' or 'polarity' key.
    """
    # Missing cells are not strings; literal_eval would raise on them.
    if not isinstance(aspect_terms_str, str) or not aspect_terms_str.strip():
        return None

    aspect_terms = ast.literal_eval(aspect_terms_str)
    if not aspect_terms:
        return None
    return [(term['term'], term['polarity']) for term in aspect_terms]

# Parse the raw annotations into (term, polarity) pairs and keep only the
# rows that actually carry at least one annotated aspect term.
train_data = (
    train_data
    .assign(aspect_terms=train_data['aspectTerms'].apply(extract_labels))
    .dropna(subset=['aspect_terms'])
)


In [None]:
# Function to extract aspect terms using spaCy's dependency parsing
def extract_aspect_terms_spacy(doc):
    """Collect candidate aspect terms from a parsed document.

    A token contributes the text of its syntactic head when the token's
    dependency label is 'amod', 'nsubj' or 'dobj' and that head is tagged as
    a NOUN. A head is listed once per qualifying token, so it can repeat.

    Args:
        doc: Iterable of tokens exposing `dep_` and `head` (with `pos_` and
            `text`), e.g. the result of calling the spaCy pipeline on a text.

    Returns:
        List of head texts in document order.
    """
    aspect_relations = ('amod', 'nsubj', 'dobj')
    return [
        token.head.text
        for token in doc
        if token.dep_ in aspect_relations and token.head.pos_ == 'NOUN'
    ]

# Applying aspect term extraction on the training data
train_data['extracted_aspects'] = train_data['comments'].apply(lambda x: extract_aspect_terms_spacy(nlp(x)))

# Flatten the aspect terms and their corresponding polarities for training.
# Each extracted aspect is matched to the annotated terms by text, not by list
# position: the extracted list and the annotation list are produced
# independently, so their order and length need not agree. Extracted aspects
# are single tokens while annotated terms may span several words, so the
# lookup is keyed on every lower-cased word of each annotated term.
aspect_term_list = []
polarity_list = []

for aspects, labels in zip(train_data['extracted_aspects'], train_data['aspect_terms']):
    polarity_by_word = {}
    for term, polarity in labels:
        for word in str(term).lower().split():
            # First annotation wins if a word occurs in several terms
            polarity_by_word.setdefault(word, polarity)

    for aspect in aspects:
        key = aspect.lower()
        if key in polarity_by_word:  # skip aspects with no annotation
            aspect_term_list.append(aspect)
            polarity_list.append(polarity_by_word[key])

# Convert to DataFrame for vectorization
aspect_df = pd.DataFrame({
    'aspect_terms': aspect_term_list,
    'polarity': polarity_list
})


In [None]:
# Shared VADER analyzer instance used by get_sentiment_vader
analyzer = SentimentIntensityAnalyzer()


def get_sentiment_vader(text):
    """Classify `text` as 'positive', 'negative' or 'neutral' with VADER.

    The label is derived from the 'compound' entry of the analyzer's
    polarity scores: >= 0.05 is positive, <= -0.05 is negative, and anything
    in between is neutral.
    """
    scores = analyzer.polarity_scores(text)
    if scores['compound'] >= 0.05:
        return 'positive'
    if scores['compound'] <= -0.05:
        return 'negative'
    return 'neutral'

# Extract aspect terms from every comment in the unlabeled data
test_data['extracted_aspects'] = test_data['preprocessed_comments'].apply(
    lambda comment: extract_aspect_terms_spacy(nlp(comment))
)

# One (comment text, aspect term) pair per extracted aspect, in row order
aspect_context_pairs = [
    (comment, aspect)
    for comment, aspects in zip(test_data['preprocessed_comments'], test_data['extracted_aspects'])
    for aspect in aspects
]
test_aspect_term_list = [aspect for _, aspect in aspect_context_pairs]
test_text_list = [comment for comment, _ in aspect_context_pairs]

# Long-format frame: one row per extracted aspect term
test_aspect_df = pd.DataFrame({
    'text': test_text_list,
    'aspect_terms': test_aspect_term_list
})

# VADER is applied to the aspect term string itself
test_aspect_df['predicted_polarity'] = test_aspect_df['aspect_terms'].apply(get_sentiment_vader)


In [None]:
# Aggregate the results back to the unlabeled data
def aggregate_predictions(text, aspect_terms, predicted_polarity):
    """Pair each aspect term with the polarity at the same position.

    Args:
        text: The comment text; not used by the function.
        aspect_terms: Sequence of aspect terms.
        predicted_polarity: Sequence of polarity labels, positionally aligned
            with `aspect_terms`.

    Returns:
        List of (aspect_term, polarity) tuples, truncated to the shorter of
        the two inputs.
    """
    return [(term, polarity) for term, polarity in zip(aspect_terms, predicted_polarity)]

# Group the per-aspect predictions by comment text once, instead of scanning
# all of test_aspect_df with a boolean mask for every row of test_data.
# groupby keeps the original row order inside each group, so each list equals
# what the per-row filter would have produced.
polarities_by_text = (
    test_aspect_df.groupby('text', sort=False)['predicted_polarity'].agg(list).to_dict()
)

test_data['aspect_polarity'] = test_data.apply(
    lambda row: aggregate_predictions(
        row['preprocessed_comments'],
        row['extracted_aspects'],
        # Comments with no extracted aspects have no entry -> empty list
        polarities_by_text.get(row['preprocessed_comments'], []),
    ),
    axis=1
)


In [None]:
# Function to map string labels to numeric for evaluation
def map_labels(label):
    """Encode a polarity label as an integer for metric computation.

    Returns 1 for 'positive', 0 for 'negative', and 2 for anything else.
    """
    for name, code in (('positive', 1), ('negative', 0)):
        if label == name:
            return code
    return 2

import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# NOTE(review): the "actual" polarities below are drawn at random, so the
# metrics only measure agreement with chance. Replace this column with real
# gold labels before reporting any of these numbers.
# A dedicated, seeded generator keeps the placeholder labels (and therefore the
# printed metrics) reproducible without touching the global random state.
PLACEHOLDER_LABEL_SEED = 42
placeholder_rng = random.Random(PLACEHOLDER_LABEL_SEED)
test_aspect_df['actual_polarity'] = [
    placeholder_rng.choice(['positive', 'negative', 'neutral'])
    for _ in range(len(test_aspect_df))
]

# Mapping the actual and predicted polarities to numeric values
test_aspect_df['mapped_predicted_polarity'] = test_aspect_df['predicted_polarity'].apply(map_labels)
test_aspect_df['mapped_actual_polarity'] = test_aspect_df['actual_polarity'].apply(map_labels)

# Calculate evaluation metrics
y_true = test_aspect_df['mapped_actual_polarity']
y_pred = test_aspect_df['mapped_predicted_polarity']

# zero_division=0 yields the same value as the default when a class is never
# predicted, but without emitting an UndefinedMetricWarning.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")


Accuracy: 0.3378
Precision: 0.3552
Recall: 0.3378
F1-Score: 0.2004


In [None]:
# Write the test data, including the aspect polarity column, to CSV
OUTPUT_CSV_PATH = '/content/Lexicon_ABSA.csv'
test_data.to_csv(OUTPUT_CSV_PATH, index=False)

# Read the file back and preview its first rows to verify what was written
saved_data = pd.read_csv(OUTPUT_CSV_PATH)
saved_data.head()


Unnamed: 0,overall_polarity,preprocessed_comments,aspect_terms,aspect_polarity
0,2,great food fun atmosphere amazing staff nnfood...,"['great', 'food', 'fun', 'atmosphere', 'amazin...","[('great', 'positive'), ('food', 'neutral'), (..."
1,2,this is some kick ass mexican food done right ...,"['kick', 'ass', 'mexican', 'food', 'done', 'ri...","[('kick', 'neutral'), ('ass', 'negative'), ('m..."
2,2,are you etching to do some art then greyfriars...,"['etching', 'art', 'greyfriars', 'place', 'wen...","[('etching', 'neutral'), ('art', 'neutral'), (..."
3,2,dont miss this little place they know how to m...,"['dont', 'miss', 'little', 'place', 'know', 'm...","[('dont', 'neutral'), ('miss', 'negative'), ('..."
4,2,theres a point in your life when you think man...,"['theres', 'point', 'life', 'think', 'man', 'i...","[('theres', 'neutral'), ('point', 'neutral'), ..."
