In [6]:
import pandas as pd
import spacy
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob
import numpy as np
import re

#!python -m spacy download en_core_web_md
nlp = spacy.load('en_core_web_md')

Przeanalizować, czy pojawia się sarkazm i ironia w toksycznych komentarzach, szukając niezgodności między dosłownym znaczeniem a kontekstem emocjonalnym. Wykorzystać wektoryzację, aby analizować podobieństwo semantyczne komentarzy do przeciwstawnych emocji.

In [7]:
df = pd.read_csv('sample.csv')

positive_emotion_terms = "joy, happiness, delight, love"
negative_emotion_terms = "anger, hatred, sadness, fear"
positive_emotion_vector = nlp(positive_emotion_terms).vector
negative_emotion_vector = nlp(negative_emotion_terms).vector

positive_keywords = {"happy", "joy", "delight", "love", "exciting", "good"}
negative_keywords = {"angry", "hate", "bad", "sad", "awful", "terrible"}

def analyze_sarcasm(comment):
    doc = nlp(comment)
    doc_vector = doc.vector

    sentiment_analysis = TextBlob(comment)
    polarity = sentiment_analysis.sentiment.polarity

    positive_count = sum(1 for token in doc if token.lemma_ in positive_keywords)
    negative_count = sum(1 for token in doc if token.lemma_ in negative_keywords)

    similarity_to_positive = cosine_similarity([doc_vector], [positive_emotion_vector])[0][0]
    similarity_to_negative = cosine_similarity([doc_vector], [negative_emotion_vector])[0][0]

    if polarity > 0 and negative_count > positive_count:
        return 'Potencjalny sarkazm'
    elif polarity < 0 and positive_count > negative_count:
        return 'Potencjalny sarkazm'
    elif similarity_to_negative > similarity_to_positive:
        return 'Możliwy sarkazm'
    else:
        return 'Brak sarkazmu'

df['sarcasm_detection'] = df['comment_text'].apply(analyze_sarcasm)
print(df[['comment_text', 'sarcasm_detection']].head())


                                        comment_text sarcasm_detection
0  What a breathe of fresh air to have someone wh...     Brak sarkazmu
1  Your jewish friends were the ones who told you...   Możliwy sarkazm
2  Possible collusion by Trump and his affiliates...   Możliwy sarkazm
3  Exactly.  We need a % of GDP spending cap at t...     Brak sarkazmu
4  By your own comment, even if some of them vote...   Możliwy sarkazm


Dla książek Anna Karenina oraz Jane Eyre - Wyodrębnić opisy i dialogi wybranych bohaterów, np. Anny, Aleksieja, Jane i Edwarda. Obliczyć podobieństwa semantyczne między bohaterami i określić, jak różne są ich osobowości.

In [8]:
with open('anna_karenina.txt', 'r', encoding='utf-8') as file:
    anna_karenina_text = file.read()

with open('jane_eyre.txt', 'r', encoding='utf-8') as file:
    jane_eyre_text = file.read()

def analyze_character_text(text, character_name):
    sentences = re.split(r'[.!?]', text)
    character_related = [sentence.strip() for sentence in sentences if character_name in sentence]

    descriptions = []
    dialogues = []

    for sentence in character_related:
        if '"' in sentence or "'" in sentence:
            dialogues.append(sentence)
        else:
            descriptions.append(sentence)

    description_vector = nlp(' '.join(descriptions)).vector if descriptions else np.zeros((300,))
    dialogue_vector = nlp(' '.join(dialogues)).vector if dialogues else np.zeros((300,))

    return description_vector, dialogue_vector

anna_desc_vec, anna_dial_vec = analyze_character_text(anna_karenina_text, "Anna")
alexei_desc_vec, alexei_dial_vec = analyze_character_text(anna_karenina_text, "Alexei")
jane_desc_vec, jane_dial_vec = analyze_character_text(jane_eyre_text, "Jane")
edward_desc_vec, edward_dial_vec = analyze_character_text(jane_eyre_text, "Edward")

similarity_anna_jane_desc = cosine_similarity([anna_desc_vec], [jane_desc_vec])[0][0]
similarity_alexei_edward_desc = cosine_similarity([alexei_desc_vec], [edward_desc_vec])[0][0]

print(f'Podobieństwo opisów między Anną i Jane: {similarity_anna_jane_desc}')
print(f'Podobieństwo opisów między Aleksiejem i Edwardem: {similarity_alexei_edward_desc}')

similarity_anna_jane_dial = cosine_similarity([anna_dial_vec], [jane_dial_vec])[0][0]
similarity_alexei_edward_dial = cosine_similarity([alexei_dial_vec], [edward_dial_vec])[0][0]

print(f'Podobieństwo dialogów między Anną i Jane: {similarity_anna_jane_dial}')
print(f'Podobieństwo dialogów między Aleksiejem i Edwardem: {similarity_alexei_edward_dial}')


Podobieństwo opisów między Anną i Jane: 0.9400222301483154
Podobieństwo opisów między Aleksiejem i Edwardem: 0.7150282263755798
Podobieństwo dialogów między Anną i Jane: 0.0
Podobieństwo dialogów między Aleksiejem i Edwardem: 0.0
