In [2]:
import json

# Load the data from the JSON file
with open("output2.json", "r", encoding="utf-8") as f:
    data = json.load(f)


In [3]:
TRAIN_DATA = []

for item in data:
    text = item["text"]
    entities = []

    for ent in item["entities"]:
        start = ent["start_offset"]
        end = ent["end_offset"]
        label = ent["label"]
        entities.append((start, end, label))

    TRAIN_DATA.append((text, {"entities": entities}))


In [5]:
import spacy
from spacy.training import Example
import re

# Ön işleme fonksiyonu
def preprocess_text(text):
    # Metni küçük harflere dönüştür
    text = text.lower()
    # Noktalama işaretlerini kaldır
    text = re.sub(r'[^\w\s]', '', text)
    # Gereksiz boşlukları kaldır
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Entity'lerin hizalanmasını kontrol et ve güncelle
def adjust_entity_offsets(text, original_text, entities):
    adjusted_entities = []
    original_words = original_text.split()
    processed_words = text.split()

    offset_map = {}
    original_idx = 0
    for word in processed_words:
        while original_idx < len(original_words) and original_words[original_idx].lower() != word:
            original_idx += 1
        if original_idx < len(original_words):
            offset_map[original_idx] = len(' '.join(processed_words[:processed_words.index(word)]))
            original_idx += 1

    for start, end, label in entities:
        new_start = offset_map.get(start)
        new_end = offset_map.get(end-1) + len(original_words[end-1]) if end-1 in offset_map else end
        if new_start is not None and new_end is not None:
            adjusted_entities.append((new_start, new_end, label))

    return adjusted_entities

# Eğitim verilerini ön işlemeden geçirme ve entity hizalamasını güncelleme
def preprocess_data(data):
    preprocessed_data = []
    for text, annotations in data:
        preprocessed_text = preprocess_text(text)
        adjusted_entities = adjust_entity_offsets(preprocessed_text, text, annotations.get("entities"))
        filtered_entities = filter_overlapping_entities(adjusted_entities)
        preprocessed_data.append((preprocessed_text, {"entities": filtered_entities}))
    return preprocessed_data

# Çakışan entity'leri filtreleme
def filter_overlapping_entities(entities):
    entities = sorted(entities, key=lambda x: x[0])  # start offset'e göre sırala
    filtered_entities = []
    last_end = -1

    for start, end, label in entities:
        if start >= last_end:
            filtered_entities.append((start, end, label))
            last_end = end

    return filtered_entities

# Eğitim verilerini ön işlemeden geçir ve hizalanmış veriyi al
TRAIN_DATA = preprocess_data(TRAIN_DATA)

# Boş bir model oluştur veya mevcut bir modeli yükle
nlp = spacy.blank("tr")

# NER pipeline bileşenini ekle
ner = nlp.add_pipe("ner")

# NER bileşenine etiketleri ekle
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# Modeli eğitim verileri ile başlat
optimizer = nlp.begin_training()

# Modeli eğit
for i in range(10):
    losses = {}
    for text, annotations in TRAIN_DATA:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], losses=losses, drop=0.5, sgd=optimizer)
    print(losses)




{'ner': 179.015905254792}
{'ner': 20.80084943928217}
{'ner': 21.468249363985528}
{'ner': 15.639950456476726}
{'ner': 18.565283212224017}
{'ner': 17.902062677596504}
{'ner': 15.00816582763092}
{'ner': 15.667858167880889}
{'ner': 23.01564729346938}
{'ner': 20.29996609375971}


In [6]:
nlp.to_disk("ner_model")


In [12]:
import spacy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# SpaCy ve VADER yükle
nlp = spacy.load("ner_model")
analyzer = SentimentIntensityAnalyzer()

# Metni analiz et
text = "Turkcell akıllı cihaz eski tip olan t40 ve t50 yi kullandim ve t50 yi kullaniyorum hala gayet de iyi bi telefon t50 nin arka kapak yokdu sabit bir kasasi vardi ariza durumynda tlf cope gidiyordu tek sorun buydu yeni modellerde bunu ortadan kaldirmislar gordugum kadariyla seyyar kapak yok ise tavsiye etmiyorum geri kalan bu fiyatta normal ozellikler"
doc = nlp(text)

# Entity'lere duygu analizi ekle
for ent in doc.ents:
    # Entity etrafındaki metni al
    ent_text = ent.text
    ent_start = ent.start_char
    ent_end = ent.end_char
    ent_context = text[ent_start:ent_end]

    # Duygu analizi yap
    ent_sentiment = analyzer.polarity_scores(ent_context)
    sentiment_label = 'Neutral'
    if ent_sentiment['compound'] >= 0.05:
        sentiment_label = 'Positive'
    elif ent_sentiment['compound'] <= -0.05:
        sentiment_label = 'Negative'

    # Entity ve duygu etiketini yazdır
    print(f"Entity: {ent.text}, Label: {ent.label_}, Sentiment: {sentiment_label}")

Entity: Turkcell, Label: Turkcell, Sentiment: Neutral


In [8]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [14]:
from google.colab import drive
drive.mount('/content/drive')

# Modeli Google Drive'a kaydet
nlp.to_disk('/content/drive/MyDrive/ner_model')



Mounted at /content/drive


In [None]:
from google.colab import drive
drive.mount('/content/drive')