In [5]:
import nltk
import string
import csv
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer

# Download the NLTK data packages used below (tokenizer models, stopword
# lists, WordNet data).
for resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Shared helpers: the English stopword set plus one lemmatizer and one stemmer
# instance reused for every sentence.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

# Read the whole input document into memory as UTF-8 text.
with open("karar_metni_2025-05-03_16-17-56.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Split the document into sentences.
sentences = sent_tokenize(text)

# Per-sentence token lists, filled in parallel: one corpus holds the
# lemmatized tokens, the other the stemmed tokens.
tokenized_corpus_lemmatized, tokenized_corpus_stemmed = [], []

def preprocess_sentence(sentence):
    """Tokenize one sentence and return its lemmatized and stemmed tokens.

    The sentence is split with ``word_tokenize``. A token is kept only if it
    is purely alphabetic and its lowercase form is not in ``stop_words``;
    kept tokens are lowercased. The same filtered list is then passed
    through ``lemmatizer.lemmatize`` (called without a part-of-speech
    argument) and through ``stemmer.stem``.

    Args:
        sentence: The sentence text to process.

    Returns:
        A ``(lemmatized_tokens, stemmed_tokens)`` tuple of two lists that
        have the same length and token order.
    """
    kept_words = []
    for raw_token in word_tokenize(sentence):
        # The alphabetic test runs on the token as produced by the tokenizer,
        # before lowercasing; this drops numbers and punctuation.
        if not raw_token.isalpha():
            continue
        word = raw_token.lower()
        if word in stop_words:
            continue
        kept_words.append(word)

    lemmas = list(map(lemmatizer.lemmatize, kept_words))
    stems = list(map(stemmer.stem, kept_words))
    return lemmas, stems

# Run every sentence through preprocessing, keeping the lemmatized and the
# stemmed token lists in two parallel corpora (same order as `sentences`).
for sentence in sentences:
    lemmatized, stemmed = preprocess_sentence(sentence)
    tokenized_corpus_lemmatized.append(lemmatized)
    tokenized_corpus_stemmed.append(stemmed)


def _write_token_rows(path, corpus):
    """Write *corpus* to *path* as a single-column CSV, one sentence per row.

    Args:
        path: Destination file path; an existing file is overwritten.
        corpus: Iterable of token lists. Each list becomes one row whose
            only field is the tokens joined by single spaces.
    """
    # newline="" leaves line-ending handling to the csv module.
    with open(path, mode="w", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows([' '.join(tokens)] for tokens in corpus)


# Save both corpora to CSV (lemmatized first, then stemmed).
_write_token_rows("lemmatized_sentences.csv", tokenized_corpus_lemmatized)
_write_token_rows("stemmed_sentences.csv", tokenized_corpus_stemmed)

# Print the first five sentences (fewer if the text is shorter) next to their
# lemmatized and stemmed forms. zip() stops at the shortest input, so slicing
# `sentences` is enough to cap the preview.
preview = zip(sentences[:5], tokenized_corpus_lemmatized, tokenized_corpus_stemmed)
for number, (base, lemmatized, stemmed) in enumerate(preview, start=1):
    print(f"Cümle {number} - Base: {base}")
    print(f"Cümle {number} - Lemmatized: {lemmatized}")
    print(f"Cümle {number} - Stemmed: {stemmed}")
    print("\n")



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Cümle 1 - Base: 141 Nev., Advance Opinion c9a
IN THE SUPREME COURT OF THE STATE OF NEVADA
QUASHAWN SAQUAN SHERIDAN,                               No.
Cümle 1 - Lemmatized: ['advance', 'opinion', 'supreme', 'court', 'state', 'nevada', 'quashawn', 'saquan', 'sheridan']
Cümle 1 - Stemmed: ['advanc', 'opinion', 'suprem', 'court', 'state', 'nevada', 'quashawn', 'saquan', 'sheridan']


Cümle 2 - Base: 89167
Appellant,
vs.
Cümle 2 - Lemmatized: ['appellant', 'v']
Cümle 2 - Stemmed: ['appel', 'vs']


Cümle 3 - Base: THE STATE OF NEVADA,                                a    FILED
Respondent.
Cümle 3 - Lemmatized: ['state', 'nevada', 'filed', 'respondent']
Cümle 3 - Stemmed: ['state', 'nevada', 'file', 'respond']


Cümle 4 - Base: APR 2 4 2ŒZ5
:YL;Mr
HA               RT
Appeal from a district court order revoking appellant's
probation.
Cümle 4 - Lemmatized: ['apr', 'yl', 'mr', 'ha', 'rt', 'appeal', 'district', 'court', 'order', 'revoking', 'appellant', 'probation']
Cümle 4 - Stemmed: ['apr', 'yl'