## Veri Setini Yükleme:

In [None]:
import nltk
from nltk.corpus import twitter_samples

In [None]:
from nltk.corpus import stopwords

In [None]:
# Fetch the labelled tweet corpus read below via twitter_samples.strings().
nltk.download(info_or_id='twitter_samples')

In [None]:
# Fetch the stop-word lists used by remove_stopwords().
nltk.download(info_or_id='stopwords')

In [None]:
from nltk.tokenize import TweetTokenizer

In [None]:
from nltk.stem import PorterStemmer

In [None]:
import re
import pandas as pd

In [None]:
# Load the negative tweets and tag every row with label 0.
negative_tweets = twitter_samples.strings("negative_tweets.json")
df_neg = pd.DataFrame({'text': negative_tweets, 'label': 0})

In [None]:
# Load the positive tweets and tag every row with label 1.
positive_tweets = twitter_samples.strings("positive_tweets.json")
df_pos = pd.DataFrame({'text': positive_tweets, 'label': 1})

In [None]:
# Stack both classes into one frame, then shuffle the rows so the two labels
# are interleaved. The shuffle is seeded so that re-running the notebook gives
# the same row order (and therefore the same train/test membership later on).
df = pd.concat([df_neg, df_pos])
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Preview the first ten shuffled rows.
df.head(n=10)

## Metinleri Ön İşleme:

In [None]:
# word_tokenize() needs the Punkt tokenizer models. Recent NLTK releases look
# for them under the 'punkt_tab' resource rather than 'punkt', so fetch both
# to keep tokenization working regardless of the installed NLTK version.
nltk.download('punkt_tab')
nltk.download('punkt')

In [38]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re

# Text cleaning
def clean_text(text):
    """Strip URLs, @mentions, '#' signs and punctuation from *text*.

    The substitutions run in this order:
    1. anything starting with ``http`` up to the next whitespace,
    2. ``@`` followed by word characters, and every bare ``#`` character
       (the hashtag word itself is kept),
    3. every character that is neither a word character nor whitespace.

    Whitespace is left untouched, so removed spans may leave double spaces.
    """
    removal_patterns = (
        r'http\S+',
        r'\@\w+|\#',
        r'[^\w\s]',
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)
    return text

# Lower-casing
def lowercase(text):
    """Return *text* converted to lower case."""
    lowered = text.lower()
    return lowered

# Tokenization
def tokenize(text):
    """Split *text* into tokens with NLTK's ``word_tokenize``."""
    tokens = word_tokenize(text)
    return tokens

# Stop-word removal
_STOPWORDS_ENGLISH = None  # lazily-built cache, see _english_stopwords()


def _english_stopwords():
    """Return the English stop-word set, loading it from NLTK only once."""
    global _STOPWORDS_ENGLISH
    if _STOPWORDS_ENGLISH is None:
        _STOPWORDS_ENGLISH = frozenset(stopwords.words('english'))
    return _STOPWORDS_ENGLISH


def remove_stopwords(tokens):
    """Return the tokens that are not English stop words, in original order.

    The comparison is an exact string match against NLTK's English list, so
    callers are expected to pass tokens in the same case as that list.

    The stop-word set is cached after the first call: this function runs once
    per tweet, and re-reading the corpus file every time is wasted work.
    """
    stopwords_english = _english_stopwords()
    return [word for word in tokens if word not in stopwords_english]

# Stemming
def stem(tokens):
    """Return a list with the Porter stem of every token in *tokens*."""
    porter = PorterStemmer()
    return list(map(porter.stem, tokens))

# Run every preprocessing step
def preprocess(text):
    """Clean one raw tweet and return it as a space-joined string of stems.

    Steps, in order: clean_text -> lowercase -> tokenize ->
    remove_stopwords -> stem. The resulting tokens are joined with single
    spaces.
    """
    normalized = lowercase(clean_text(text))
    kept_tokens = remove_stopwords(tokenize(normalized))
    return ' '.join(stem(kept_tokens))

# Run the preprocessing pipeline on every tweet and keep the result in a new
# 'text_cleaned' column next to the raw text.
df['text_cleaned'] = df['text'].map(preprocess)


In [None]:
# Preview the first rows, now including the 'text_cleaned' column.
df.head(n=5)

## Kelime Bulutu

In [None]:
from nltk.probability import FreqDist
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re


In [None]:
# Count individual tokens, not whole tweets: each 'text_cleaned' value is a
# space-joined string of stems, so it has to be split back into words before
# counting. Passing the column directly would make every complete tweet a
# single "word" in the cloud.
word = FreqDist(
    token
    for cleaned in df['text_cleaned']
    for token in cleaned.split()
)
wordcloud = WordCloud(width= 400, height=200,
        background_color= 'white').generate_from_frequencies(word)


In [None]:
# Render the word cloud as an image without axis ticks or frame.
fig, ax = plt.subplots(figsize=(5, 3))
ax.imshow(wordcloud)
ax.axis("off")
plt.show()

## Öznitelik Çıkarımı:

## TF-IDF

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn the cleaned tweets into a TF-IDF matrix, keeping at most 10000 terms.
vectorizer = TfidfVectorizer(max_features=10000)
tfidf_matrix = vectorizer.fit_transform(df['text_cleaned'])
feature_names = vectorizer.get_feature_names_out()

# Report the matrix shape.
tfidf_shape = tfidf_matrix.shape
print("td Idf matris: ", tfidf_shape)

## Model Eğitimi

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Split the cleaned text first and fit TF-IDF on the training part only.
# Fitting the vectorizer on the whole corpus lets the vocabulary and IDF
# weights see the held-out tweets, which leaks test information into training
# and inflates the reported scores.
text_train, text_test, y_train, y_test = train_test_split(
    df['text_cleaned'], df['label'], test_size=0.2, random_state=42)

# Re-bind the vectorizer so the object kept around is the one that matches
# the feature space the models below are trained on.
vectorizer = TfidfVectorizer(max_features=10000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)  # reuse training vocabulary / IDF
feature_names = vectorizer.get_feature_names_out()

## KNN

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides warnings raised by the models trained below.
warnings.simplefilter('ignore')


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 10-nearest-neighbours classifier and predict the held-out split.
knn = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
# Print the KNN scores; "Hassasiyet" is recall_score and "Kesinlik" is
# precision_score.
print("knn sonuçları")
for label, metric in (
    ("Doğruluk: ", accuracy_score),
    ("F1: ", f1_score),
    ("Hassasiyet", recall_score),
    ("Kesinlik", precision_score),
):
    print(label, metric(y_test, y_pred))

## RandomForest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 150-tree random forest and predict the held-out split. The forest is
# seeded (same seed as the train/test split) so the reported scores can be
# reproduced from run to run.
clf = RandomForestClassifier(n_estimators= 150, random_state=42)
clf.fit(X_train,y_train)
y_pred_clf = clf.predict(X_test)

In [None]:
# Print the random-forest scores; "Hassasiyet" is recall_score and
# "Kesinlik" is precision_score.
print("Random Forest sonuçları")
for label, metric in (
    ("Doğruluk: ", accuracy_score),
    ("F1: ", f1_score),
    ("Hassasiyet", recall_score),
    ("Kesinlik", precision_score),
):
    print(label, metric(y_test, y_pred_clf))

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Confusion matrix of the KNN predictions (y_pred), drawn as a heatmap with
# integer counts in each cell.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
plt.figure(figsize=(4, 3))
sns.heatmap(data=cm, annot=True, fmt='d', cmap='Blues')

In [None]:
# Confusion matrix of the random-forest predictions (y_pred_clf), drawn as a
# heatmap with integer counts in each cell.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_clf)
plt.figure(figsize=(4, 3))
sns.heatmap(data=cm, annot=True, fmt='d', cmap='Blues')

## Lojistik Regresyon

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with default settings and predict the
# held-out split.
lr = LogisticRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

In [None]:
# Print the logistic-regression scores; "Hassasiyet" is recall_score and
# "Kesinlik" is precision_score.
print("LR sonuçları")
for label, metric in (
    ("Doğruluk: ", accuracy_score),
    ("F1: ", f1_score),
    ("Hassasiyet", recall_score),
    ("Kesinlik", precision_score),
):
    print(label, metric(y_test, y_pred_lr))

In [None]:
# Confusion matrix of the logistic-regression predictions (y_pred_lr), drawn
# as a heatmap with integer counts in each cell.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_lr)
plt.figure(figsize=(4, 3))
sns.heatmap(data=cm, annot=True, fmt='d', cmap='Blues')

## SVM (Support Vector Machines)

In [None]:
from sklearn.svm import SVC

# Fit a linear-kernel support vector classifier and predict the held-out
# split.
svm = SVC(kernel='linear').fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [None]:
# Print the SVM scores; "Hassasiyet" is recall_score and "Kesinlik" is
# precision_score.
print("SVM sonuçları")
for label, metric in (
    ("Doğruluk: ", accuracy_score),
    ("F1: ", f1_score),
    ("Hassasiyet", recall_score),
    ("Kesinlik", precision_score),
):
    print(label, metric(y_test, y_pred_svm))

In [None]:
# Confusion matrix of the SVM predictions (y_pred_svm), drawn as a heatmap
# with integer counts in each cell.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
plt.figure(figsize=(4, 3))
sns.heatmap(data=cm, annot=True, fmt='d', cmap='Blues')