In [None]:
import pandas as pd
import re
import string

# Location of the raw dataset on disk.
input_path = "C:/Users/EKIN/Desktop/Combined Data.csv"

# Read the CSV, discard the unneeded 'Unnamed: 0' column when the file has one,
# and keep only the rows where both 'statement' and 'status' are present.
df = (
    pd.read_csv(input_path)
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .dropna(subset=['statement', 'status'])
)

print(f" Veri yÃ¼klendi. Kalan satÄ±r sayÄ±sÄ±: {df.shape[0]}")

# Text normalisation helper
def clean_text(text):
    """Normalise one raw statement.

    The text is lower-cased, then URLs, HTML tags, ASCII punctuation and every
    token containing a digit are removed. Line breaks become spaces and runs
    of whitespace are collapsed; the result is stripped at both ends.
    """
    # Applied strictly in this order: URLs must go before punctuation is
    # stripped, otherwise the URL patterns no longer match.
    substitutions = (
        (r'https?://\S+|www\.\S+', ''),                  # links
        (r'<.*?>+', ''),                                 # HTML tags
        (r'[%s]' % re.escape(string.punctuation), ''),   # punctuation
        (r'\n', ' '),                                    # line breaks
        (r'\w*\d\w*', ''),                               # tokens containing a digit
        (r'\s+', ' '),                                   # repeated whitespace
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Store the cleaned version of every statement (cast to str first) in a new column.
df['clean_text'] = df['statement'].astype(str).map(clean_text)

# Show the first five cleaned texts as a quick sanity check.
preview = df['clean_text'].head()
print("\nTemizlenmiÅŸ metin Ã¶rnekleri:")
print(preview)

# Persist the frame, including the new column, for the embedding stage.
cleaned_csv_path = "C:/Users/EKIN/Desktop/temizlenmis_veri.csv"
df.to_csv(cleaned_csv_path, index=False)

In [None]:
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"

from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder
import time

# Load the cleaned dataset produced by the preprocessing cell.
data_path = "C:/Users/EKIN/Desktop/temizlenmis_veri.csv"
if not os.path.exists(data_path):
    raise FileNotFoundError(f" Veri dosyasÄ± bulunamadÄ±: {data_path}")

# keep_default_na=False: by default pandas parses empty cells and strings such
# as "na", "null", "nan" or "none" as missing values. Cleaned texts can
# legitimately be empty or equal to one of those words; parsed as NaN they would
# later be turned into the literal string "nan" by astype(str) and embedded as
# such. Reading them verbatim keeps every text exactly as it was written.
df = pd.read_csv(data_path, keep_default_na=False)

# Fail early, naming the first missing column, if the file lacks the columns
# the rest of this cell relies on.
required_columns = ["clean_text", "status"]
for col in required_columns:
    if col not in df.columns:
        raise ValueError(f"'{col}' sÃ¼tunu eksik! LÃ¼tfen veriyi kontrol et.")

# Load the MPNet sentence encoder (SentenceTransformer is already imported at
# the top of this cell) and run it on two sample sentences as a smoke test.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]
embeddings = model.encode(sentences)
print(embeddings)


# Cleaned texts as a plain list of strings.
texts = df["clean_text"].astype(str).tolist()

# Encode in fixed-size slices so progress and timing can be reported per slice.
batch_size = 64
chunks = []
for offset in range(0, len(texts), batch_size):
    chunks.append(texts[offset:offset + batch_size])
X_embed_chunks = []
total_chunks = len(chunks)

print(f"\n Embed iÅŸlemi {total_chunks} parÃ§ada yapÄ±lacak...\n")

for position, piece in enumerate(chunks, start=1):
    started_at = time.time()
    X_embed_chunks.append(
        model.encode(piece, batch_size=batch_size, show_progress_bar=False)
    )
    duration = time.time() - started_at
    print(f" Chunk {position}/{total_chunks} iÅŸlendi. ({duration:.2f} saniye)")

# Stack the per-slice results row-wise into a single matrix.
X_embed = np.vstack(X_embed_chunks)

# Output locations for the embeddings, the labels and the fitted encoder.
embedding_npy_path = "C:/Users/EKIN/Desktop/mpnet_embeddings.npy"
label_npy_path = "C:/Users/EKIN/Desktop/mpnet_labels.npy"
embedding_csv_path = "C:/Users/EKIN/Desktop/mpnet_embeddings.csv"
encoder_save_path = "C:/Users/EKIN/Desktop/label_encoder.pkl"

# Map the 'status' values to integer class ids.
le = LabelEncoder()
y_encoded = le.fit_transform(df["status"].values)

# Binary artifacts: embedding matrix, label vector and the fitted encoder
# (needed later to turn predicted ids back into labels).
np.save(embedding_npy_path, X_embed)
np.save(label_npy_path, y_encoded)
joblib.dump(le, encoder_save_path)

# CSV copy: one row per text, embedding dimensions as columns plus "label".
X_embed_df = pd.DataFrame(X_embed).assign(label=y_encoded)
X_embed_df.to_csv(embedding_csv_path, index=False)

print("\n Embed iÅŸlemi tamamlandÄ±.")
print(f" Kaydedilen dosyalar:\n- {embedding_npy_path}\n- {label_npy_path}\n- {embedding_csv_path}\n- {encoder_save_path}")

In [None]:
from sklearn.decomposition import PCA
import numpy as np
import joblib

# Input and output locations for this stage.
embedding_npy_path = "C:/Users/EKIN/Desktop/mpnet_embeddings.npy"
pca_npy_path = "C:/Users/EKIN/Desktop/mpnet_embeddings_pca.npy"
pca_model_path = "C:/Users/EKIN/Desktop/pca_model.pkl"

# Load the saved sentence embeddings.
X_embed = np.load(embedding_npy_path)

# Fit a 400-component PCA on the embeddings and project them in one step.
pca = PCA(n_components=400, random_state=42)
X_reduced = pca.fit_transform(X_embed)

# Save both the projected data and the fitted PCA so the same projection can
# be applied to new inputs later.
np.save(pca_npy_path, X_reduced)
joblib.dump(pca, pca_model_path)

for message in (
    " PCA ile boyut indirgeme tamamlandÄ±.",
    f" Kaydedilen dosyalar:\n- {pca_npy_path}\n- {pca_model_path}",
    f" Yeni boyut: {X_reduced.shape}",
):
    print(message)


In [None]:
import numpy as np
import joblib
from collections import Counter

# Read the saved label array from disk.
label_npy_path = "C:/Users/EKIN/Desktop/mpnet_labels.npy"
y = np.load(label_npy_path)

# Count how many samples each class has and print the tally.
class_counts = Counter(y)
print("Orijinal sÄ±nÄ±f daÄŸÄ±lÄ±mÄ±:", class_counts)


In [None]:
from imblearn.over_sampling import SMOTE
import numpy as np
import joblib

# Counter is needed for the class-distribution printout below. Import it here
# so this cell runs on its own instead of relying on another cell's import.
from collections import Counter

# Load the PCA-reduced embeddings and their labels.
X = np.load("C:/Users/EKIN/Desktop/mpnet_embeddings_pca.npy")
y = np.load("C:/Users/EKIN/Desktop/mpnet_labels.npy")

# NOTE(review): SMOTE runs on the full dataset here. If these arrays are later
# split into train/test, synthetic samples interpolated from test-set
# neighbours end up in training and inflate the scores; in that case resample
# the training split only.
print(" SMOTE uygulanÄ±yor...")
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

print(f"SMOTE sonrasÄ± yeni veri boyutu: {X_resampled.shape}")
print(f" Yeni sÄ±nÄ±f daÄŸÄ±lÄ±mÄ±: {Counter(y_resampled)}")

# Persist the resampled arrays.
np.save("C:/Users/EKIN/Desktop/X_resampled.npy", X_resampled)
np.save("C:/Users/EKIN/Desktop/y_resampled.npy", y_resampled)

In [None]:
import re
import string
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import numpy as np
import joblib
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# SMOTE is applied inside this block (training split only), so import it here.
from imblearn.over_sampling import SMOTE

# Load the PCA-reduced embeddings and labels *before* any resampling.
# The already-resampled arrays on disk (X_resampled.npy / y_resampled.npy) are
# deliberately not used: splitting after SMOTE puts synthetic points
# interpolated from test samples into the training set, and synthetic samples
# into the test set, which makes the reported scores unrealistically high.
X = np.load("C:/Users/EKIN/Desktop/mpnet_embeddings_pca.npy")
y = np.load("C:/Users/EKIN/Desktop/mpnet_labels.npy")

# Stratified split on the real data; the test set keeps the true class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Oversample minority classes in the training split only.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Convert to tensors: float32 features, int64 class ids (CrossEntropyLoss targets).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Datasets and loaders; only the training loader shuffles.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=128)
# Model definition
class DeepModel(nn.Module):
    """Fully connected classifier: input_dim -> 512 -> 256 -> 128 -> num_classes.

    Each hidden layer is followed by ReLU; dropout (p=0.4, then p=0.3) follows
    the first two. The forward pass returns the output layer's raw scores; no
    softmax is applied.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Attribute names double as state_dict keys, so they must stay as they
        # are for saved checkpoints to keep loading.
        self.fc1 = nn.Linear(input_dim, 512)
        self.dropout1 = nn.Dropout(0.4)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(256, 128)
        self.out = nn.Linear(128, num_classes)

    def forward(self, x):
        hidden = self.dropout1(torch.relu(self.fc1(x)))
        hidden = self.dropout2(torch.relu(self.fc2(hidden)))
        hidden = torch.relu(self.fc3(hidden))
        return self.out(hidden)

# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# DataLoader shuffle order are repeatable, in line with the random_state=42
# used by the other stochastic steps of this notebook.
torch.manual_seed(42)

# Network size follows the data: one input per feature, one output per class.
input_dim = X.shape[1]
num_classes = len(np.unique(y))
model = DeepModel(input_dim, num_classes)

# Training setup: cross-entropy on the raw class scores, Adam optimiser.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # number of epochs; adjust as needed
    model.train()  # enable dropout
    for xb, yb in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} tamamlandÄ± âœ…")

# Evaluate on the test loader with dropout disabled and gradients off.
model.eval()
preds, true = [], []
with torch.no_grad():
    for features, targets in test_loader:
        scores = model(features)
        # The predicted class is the index of the highest score in each row.
        preds.extend(scores.argmax(dim=1).numpy())
        true.extend(targets.numpy())

acc = accuracy_score(true, preds)
f1 = f1_score(true, preds, average="weighted")
print(f" PyTorch Model Accuracy: {acc:.4f}, F1-score: {f1:.4f}")

# Save only the learned parameters (state_dict), not the whole module.
model_save_path = "C:/Users/EKIN/Desktop/deep_model.pt"
torch.save(model.state_dict(), model_save_path)
print(" Model baÅŸarÄ±yla kaydedildi: deep_model.pt")

# Helpers for prediction
def clean_text(text):
    """Normalise one raw text before it is embedded.

    The text is lower-cased, then URLs, HTML tags, ASCII punctuation and every
    token containing a digit are removed. Line breaks become spaces and runs
    of whitespace are collapsed; the result is stripped at both ends.
    """
    # Applied strictly in this order: URLs must go before punctuation is
    # stripped, otherwise the URL patterns no longer match.
    substitutions = (
        (r'https?://\S+|www\.\S+', ''),                  # links
        (r'<.*?>+', ''),                                 # HTML tags
        (r'[%s]' % re.escape(string.punctuation), ''),   # punctuation
        (r'\n', ' '),                                    # line breaks
        (r'\w*\d\w*', ''),                               # tokens containing a digit
        (r'\s+', ' '),                                   # repeated whitespace
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def predict_from_text_pytorch(text, encoder_path, pca_path, model_path, embed_model):
    """Predict the label for a single raw text.

    The text goes through clean_text, is embedded with ``embed_model``,
    projected with the saved PCA and classified by a DeepModel restored from
    ``model_path``.

    Args:
        text: Raw input string.
        encoder_path: Path to the joblib-saved label encoder.
        pca_path: Path to the joblib-saved fitted PCA.
        model_path: Path to the DeepModel state_dict written by torch.save.
        embed_model: Object with an ``encode(list_of_str)`` method; used in
            this notebook with a SentenceTransformer.

    Returns:
        The label produced by the encoder's ``inverse_transform`` for the
        predicted class id.
    """
    # NOTE: joblib.load unpickles arbitrary objects; only point these paths at
    # artifacts you created yourself.
    pca = joblib.load(pca_path)
    label_encoder = joblib.load(encoder_path)

    vector = embed_model.encode([clean_text(text)])
    vector_pca = pca.transform(vector)

    # Rebuild the network with the sizes implied by the saved artifacts.
    model = DeepModel(input_dim=vector_pca.shape[1], num_classes=len(label_encoder.classes_))
    # map_location="cpu": inference below runs on CPU tensors, and this lets a
    # checkpoint saved from a GPU be loaded on a machine without one.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()  # disable dropout for inference

    with torch.no_grad():
        x = torch.tensor(vector_pca, dtype=torch.float32)
        predicted = torch.argmax(model(x), dim=1).item()

    return label_encoder.inverse_transform([predicted])[0]

# Example: predict the label of a single sentence using the saved artifacts.
mpnet = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
test_text = "Iâ€™m feeling so anxious and overwhelmed lately."

artifact_paths = {
    "encoder_path": "C:/Users/EKIN/Desktop/label_encoder.pkl",
    "pca_path": "C:/Users/EKIN/Desktop/pca_model.pkl",
    "model_path": "C:/Users/EKIN/Desktop/deep_model.pt",
}
predicted_label = predict_from_text_pytorch(test_text, embed_model=mpnet, **artifact_paths)
print(f"\nðŸ’¡ PyTorch Modeli Tahmini: {predicted_label}")
