<a href="https://colab.research.google.com/github/BerkAIcelik/hate-speech-detection-system-using-TR-tweets/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the flask-ngrok and pyngrok packages into the notebook environment
# and import them.
# NOTE(review): `run_with_ngrok` is not referenced anywhere else in this
# notebook; the tunnel is opened through `pyngrok.ngrok` instead.
!pip install flask-ngrok
from flask_ngrok import run_with_ngrok
!pip install pyngrok
from pyngrok import ngrok

# Register the ngrok auth token with the ngrok CLI.
# NOTE(review): the token appears to be redacted below; supply your own
# token before running this cell.
!ngrok authtoken ###################################


In [None]:
# Install the zemberek-python package; the `zemberek` imports in the next
# cell depend on it.
pip install zemberek-python

In [None]:
from flask import Flask, request, jsonify
import joblib
import numpy as np
import re
from gensim.models import Word2Vec
from zemberek import TurkishMorphology, TurkishSentenceNormalizer
from zemberek.tokenization import TurkishTokenizer
from xgboost import XGBClassifier
from pyngrok import ngrok

# Create the Flask application
app = Flask(__name__)

# Open an ngrok tunnel to local port 5000 (the same port the server is
# started on at the bottom of this cell) and print its public URL
public_url = ngrok.connect(5000).public_url
print(f"🔗 Flask API'yi açmak için buraya tıkla: {public_url}")

# Load the models
# NOTE(review): the Word2Vec path is absolute (/content/...) while the
# classifier path is relative to the current working directory; confirm that
# both resolve in the environment where this notebook runs.
word2vec_path = "/content/word2vec_model.model"
model_path = "xgboost_combined_word2vec.pkl"

word2vec_model = Word2Vec.load(word2vec_path)  # Word2Vec model
model = joblib.load(model_path)  # XGBoost model

# Load the Zemberek components and the stop-word set used by preprocessing.
# TurkishSpellChecker is not among the names imported at the top of this
# cell, so it is imported here; without it the assignment below raises
# NameError.
from zemberek import TurkishSpellChecker

morphology = TurkishMorphology.create_with_defaults()
spell_checker = TurkishSpellChecker(morphology)
tokenizer = TurkishTokenizer.DEFAULT
# NOTE(review): `stopwords` is never imported in this notebook. The call
# shape matches NLTK's `nltk.corpus.stopwords` (which also requires the
# stop-word corpus to be downloaded) -- confirm and add that import/setup,
# otherwise the next line raises NameError.
stop_words = set(stopwords.words('turkish'))
normalizer = TurkishSentenceNormalizer(morphology)

# Tokenization and stop-word removal
def tokenize_and_clean(text):
    """Clean a raw tweet and return its tokens with stop words removed.

    The text is lower-cased, stripped of URLs, @-mentions, ASCII digits and
    any character that is neither a word character nor whitespace, has its
    whitespace collapsed, and is then passed through the module-level
    Zemberek ``normalizer`` and ``tokenizer``.

    Args:
        text: The tweet text as a string.

    Returns:
        A list of token strings that are not in ``stop_words``.
    """
    # (pattern, replacement) pairs, applied in this exact order.
    substitutions = (
        (r'http\S+|www\.\S+', ''),  # URLs
        (r'@\w+', ''),              # user mentions
        (r'[0-9]', ''),             # digits
        (r'[^\w\s]', ''),           # punctuation / symbols
        (r'\s+', ' '),              # collapse whitespace runs
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Normalize with Zemberek before tokenizing.
    normalized = normalizer.normalize(cleaned.strip())

    kept = []
    for token in tokenizer.tokenize(normalized):
        word = token.content
        if word not in stop_words:
            kept.append(word)
    return kept

# Build a single feature vector from Word2Vec embeddings
def get_word_vector(tokens):
    """Return the mean Word2Vec embedding of the in-vocabulary tokens.

    Args:
        tokens: Iterable of token strings.

    Returns:
        A 1-D NumPy array: the element-wise mean of the embeddings of the
        tokens found in ``word2vec_model.wv``, or an all-zero vector of the
        model's embedding size when none of the tokens is in the vocabulary.
    """
    embeddings = word2vec_model.wv
    vectors = [embeddings[word] for word in tokens if word in embeddings]
    if not vectors:
        # Size the fallback from the loaded model instead of a hard-coded
        # 300 so it always matches the width of the real embeddings.
        return np.zeros(embeddings.vector_size)
    return np.mean(vectors, axis=0)

# Flask routes
@app.route('/')
def home():
    """Return a plain-text message confirming that the API is running."""
    return "Flask API Çalışıyor!"

@app.route('/predict', methods=['POST'])
def predict():
    """Classify the tweet sent in the JSON body of a POST request.

    Expects a JSON object with a ``"tweet"`` key holding the tweet text.

    Returns:
        On success, a JSON response with the original ``tweet`` and the
        model's ``prediction`` rendered as a string. If the body is not a
        JSON object, or ``"tweet"`` is missing, empty, or not a string, a
        JSON error message with HTTP status 400.
    """
    data = request.get_json()
    # A JSON body that is not an object (null, a list, a bare string, ...)
    # has no "tweet" key; treat it like a missing tweet instead of crashing
    # on `.get`.
    tweet = data.get("tweet") if isinstance(data, dict) else None

    # Reject missing/empty tweets, and non-string values that the text
    # cleaner cannot process (it calls string methods on the input).
    if not tweet or not isinstance(tweet, str):
        return jsonify({"error": "Lütfen bir tweet girin"}), 400

    # Clean and tokenize the text
    tokens = tokenize_and_clean(tweet)
    # Build the feature vector
    vector = get_word_vector(tokens)

    # Predict on a single-row 2-D array (one sample), the explicit form of
    # the original `[vector]`.
    prediction = model.predict(np.asarray(vector).reshape(1, -1))

    # Return the prediction
    return jsonify({"tweet": tweet, "prediction": str(prediction[0])})

# Start the Flask server on port 5000, the same port that was passed to
# ngrok.connect when the tunnel was opened above.
if __name__ == '__main__':
    app.run(port=5000)
