In [5]:
import pandas as pd
import numpy as np
import pickle
import re
import string
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from sklearn.metrics import accuracy_score
import spacy
from textstat import flesch_reading_ease, automated_readability_index
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load the spaCy English pipeline; `nlp` is left as None when loading fails.
try:
    nlp = spacy.load("en_core_web_sm")
except Exception:
    # Still best-effort, but unlike a bare ``except:`` this no longer
    # swallows KeyboardInterrupt / SystemExit.
    nlp = None

# Load the trained model and the preprocessing components saved alongside it.
try:
    lstm_model = tf.keras.models.load_model(".ipynb_checkpoints/lstm_model.h5")
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load this artifact from a trusted location.
    with open('.ipynb_checkpoints/preprocessing_components.pkl', 'rb') as f:
        preprocessing = pickle.load(f)
    sequence_length = preprocessing['sequence_length']
    num_text_features = preprocessing['num_text_features']
    num_fin_features = preprocessing['num_fin_features']
    financial_keywords = preprocessing['financial_keywords']
except Exception as e:
    print("Model veya preprocessing dosyası yüklenemedi:", e)
    # Nothing below can run without the model, so abort with a non-zero
    # status. The interactive ``exit()`` helper is provided by the ``site``
    # module (not guaranteed to exist) and would report success (status 0).
    raise SystemExit(1) from e

# Load the labelled dataset used for evaluation.
test_data = pd.read_csv(
    filepath_or_buffer='stockMarket_predict/archive/reliance/finaldata_updated_labels.csv',
)

# Text cleaning
def clean_text(text):
    """Normalize a raw text value into a cleaned, lowercase string.

    Steps, in order: lowercase; strip bytes-literal prefixes (``b'`` / ``b"``)
    and every remaining quote character; drop HTML tags; drop URLs; replace
    each run of digits with the token ``NUMBER``; remove every character that
    is not a word character, whitespace or one of ``.,!?;:``; collapse
    whitespace runs and trim the ends.

    Args:
        text: Any value; non-missing values are converted with ``str()``.

    Returns:
        The cleaned string, or ``""`` when ``pd.isna(text)`` is true.
    """
    if pd.isna(text):
        return ""
    text = str(text).lower()
    # The ``b`` prefix is removed only at a word boundary. Without ``\b`` the
    # pattern also ate the final "b" of ordinary words followed by an
    # apostrophe ("club's" -> "clus").
    text = re.sub(r"\bb['\"]|['\"]", "", text)
    text = re.sub(r'<[^>]+>', '', text)
    # ``http\S+`` already covers https URLs.
    text = re.sub(r'http\S+|www\S+', '', text)
    # Inserted after lowercasing, so the placeholder stays upper-case.
    text = re.sub(r'\d+', 'NUMBER', text)
    text = re.sub(r'[^\w\s.,!?;:]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Build sliding-window sequences
def create_sequence(dataframe, sequence_length):
    """Turn a row-ordered frame into sliding windows and next-row labels.

    Window ``i`` holds the feature values (every column except ``"Date"`` and
    ``"label"``) of rows ``i .. i + sequence_length - 1``; its target is the
    ``"label"`` of row ``i + sequence_length``.

    Args:
        dataframe: Frame containing ``"Date"`` and ``"label"`` columns plus
            the feature columns, already in the desired row order.
        sequence_length: Number of consecutive rows per window.

    Returns:
        A tuple ``(x_seq, y_seq)`` of two lists with
        ``len(dataframe) - sequence_length`` entries each (both empty when the
        frame is not longer than ``sequence_length``).
    """
    # Loop-invariant work: materialize the feature matrix and label vector
    # once instead of dropping columns / building a row Series per window.
    features = dataframe.drop(columns=["Date", "label"]).values
    labels = dataframe["label"].values

    window_starts = range(len(dataframe) - sequence_length)
    x_seq = [features[start:start + sequence_length] for start in window_starts]
    y_seq = [labels[start + sequence_length] for start in window_starts]
    return x_seq, y_seq

# Parse dates, order rows chronologically and build the cleaned text column.
def _join_row_as_text(row):
    """Join every cell of *row* (converted to str) with single spaces."""
    return ' '.join(row.values.astype(str))


test_data["Date"] = pd.to_datetime(test_data["Date"], format="%d-%m-%Y")
test_data = test_data.sort_values("Date").reset_index(drop=True)

# Columns at positions 2..26 are merged into one text per row (presumably the
# headline columns; selected by position, so the CSV column order matters).
# NOTE(review): missing cells are stringified as well before cleaning --
# confirm that is intended.
text_columns = test_data.iloc[:, 2:27]
test_data['Combined'] = text_columns.apply(_join_row_as_text, axis=1)
test_data['Cleaned'] = test_data['Combined'].apply(clean_text)

# Sentiment scores
sia = SentimentIntensityAnalyzer()
# Score each cleaned text once and build the frame in a single step; wrapping
# every score dict in its own pd.Series inside ``apply`` yields the same
# columns but allocates one Series per row.
sentiment_df = pd.DataFrame(
    [sia.polarity_scores(text) for text in test_data['Cleaned']],
    index=test_data.index,
)

test_data['Compound'] = sentiment_df['compound']
test_data['Positive'] = sentiment_df['pos']
test_data['Negative'] = sentiment_df['neg']
test_data['Neutral'] = sentiment_df['neu']
# Constant marker column; it is counted per date in the daily aggregation.
test_data['headlines'] = 1

# Daily features
# The key order below fixes the column order of `daily_data`; features are
# later consumed by position, so do not reorder.
daily_aggregations = {
    'Compound_mean': pd.NamedAgg(column='Compound', aggfunc='mean'),
    'Compound_max': pd.NamedAgg(column='Compound', aggfunc='max'),
    'Compound_std': pd.NamedAgg(column='Compound', aggfunc='std'),
    'Positive_mean': pd.NamedAgg(column='Positive', aggfunc='mean'),
    'Positive_max': pd.NamedAgg(column='Positive', aggfunc='max'),
    'Negative_mean': pd.NamedAgg(column='Negative', aggfunc='mean'),
    'Negative_max': pd.NamedAgg(column='Negative', aggfunc='max'),
    'Neutral_mean': pd.NamedAgg(column='Neutral', aggfunc='mean'),
    'Neutral_max': pd.NamedAgg(column='Neutral', aggfunc='max'),
    'headline_count': pd.NamedAgg(column='headlines', aggfunc='count'),
    'label': pd.NamedAgg(column='label', aggfunc='first'),
}
# Missing aggregates are replaced with 0.
daily_data = test_data.groupby("Date").agg(**daily_aggregations).fillna(0)

# Additional time-series features
# Columns are appended in the order listed; the leading rows, where the
# differences are undefined, end up as 0 via the final fillna.
daily_data = (
    daily_data.reset_index()
    .sort_values('Date')
    .assign(
        volume_trend=lambda df: df["headline_count"].diff(periods=3),
        compound_momentum=lambda df: df["Compound_mean"].diff().ewm(alpha=0.3).mean(),
        pos_trend=lambda df: df["Positive_mean"].diff(periods=3),
        neg_trend=lambda df: df["Negative_mean"].diff(periods=3),
    )
    .fillna(0)
)

# Build the sequences and convert both lists to arrays
X_seq, y_seq = (
    np.array(part) for part in create_sequence(daily_data, sequence_length)
)

# Test split: keep the final 20% of the windows for evaluation
split_index = int(0.8 * len(X_seq))
X_test, y_test = X_seq[split_index:], y_seq[split_index:]

# Split the feature axis into the two model inputs
text_features = X_test[:, :, :num_text_features]
fin_features = X_test[:, :, num_text_features:]

# Bring both inputs to the size the model expects (zero padding)

# Feature counts the model expects (hard-coded)
expected_text_features = 15
expected_fin_features = 2


def _zero_pad_last_axis(features, target_width):
    """Right-pad a 3-D array with zeros along axis 2 up to *target_width*.

    Arrays whose last axis already has at least *target_width* entries are
    returned unchanged.
    """
    missing = target_width - features.shape[2]
    if missing <= 0:
        return features
    zeros = np.zeros((features.shape[0], features.shape[1], missing))
    return np.concatenate([features, zeros], axis=2)


# Text input padding
text_features = _zero_pad_last_axis(text_features, expected_text_features)

# Financial input padding
fin_features = _zero_pad_last_axis(fin_features, expected_fin_features)

# Prediction and evaluation
model_inputs = [text_features, fin_features]
y_pred_probs = lstm_model.predict(model_inputs)

# Outputs strictly above 0.5 are mapped to class 1, everything else to class 0.
above_threshold = y_pred_probs > 0.5
y_pred = above_threshold.astype(int).flatten()
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")




16/16 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step
Accuracy: 0.5323
