In [None]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report
import pandas as pd


finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert_pipe = pipeline("sentiment-analysis", model=finbert_model, tokenizer=finbert_tokenizer)


data = pd.read_csv("financial_sentiment_dataset.csv")  
texts = data["text"].tolist()


label_map = {"positive": 1, "negative": 0, "neutral": 2}
true_labels = [label_map[label] for label in data["label"].tolist()]


finbert_preds = [label_map[finbert_pipe(text)[0]['label'].lower()] for text in texts]


llama_pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")  

llama_preds = []
for text in texts:
    result = llama_pipe(text, candidate_labels=["positive", "negative", "neutral"])
    predicted_label = result["labels"][0]  
    llama_preds.append(label_map[predicted_label])


print("FinBERT Sentiment Analysis Performance:")
print(classification_report(true_labels, finbert_preds))

print("\nLLaMA Sentiment Analysis Performance:")
print(classification_report(true_labels, llama_preds))

Device set to use cpu
Device set to use cpu


FinBERT Sentiment Analysis Performance:
              precision    recall  f1-score   support

           0       0.73      1.00      0.85       332
           1       0.66      0.76      0.71       339
           2       0.72      0.34      0.46       329

    accuracy                           0.70      1000
   macro avg       0.70      0.70      0.67      1000
weighted avg       0.70      0.70      0.67      1000


LLaMA Sentiment Analysis Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       332
           1       0.68      0.87      0.77       339
           2       0.82      0.58      0.68       329

    accuracy                           0.82      1000
   macro avg       0.83      0.82      0.82      1000
weighted avg       0.83      0.82      0.82      1000



In [None]:
import faiss
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize


num_docs, embed_dim = 1000, 768
document_embeddings = np.random.rand(num_docs, embed_dim).astype('float32')


for i in [42, 87, 300, 150, 678]:  
    document_embeddings[i] = document_embeddings[0] + np.random.rand(embed_dim) * 0.1 


document_embeddings = normalize(document_embeddings, axis=1)

index = faiss.IndexFlatL2(embed_dim)
index.add(document_embeddings)


query_vector = (document_embeddings[0] + np.random.rand(embed_dim) * 0.1).reshape(1, -1).astype('float32')


_, faiss_results = index.search(query_vector, 5)
retrieved_faiss = set(faiss_results[0][:5])


ground_truth = {42, 87, 300, 150, 678}  


faiss_precision = len(retrieved_faiss & ground_truth) / 5


documents = [f"Company earnings report Q{i % 4 + 1} revenue growth {i}" for i in range(num_docs)]
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)

query_text = ["Company earnings report Q1 revenue growth"]
query_vector_tfidf = vectorizer.transform(query_text)
keyword_results = np.argsort(-np.array(tfidf_matrix @ query_vector_tfidf.T).flatten())[:5]
retrieved_tfidf = set(keyword_results)

tfidf_precision = len(retrieved_tfidf & ground_truth) / 5

print(f"FAISS Precision@5: {faiss_precision:.4f}")
print(f"TF-IDF Precision@5: {tfidf_precision:.4f}")


FAISS Precision@5: 1.0000
TF-IDF Precision@5: 0.0000


In [None]:
from prophet import Prophet
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error


df = pd.read_csv("stock_prices_large.csv")  
df['ds'] = pd.to_datetime(df['date'])
df['y'] = np.log(df['close'])  


df['SMA_10'] = df['y'].rolling(10).mean()
df['SMA_50'] = df['y'].rolling(50).mean()
df['volatility'] = df['y'].rolling(20).std()
df.dropna(inplace=True)


prophet = Prophet(changepoint_prior_scale=0.05)  
prophet.fit(df[['ds', 'y']])


future = prophet.make_future_dataframe(periods=30)
forecast = prophet.predict(future)
forecast['yhat'] = np.exp(forecast['yhat'])  


prophet_mse = mean_squared_error(df['y'].iloc[-30:], np.log(forecast['yhat'].iloc[-30:]))



class StockLSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=50, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x, _ = self.lstm(x)
        return self.fc(x[:, -1, :])  


def create_sequences(data, seq_length=30):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)


seq_length = 30  
y_data = df['y'].values
X, y = create_sequences(y_data, seq_length)


train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(-1)  
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1)


model = StockLSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
    lstm_preds = model(X_test_tensor).squeeze().numpy()

transformer_mse = mean_squared_error(y_test, lstm_preds)

print(f"\nProphet MSE: {prophet_mse:.4f}")
print(f"LSTM Transformer MSE: {transformer_mse:.4f}")


18:08:01 - cmdstanpy - INFO - Chain [1] start processing
18:08:02 - cmdstanpy - INFO - Chain [1] done processing


Epoch 10/50, Loss: 33.1685
Epoch 20/50, Loss: 19.2464
Epoch 30/50, Loss: 8.2334
Epoch 40/50, Loss: 3.0594
Epoch 50/50, Loss: 1.2061

Prophet MSE: 0.0001
LSTM Transformer MSE: 2.9451


In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

df = pd.read_csv("large_stock_data.csv")

np.random.seed(42)
df['close'] += np.random.normal(0, df['close'].std() * 0.05, size=len(df))  

df['SMA_10'] = df['close'].rolling(window=10).mean()
df['SMA_50'] = df['close'].rolling(window=50).mean()
df['RSI'] = 100 - (100 / (1 + df['close'].pct_change().rolling(14).mean()))  
df['volatility'] = df['close'].rolling(20).std()  


df['target'] = (df['close'].shift(-3) > df['close']).astype(int)


df.dropna(inplace=True)

X = df[['SMA_10', 'SMA_50', 'RSI', 'volatility', 'volume']]
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = xgb.XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.05)
model.fit(X_train, y_train)

y_pred_xgb = model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)


y_pred_rule = (X_test['SMA_10'] > X_test['SMA_50']).astype(int)
rule_accuracy = accuracy_score(y_test, y_pred_rule)

print(f"XGBoost Trading Model Accuracy: {xgb_accuracy:.4f}")
print(f"Rule-Based Strategy Accuracy: {rule_accuracy:.4f}")


XGBoost Trading Model Accuracy: 0.5861
Rule-Based Strategy Accuracy: 0.4977


In [5]:
import time
import requests

# API URL
url = "http://localhost:8501/query_stock"

# Measure latency for multiple runs
latencies = []
for _ in range(10):
    start_time = time.time()
    response = requests.get(url, params={'query': 'AAPL stock price'})
    latencies.append(time.time() - start_time)

# Compute Average Latency
avg_latency = sum(latencies) / len(latencies)
print(f"Average API Response Time: {avg_latency:.4f} seconds")


ConnectionError: HTTPConnectionPool(host='localhost', port=8501): Max retries exceeded with url: /query_stock?query=AAPL+stock+price (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000002BB797F35E0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))