<a href="https://colab.research.google.com/github/Tnjdh014/MarketAnomalyDetection/blob/main/Market_AnomalyDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score

# Step 1: Load and Preprocess Data
data_path = '/content/drive/MyDrive/sp500_trends.csv'  # Replace with your dataset's path
data = pd.read_csv(data_path)

# Data cleaning and preprocessing
# Drop the first four rows and renumber the index from zero.
# NOTE(review): presumably these rows are export metadata rather than
# observations - confirm against the CSV.
data = data.iloc[4:].reset_index(drop=True)

# Give header-less columns (read by pandas as "Unnamed: N") a positional name.
# str() keeps the check safe if a column label is not a string.
data.columns = [
    f"Column_{i}" if str(col).startswith("Unnamed") else col
    for i, col in enumerate(data.columns)
]

# Treat the stray 'XAU BGNL Curncy' marker as a missing value.
data = data.replace('XAU BGNL Curncy', np.nan)

# Keep only columns with non-null values in at least half of the rows.
# dropna documents `thresh` as an integer count; ceil() keeps the same cut-off
# as the previous float comparison.
min_non_null = int(np.ceil(len(data) * 0.5))

# Forward-fill remaining gaps, then zero-fill anything still missing (for
# example leading rows). `.ffill()` replaces the deprecated
# `fillna(method='ffill')`, which emits a FutureWarning.
data = data.dropna(axis=1, thresh=min_non_null).ffill().fillna(0)

# Handle target for binary classification
target_column = 'sp500_increase'  # Replace with correct target column if needed
if target_column not in data.columns:
    # Fall back to the positional name assigned to header-less columns.
    target_column = 'Column_1'
if target_column not in data.columns:
    raise KeyError(
        f"Target column not found; tried 'sp500_increase' and 'Column_1'. "
        f"Available columns: {list(data.columns)}"
    )

X = data.drop(columns=[target_column])
y = data[target_column]

# Encode categorical features
# One get_dummies call one-hot encodes every object-typed column (dropping the
# first level of each) and appends the indicator columns after the untouched
# ones. This avoids re-concatenating the whole frame once per feature.
categorical_features = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=list(categorical_features), drop_first=True)

# Encode target labels if necessary
# String labels are mapped to integer codes; `le` is kept so the codes can be
# mapped back to the original labels later.
if y.dtype == 'object':
    le = LabelEncoder()
    y = le.fit_transform(y)

# Train-test split: hold out 20% of the rows, with a fixed seed so the split
# is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale numeric features
# The scaler is fitted on the training split only and then applied to both
# splits; results are wrapped back into DataFrames to keep the column names.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    pd.DataFrame(scaler.transform(split), columns=split.columns)
    for split in (X_train, X_test)
)

# Step 2: Train Classification Models
# Candidate classifiers, keyed by the display name used in the reports below.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest Classifier": RandomForestClassifier(
        n_estimators=100, random_state=42
    ),
    "Neural Network Classifier": MLPClassifier(
        hidden_layer_sizes=(100, 50), max_iter=500, random_state=42
    ),
}

# Fit each classifier on the scaled training data and store its predictions
# for the scaled test data under the same display name.
predictions = {}
for model_name, estimator in models.items():
    fitted = estimator.fit(X_train_scaled, y_train)
    predictions[model_name] = fitted.predict(X_test_scaled)

# Step 3: Evaluate Models
# Scorers that are computed with average='weighted'.
weighted_scorers = {
    "F1 Score": f1_score,
    "Precision": precision_score,
    "Recall": recall_score,
}

metrics = {}
for name, preds in predictions.items():
    scores = {"Accuracy": accuracy_score(y_test, preds)}
    for label, scorer in weighted_scorers.items():
        scores[label] = scorer(y_test, preds, average='weighted')
    scores["Classification Report"] = classification_report(y_test, preds)
    scores["Confusion Matrix"] = confusion_matrix(y_test, preds)
    metrics[name] = scores

# Display metrics
# The report and the matrix span several lines, so they are printed under
# their heading; every other entry is a single number shown to two decimals.
multiline_keys = ("Classification Report", "Confusion Matrix")
for name, metric in metrics.items():
    print(f"\nModel: {name}")
    for key, value in metric.items():
        if key not in multiline_keys:
            print(f"{key}: {value:.2f}")
            continue
        print(f"{key}:\n{value}")

# Identify the best model
# Rank by test accuracy; on a tie the model listed first wins.
accuracy_by_model = {
    candidate: scores["Accuracy"] for candidate, scores in metrics.items()
}
best_model_name = max(accuracy_by_model, key=accuracy_by_model.get)
best_model = models[best_model_name]
print(f"\nBest Model: {best_model_name} with Accuracy: {metrics[best_model_name]['Accuracy'] * 100:.2f}%")

# Step 4: Investment Strategy
def investment_strategy(prediction):
    """Translate a model prediction into a short piece of investment advice.

    Args:
        prediction: Predicted class label. A value equal to 1 is treated as a
            predicted crash; any other value is treated as no crash.

    Returns:
        str: Loss-minimizing advice when a crash is predicted, otherwise
        return-maximizing advice.
    """
    crash_predicted = prediction == 1
    if not crash_predicted:
        # No crash predicted
        return "Maximize returns: Consider high-growth stocks, tech sector, or diversified investments."
    # Crash predicted
    return "Minimize losses: Consider hedging with inverse ETFs, bond investments, or safe-haven assets."

# Example strategy for the first sample
# Slice (rather than index) so the estimator still receives a one-row frame.
first_test_row = X_test_scaled.iloc[:1]
example_prediction = best_model.predict(first_test_row)[0]
strategy = investment_strategy(example_prediction)
print(f"\nInvestment Strategy for example prediction: {strategy}")

# Step 5: Integrate FinguAI-Chat for Chatbot Interaction
model_id = 'FINGU-AI/FinguAI-Chat-v1'  # Replace with the correct model ID

# Prefer the GPU, but fall back to the CPU so the cell still runs on a runtime
# without CUDA instead of raising on `.to('cuda')`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# bfloat16 is kept on the GPU as before; float32 is used on the CPU fallback.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
streamer = TextStreamer(tokenizer)  # passed to generate() below
model.to(device)


def chatbot_interaction(user_prompt):
    """Ask the finance chat model a question and return its reply.

    The prompt is wrapped in a two-message chat (a fixed system instruction
    plus the user's text), rendered with the tokenizer's chat template and
    sampled from the module-level `model`. `streamer` is handed to
    `generate`, so output may also be printed while it is produced.

    Args:
        user_prompt: The user's question as plain text.

    Returns:
        str: The newly generated text only, with special tokens removed. The
        system and user messages are not echoed back.
    """
    messages = [
        {"role": "system", "content": "You are a finance specialist. Help the user with investment strategies."},
        {"role": "user", "content": user_prompt}
    ]

    # return_dict=True also yields the attention mask, so generate() does not
    # have to guess it. Inputs follow the model to whichever device it is on.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=1000,
        use_cache=True,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )

    # generate() returns prompt + completion for decoder-only models; slice
    # the prompt tokens off so only the model's answer is returned.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0, prompt_length:], skip_special_tokens=True)

# Interactive chatbot prompt
# Blocks until the user types a question, then sends it to the chat model and
# prints the returned text.
# NOTE(review): a streamer is also passed to generate() inside
# chatbot_interaction, so the reply has presumably already been printed once
# by the time this print runs - confirm whether both outputs are wanted.
user_prompt = input("\nEnter your question for the AI chatbot: ")
response = chatbot_interaction(user_prompt)
print("\nChatbot Response:\n", response)




  data = data.dropna(axis=1, thresh=len(data) * 0.5).fillna(method='ffill').fillna(0)



Model: Logistic Regression
Accuracy: 0.86
F1 Score: 0.86
Precision: 0.86
Recall: 0.86
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.81      0.83       181
           1       0.86      0.90      0.88       244

    accuracy                           0.86       425
   macro avg       0.86      0.85      0.86       425
weighted avg       0.86      0.86      0.86       425

Confusion Matrix:
[[146  35]
 [ 24 220]]

Model: Random Forest Classifier
Accuracy: 0.99
F1 Score: 0.99
Precision: 0.99
Recall: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       181
           1       0.99      0.99      0.99       244

    accuracy                           0.99       425
   macro avg       0.99      0.99      0.99       425
weighted avg       0.99      0.99      0.99       425

Confusion Matrix:
[[179   2]
 [  3 241]]

Model: Neural Network Classifier
Accur

In [None]:
# Mount Google Drive at /content/drive.
# The dataset path used by the data-loading cell lives under
# /content/drive/MyDrive, so this cell has to be run before that one even
# though it appears after it in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
