In [None]:
import openai
import pandas as pd
import json
from tqdm import tqdm
import time
from datetime import datetime
import re

# Read the OpenAI API key from the environment instead of embedding it in the
# notebook. SECURITY: any key that was ever pasted into this file must be
# treated as leaked and revoked in the OpenAI dashboard.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it (or set os.environ['OPENAI_API_KEY']) "
        "before running this cell."
    )

# Load tweets CSV
df = pd.read_csv("/content/stock_tweets 2.csv")

# Verify required columns exist before touching them, so the failure message
# lists what the file actually contains.
required_columns = ['Date', 'Tweet', 'Stock Name']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    print(f"Missing required columns: {missing_columns}")
    print(f"Available columns: {df.columns.tolist()}")
    raise ValueError("Required columns not found in CSV")

# Filter for Tesla data only (copy so later column assignments do not hit a view)
tesla_data = df[df['Stock Name'] == 'TSLA'].copy()

if tesla_data.empty:
    print("No Tesla data found. Checking for alternative Tesla naming...")
    # Fall back to any label that contains "tsla", case-insensitively
    sample_companies = df['Stock Name'].unique()
    tesla_alternatives = [name for name in sample_companies if 'tsla' in str(name).lower()]

    if tesla_alternatives:
        print(f"Found possible Tesla entries: {tesla_alternatives}")
        # Use the first alternative found
        tesla_data = df[df['Stock Name'] == tesla_alternatives[0]].copy()
        print(f"Using '{tesla_alternatives[0]}' as Tesla company name")
    else:
        print("Available companies in data:")
        print(sample_companies)
        raise ValueError("No Tesla data found in the dataset")

# Convert date strings to datetime objects
try:
    tesla_data['Date'] = pd.to_datetime(tesla_data['Date'])
except Exception as e:
    print(f"Warning: Could not parse dates. Error: {e}")
    print("Sample dates from your data:")
    print(tesla_data['Date'].head())
    # Chain the original error so the root cause stays visible in the traceback
    raise ValueError("Date parsing failed") from e
else:
    print(f"Successfully parsed dates. Date range: {tesla_data['Date'].min()} to {tesla_data['Date'].max()}")

# Function to sanitize tweets for safer processing
def sanitize_tweet(tweet):
    """Return a single-line, escaped, length-capped version of *tweet*.

    Non-string input (e.g. NaN from pandas) yields an empty string.
    Backslashes and double quotes are backslash-escaped; newline, carriage
    return, tab and backspace characters each become one space. If the
    escaped text is longer than 280 characters it is cut to 277 characters
    followed by "...".
    """
    if not isinstance(tweet, str):
        return ""

    # One translation pass: every character is mapped independently, which
    # gives the same result as escaping backslashes first and quotes second.
    char_map = str.maketrans({
        '\\': '\\\\',
        '"': '\\"',
        '\n': ' ',
        '\r': ' ',
        '\t': ' ',
        '\b': ' ',
    })
    cleaned = tweet.translate(char_map)

    # Short enough: return as-is; otherwise truncate and mark with an ellipsis
    if len(cleaned) <= 280:
        return cleaned
    return cleaned[:277] + "..."

# Process tweets in smaller batches with simple numbered format
batch_size = 50  # Small batch size for reliability
all_results = []

# Values that do not change between batches are built once, outside the loop.
max_retries = 3
total_batches = (len(tesla_data) + batch_size - 1) // batch_size
system_prompt = "You are a financial sentiment analyst who classifies tweets about Tesla and its stock as Positive, Negative, or Neutral."

# Tolerate small formatting deviations in the reply ("Tweet 3: positive",
# "3. Positive", "3) Neutral.") instead of discarding those lines.
sentiment_pattern = re.compile(
    r'^(?:Tweet\s*)?(\d+)\s*[:.)-]\s*(Positive|Negative|Neutral)\b',
    re.IGNORECASE,
)


def parse_sentiment_response(content, expected_count):
    """Parse a model reply into ``{zero_based_position: sentiment}``.

    Parameters:
    - content: Raw text returned by the model, one classification per line
    - expected_count: Number of tweets that were sent in the batch

    Returns:
    - dict mapping the tweet's position in the batch to "Positive",
      "Negative" or "Neutral". A position that appears more than once keeps
      the last value, so a batch can never yield duplicate rows.
    """
    parsed = {}
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        match = sentiment_pattern.match(line)
        if match is None:
            print(f"Warning: Could not parse line: {line}")
            continue

        position = int(match.group(1)) - 1
        if not 0 <= position < expected_count:
            print(f"Warning: Tweet index out of range: {position + 1}")
            continue

        # Normalise the label's case so downstream mapping always matches
        parsed[position] = match.group(2).capitalize()
    return parsed


for i in tqdm(range(0, len(tesla_data), batch_size)):
    # iloc slicing clamps at the end of the frame, so the last batch may be short
    batch_data = tesla_data.iloc[i:i + batch_size]
    batch = batch_data['Tweet'].tolist()
    batch_number = i // batch_size + 1

    # Sanitize the tweets and number them 1..N for the model
    tweets_text = "\n".join(
        f"Tweet {j}: {sanitize_tweet(tweet)}" for j, tweet in enumerate(batch, start=1)
    )

    # Create the prompt
    user_prompt = f"""Analyze the sentiment of each of these Tesla-related tweets.
For each tweet, determine if the sentiment toward Tesla or its stock is Positive, Negative, or Neutral.

{tweets_text}

For EACH tweet, respond with ONLY a number and sentiment classification in this EXACT format:
1: Positive
2: Negative
3: Neutral

Provide one line per tweet, numbered exactly as above.
"""

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            print(f"\nProcessing batch {batch_number} of {total_batches} (attempt {attempt+1})...")

            # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
            # this presumably requires openai<1.0 to be installed - confirm.
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0,
                max_tokens=1024
            )

            content = response['choices'][0]['message']['content']
            print(f"Response received ({len(content)} chars)")
            parsed = parse_sentiment_response(content, len(batch))

        except Exception as e:
            # Deliberately broad: one failing batch (network, rate limit,
            # malformed reply) must not abort the whole run.
            print(f"Error in batch {batch_number}: {type(e).__name__}: {str(e)}")
            if is_last_attempt:
                print(f"Failed batch after {max_retries} attempts. Moving to next batch.")
            else:
                print("Retrying after a short delay...")
                time.sleep(5)
            continue

        if parsed:
            print(f"Successfully processed {len(parsed)}/{len(batch)} tweets in batch")
            missing = len(batch) - len(parsed)
            if missing:
                print(f"Warning: {missing} tweet(s) in this batch received no classification")

            # Store each result with the row label from the original dataframe
            all_results.extend(
                {
                    "index": batch_data.index[position],
                    "Tweet": batch[position],
                    "Sentiment": sentiment,
                }
                for position, sentiment in sorted(parsed.items())
            )
            break  # Success

        if is_last_attempt:
            print(f"Failed to parse any results after {max_retries} attempts. Moving to next batch.")
        else:
            print("Failed to parse any results. Retrying...")
            time.sleep(2)  # Short pause before retry

    # Add a short pause between batches to avoid rate limits
    time.sleep(1)

# Check if we have results
if not all_results:
    print("No results were collected. Please check the error messages.")
    # SystemExit stops this cell/script without shutting down a notebook kernel,
    # which a bare exit() call would do.
    raise SystemExit(1)

# Create a results dataframe; "index" holds the row label in tesla_data
results_df = pd.DataFrame(all_results)

# Add the sentiment back to the tesla dataframe in one aligned assignment.
# If a row label was classified more than once, the last classification wins.
sentiment_by_row = (
    results_df.drop_duplicates(subset='index', keep='last')
    .set_index('index')['Sentiment']
)
tesla_data['Sentiment'] = sentiment_by_row.reindex(tesla_data.index)

# Add a numeric sentiment score for easier analysis (unclassified rows stay NaN)
sentiment_values = {"Positive": 1, "Negative": -1, "Neutral": 0}
tesla_data['Sentiment_Score'] = tesla_data['Sentiment'].map(sentiment_values)

# Create daily sentiment aggregation.
# Tweet_Count is every tweet of the day; Classified_Count only those that
# actually received a sentiment (failed batches leave tweets unclassified).
daily_sentiment = tesla_data.groupby(tesla_data['Date'].dt.date).agg(
    Tweet_Count=('Tweet', 'count'),
    Classified_Count=('Sentiment_Score', 'count'),
    Avg_Sentiment_Score=('Sentiment_Score', 'mean'),
    Positive_Count=('Sentiment_Score', lambda x: (x == 1).sum()),
    Negative_Count=('Sentiment_Score', lambda x: (x == -1).sum()),
    Neutral_Count=('Sentiment_Score', lambda x: (x == 0).sum())
).reset_index()

# Calculate sentiment ratios over classified tweets only, so unclassified
# tweets do not dilute the ratios. Days with no classified tweet yield NaN.
daily_sentiment['Positive_Ratio'] = daily_sentiment['Positive_Count'] / daily_sentiment['Classified_Count']
daily_sentiment['Negative_Ratio'] = daily_sentiment['Negative_Count'] / daily_sentiment['Classified_Count']

# Save the results with timestamp
timestamp = time.strftime("%Y%m%d-%H%M%S")
tweets_path = f"tesla_data_with_sentiment_{timestamp}.csv"
daily_path = f"tesla_daily_sentiment_{timestamp}.csv"
tesla_data.to_csv(tweets_path, index=False)
daily_sentiment.to_csv(daily_path, index=False)

print(f"\nAnalysis complete! Processed {len(all_results)} tweets.")
print("Files saved:")
print(f"- {tweets_path}")
print(f"- {daily_path}")

# Show sentiment distribution (value_counts already skips unclassified rows);
# percentages are relative to all Tesla tweets, classified or not.
sentiment_counts = tesla_data['Sentiment'].value_counts()
print("\nSentiment Distribution:")
for sentiment, count in sentiment_counts.items():
    print(f"- {sentiment}: {count} tweets ({count/len(tesla_data)*100:.1f}%)")

# Show the most extreme days (idxmax skips days whose ratio is NaN)
if len(daily_sentiment) > 0:
    most_positive_day = daily_sentiment.loc[daily_sentiment['Positive_Ratio'].idxmax()]
    most_negative_day = daily_sentiment.loc[daily_sentiment['Negative_Ratio'].idxmax()]

    print(f"\nDay with most positive sentiment: {most_positive_day['Date']} ({most_positive_day['Positive_Ratio']*100:.1f}% positive)")
    print(f"Day with most negative sentiment: {most_negative_day['Date']} ({most_negative_day['Negative_Ratio']*100:.1f}% negative)")

In [None]:
import yfinance as tf
import pandas as pd

# Download Tesla data for the past 5 years
ticker = "TSLA"
data = tf.download(ticker, period="5y")

# A failed or empty download would otherwise be saved as a useless CSV
if data.empty:
    raise RuntimeError(f"No price data was returned for {ticker}")

# Some yfinance releases return two-level columns (field, ticker) even for a
# single ticker. Written as-is that produces a multi-row CSV header with no
# plain "Date" column, so keep only the field level (Open, High, Close, ...).
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Save to CSV (the date index is written as the first column)
output_path = f"{ticker}_historical_prices.csv"
data.to_csv(output_path)

print(f"Downloaded {ticker} price data and saved to CSV")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os

# Load sentiment data (adjust file path as needed)
# This assumes you've already run the sentiment analysis script
def _read_dated_csv(path, label):
    """Read a CSV and return it with 'Date' as timezone-naive midnight timestamps."""
    frame = pd.read_csv(path)
    if 'Date' not in frame.columns:
        raise ValueError(
            f"The {label} file {path} has no 'Date' column. "
            f"Available columns: {frame.columns.tolist()}"
        )

    dates = pd.to_datetime(frame['Date'])
    # Drop any timezone and time-of-day so both files join on the calendar day
    if dates.dt.tz is not None:
        dates = dates.dt.tz_localize(None)
    frame['Date'] = dates.dt.normalize()
    return frame


def load_data(sentiment_file, stock_price_file):
    """
    Load and prepare both sentiment and stock price data.

    Parameters:
    - sentiment_file: Path to the sentiment analysis results (daily aggregation)
    - stock_price_file: Path to the stock price historical data

    Returns:
    - merged_data: A dataframe with both sentiment and price data, restricted
      to dates present in both files, sorted by 'Date' with a fresh 0..n-1 index

    Raises:
    - ValueError: If either file has no 'Date' column
    """
    print(f"Loading sentiment data from {sentiment_file}")
    sentiment_df = _read_dated_csv(sentiment_file, "sentiment")

    print(f"Loading stock price data from {stock_price_file}")
    prices_df = _read_dated_csv(stock_price_file, "stock price")

    # Inner merge keeps only days that have both sentiment and a price.
    # Sort explicitly: the backtest treats consecutive rows as consecutive
    # trading days, and an inner merge only preserves the left file's order.
    merged_data = (
        pd.merge(sentiment_df, prices_df, on='Date', how='inner')
        .sort_values('Date')
        .reset_index(drop=True)
    )

    print(f"Data merged successfully. Found {len(merged_data)} days with both sentiment and price data.")
    return merged_data

# Create trading signals based on sentiment
def generate_trading_signals(data, strategy='threshold', threshold=0.6):
    """
    Attach a 'Signal' column (1 = buy, -1 = sell, 0 = hold) to a copy of *data*.

    Parameters:
    - data: DataFrame containing 'Positive_Ratio' and 'Negative_Ratio' columns
    - strategy: One of 'threshold', 'relative' or 'trend'
    - threshold: Cut-off applied to the ratios (or to their difference for
      the 'relative' strategy)

    Returns:
    - A new DataFrame with 'Signal' plus any helper columns the chosen
      strategy computes; the input frame is left untouched

    Raises:
    - ValueError: If *strategy* is not one of the supported names
    """
    df = data.copy()
    df['Signal'] = 0  # everything starts as "hold"

    if strategy == 'threshold':
        # Each ratio is compared to the cut-off on its own
        buy_mask = df['Positive_Ratio'] >= threshold
        sell_mask = df['Negative_Ratio'] >= threshold

    elif strategy == 'relative':
        # Signal on the gap between positive and negative sentiment
        df['Sentiment_Difference'] = df['Positive_Ratio'] - df['Negative_Ratio']
        buy_mask = df['Sentiment_Difference'] >= threshold
        sell_mask = df['Sentiment_Difference'] <= -threshold

    elif strategy == 'trend':
        window = 3  # rolling window length, in rows
        sides = ('Positive', 'Negative')

        # Rolling means first, then the "above its own average" flags
        for side in sides:
            df[f'{side}_MA'] = df[f'{side}_Ratio'].rolling(window=window).mean()
        for side in sides:
            df[f'{side}_Trend'] = df[f'{side}_Ratio'] > df[f'{side}_MA']

        buy_mask = df['Positive_Trend'] & (df['Positive_Ratio'] > threshold)
        sell_mask = df['Negative_Trend'] & (df['Negative_Ratio'] > threshold)

    else:
        raise ValueError(f"Unknown strategy: {strategy}")

    # Sells are written last, so a row matching both masks ends up as a sell
    df.loc[buy_mask, 'Signal'] = 1
    df.loc[sell_mask, 'Signal'] = -1

    # Report how many rows fall in each category
    print(f"Strategy '{strategy}' generated:")
    for label, code in (("Buy", 1), ("Sell", -1), ("Hold", 0)):
        count = (df['Signal'] == code).sum()
        print(f"- {label} signals: {count} ({count/len(df)*100:.1f}%)")

    return df

# Run a backtest simulation of the trading strategy
def backtest_strategy(data, initial_capital=10000, position_size=0.25, commission=0.001):
    """
    Simulate trading based on the signals and evaluate performance.

    The signal on row i-1 is executed at the price on row i, so a decision
    never uses the price of the row it was derived from. Only one long
    position is held at a time: buy signals are ignored while invested and
    sell signals are ignored while flat.

    Parameters:
    - data: DataFrame with 'Date', 'Signal' and at least one price column
    - initial_capital: Starting capital for the simulation
    - position_size: Fraction of the current portfolio value allocated per buy
    - commission: Commission rate charged on the value of every trade

    Returns:
    - performance: Copy of *data* with 'Position', 'Capital' and
      'Portfolio_Value' columns added
    - trades_df: DataFrame with one row per executed trade (empty if none)
    - metrics: Dictionary with performance metrics

    Raises:
    - ValueError: If none of the known price columns is present
    """
    # Make a copy of the data
    df = data.copy()

    # Use the first available price column, in this order of preference
    price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
    price_col = next((col for col in price_columns if col in df.columns), None)
    if price_col is None:
        raise ValueError("No price data columns found. Need at least one of: Open, High, Low, Close, Adj Close")
    print(f"Using '{price_col}' column for price data")

    # Pull the columns the loop reads into plain lists once
    signals = df['Signal'].tolist()
    prices = df[price_col].tolist()
    dates = df['Date'].tolist()
    n_rows = len(df)

    # Tracking series are floats from the start: shares and cash are
    # fractional, and writing floats into integer columns row by row relies
    # on a silent upcast that recent pandas versions deprecate.
    start_cash = float(initial_capital)
    position_track = [0.0] * n_rows
    capital_track = [start_cash] * n_rows
    value_track = [start_cash] * n_rows

    trades = []
    position = 0.0        # Current position (number of shares)
    capital = start_cash  # Current cash

    for i in range(1, n_rows):
        # Yesterday's signal, today's price
        signal = signals[i - 1]
        price = prices[i]

        # Portfolio value before today's trade (cash + shares)
        portfolio_value = capital + position * price

        if signal == 1 and position == 0:  # Buy signal and not already in position
            trade_value = portfolio_value * position_size
            shares_to_buy = trade_value / price
            commission_cost = trade_value * commission

            # Skip the buy if cash cannot cover the trade plus commission
            if trade_value + commission_cost <= capital:
                position = shares_to_buy
                capital -= (trade_value + commission_cost)

                trades.append({
                    'Date': dates[i],
                    'Action': 'BUY',
                    'Price': price,
                    'Shares': shares_to_buy,
                    'Value': trade_value,
                    'Commission': commission_cost,
                    'Portfolio_Value': portfolio_value
                })

        elif signal == -1 and position > 0:  # Sell signal and have a position
            # Liquidate the whole position
            trade_value = position * price
            commission_cost = trade_value * commission
            capital += (trade_value - commission_cost)

            trades.append({
                'Date': dates[i],
                'Action': 'SELL',
                'Price': price,
                'Shares': position,
                'Value': trade_value,
                'Commission': commission_cost,
                'Portfolio_Value': portfolio_value
            })

            position = 0.0

        # Record end-of-row state
        position_track[i] = position
        capital_track[i] = capital
        value_track[i] = capital + position * price

    df['Position'] = position_track
    df['Capital'] = capital_track
    df['Portfolio_Value'] = value_track

    # Create a trades dataframe
    trades_df = pd.DataFrame(trades) if trades else pd.DataFrame()

    # Calculate performance metrics
    metrics = calculate_performance_metrics(df, initial_capital, trades_df)

    return df, trades_df, metrics

# Calculate performance metrics for the strategy
def _round_trip_profits(trades_df):
    """Return the net profit of every SELL paired with the latest earlier BUY."""
    if trades_df.empty:
        return []

    buys = trades_df[trades_df['Action'] == 'BUY']
    sells = trades_df[trades_df['Action'] == 'SELL']

    profits = []
    for _, sell in sells.iterrows():
        earlier_buys = buys[buys['Date'] < sell['Date']]
        if earlier_buys.empty:
            continue  # a sell with no preceding buy cannot be scored
        entry = earlier_buys.iloc[-1]
        gross = (sell['Price'] - entry['Price']) * sell['Shares']
        # Both legs of the round trip paid commission
        profits.append(gross - sell['Commission'] - entry['Commission'])
    return profits


def calculate_performance_metrics(performance_data, initial_capital, trades_df):
    """
    Calculate key performance metrics for the trading strategy.

    Parameters:
    - performance_data: DataFrame with 'Date' and 'Portfolio_Value' columns;
      'Close' (or else 'Adj Close') is used for the buy & hold comparison
    - initial_capital: Starting capital amount
    - trades_df: DataFrame with trade details (may be empty)

    Returns:
    - metrics: Dictionary with performance metrics. Entries that cannot be
      computed (no price column, zero-length date span, zero volatility)
      are None.

    Raises:
    - ValueError: If *performance_data* has no rows
    """
    df = performance_data.copy()
    if df.empty:
        raise ValueError("performance_data is empty; cannot compute metrics")

    # Basic return
    final_value = df['Portfolio_Value'].iloc[-1]
    total_return = (final_value - initial_capital) / initial_capital

    # Buy & hold return for comparison
    benchmark_col = next((col for col in ('Close', 'Adj Close') if col in df.columns), None)
    if benchmark_col is not None:
        first_price = df[benchmark_col].iloc[0]
        last_price = df[benchmark_col].iloc[-1]
        buy_hold_return = (last_price - first_price) / first_price
    else:
        buy_hold_return = None

    # Annualized return; undefined when first and last row share a date
    days = (df['Date'].iloc[-1] - df['Date'].iloc[0]).days
    years = days / 365.25
    annualized_return = (1 + total_return) ** (1 / years) - 1 if years > 0 else None

    # Annualized volatility (standard deviation of row-to-row returns)
    daily_returns = df['Portfolio_Value'].pct_change()
    volatility = daily_returns.std() * (252 ** 0.5)

    # Sharpe ratio (risk-free rate of 0.02). Needs both an annualized return
    # and a positive volatility; a NaN volatility compares False and gives None.
    risk_free_rate = 0.02
    if annualized_return is not None and volatility > 0:
        sharpe_ratio = (annualized_return - risk_free_rate) / volatility
    else:
        sharpe_ratio = None

    # Maximum drawdown relative to the running peak
    running_peak = df['Portfolio_Value'].cummax()
    max_drawdown = ((df['Portfolio_Value'] - running_peak) / running_peak).min()

    # Trade statistics; everything stays 0 unless a complete round trip exists
    num_trades = len(trades_df)
    win_rate = avg_profit = avg_loss = profit_factor = 0

    profits = _round_trip_profits(trades_df)
    if profits:
        wins = [p for p in profits if p > 0]
        losses = [p for p in profits if p <= 0]

        win_rate = len(wins) / len(profits)
        avg_profit = sum(wins) / len(wins) if wins else 0
        avg_loss = sum(losses) / len(losses) if losses else 0
        profit_factor = abs(avg_profit / avg_loss) if avg_loss != 0 else float('inf')

    # Combine all metrics
    metrics = {
        'Initial Capital': initial_capital,
        'Final Portfolio Value': final_value,
        'Total Return (%)': total_return * 100,
        'Buy & Hold Return (%)': buy_hold_return * 100 if buy_hold_return is not None else None,
        'Strategy Outperformance (%)': (total_return - buy_hold_return) * 100 if buy_hold_return is not None else None,
        'Annualized Return (%)': annualized_return * 100 if annualized_return is not None else None,
        'Volatility (%)': volatility * 100,
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown (%)': max_drawdown * 100,
        'Number of Trades': num_trades,
        'Win Rate (%)': win_rate * 100,
        'Avg Profit ($)': avg_profit,
        'Avg Loss ($)': avg_loss,
        'Profit Factor': profit_factor
    }

    return metrics

# Visualize results
def visualize_results(data, performance, trades_df, metrics, strategy_name, threshold=None):
    """
    Create visualizations of trading performance.

    Draws three stacked plots (price with signals, portfolio value with
    executed trades, sentiment ratios), saves the figure as a timestamped
    PNG in the working directory, shows it, and closes it.

    Parameters:
    - data: Original data with sentiment and signals
    - performance: Portfolio performance data
    - trades_df: DataFrame with trade details
    - metrics: Dictionary with performance metrics
    - strategy_name: Name of the strategy for plot titles
    - threshold: Optional value for the threshold line on the sentiment plot.
      When omitted it is read from a "threshold=<number>" fragment in
      strategy_name, falling back to 0.6.
    """
    import re  # local import: only needed to read the threshold from the title

    # Create a figure with multiple subplots
    fig, axes = plt.subplots(3, 1, figsize=(12, 18), gridspec_kw={'height_ratios': [2, 1, 1]})

    # Plot 1: Stock price with buy/sell signals
    ax1 = axes[0]
    price_col = 'Close' if 'Close' in data.columns else 'Adj Close' if 'Adj Close' in data.columns else None

    if price_col:
        ax1.plot(data['Date'], data[price_col], label='Stock Price', color='blue')

        # Add buy signals
        buy_signals = data[data['Signal'] == 1]
        if not buy_signals.empty:
            ax1.scatter(buy_signals['Date'], buy_signals[price_col],
                       color='green', label='Buy Signal', marker='^', s=100)

        # Add sell signals
        sell_signals = data[data['Signal'] == -1]
        if not sell_signals.empty:
            ax1.scatter(sell_signals['Date'], sell_signals[price_col],
                       color='red', label='Sell Signal', marker='v', s=100)

        ax1.set_title(f'Stock Price with {strategy_name} Trading Signals', fontsize=14)
        ax1.set_ylabel('Price ($)', fontsize=12)
        ax1.legend()
        ax1.grid(True, alpha=0.6)

    # Plot 2: Portfolio value over time
    ax2 = axes[1]
    ax2.plot(performance['Date'], performance['Portfolio_Value'], label='Portfolio Value', color='purple')

    if trades_df is not None and not trades_df.empty:
        # Mark buy trades
        buy_trades = trades_df[trades_df['Action'] == 'BUY']
        if not buy_trades.empty:
            ax2.scatter(buy_trades['Date'], buy_trades['Portfolio_Value'],
                      color='green', label='Buy', marker='^', s=100)

        # Mark sell trades
        sell_trades = trades_df[trades_df['Action'] == 'SELL']
        if not sell_trades.empty:
            ax2.scatter(sell_trades['Date'], sell_trades['Portfolio_Value'],
                       color='red', label='Sell', marker='v', s=100)

    ax2.set_title('Portfolio Value Over Time', fontsize=14)
    ax2.set_ylabel('Value ($)', fontsize=12)
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Plot 3: Sentiment over time
    ax3 = axes[2]
    ax3.plot(data['Date'], data['Positive_Ratio'], label='Positive Sentiment', color='green')
    ax3.plot(data['Date'], data['Negative_Ratio'], label='Negative Sentiment', color='red')

    # Add a threshold line only for the threshold strategy. Look at the part
    # of the name before any "(threshold=...)" suffix; the suffix alone would
    # make every strategy match.
    base_name = strategy_name.split('(')[0].lower()
    if 'threshold' in base_name:
        if threshold is None:
            found = re.search(r'threshold\s*=\s*([0-9]*\.?[0-9]+)', strategy_name, re.IGNORECASE)
            threshold = float(found.group(1)) if found else 0.6
        ax3.axhline(y=threshold, color='black', linestyle='--', alpha=0.5, label=f'Threshold ({threshold})')

    ax3.set_title('Sentiment Ratios Over Time', fontsize=14)
    ax3.set_xlabel('Date', fontsize=12)
    ax3.set_ylabel('Sentiment Ratio', fontsize=12)
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Add metrics as text in the bottom-left corner of the figure
    plt.figtext(0.01, 0.01, format_metrics(metrics), fontsize=12,
               bbox=dict(facecolor='white', alpha=0.8), verticalalignment='bottom')

    plt.tight_layout()

    # Save the figure
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    filename = f"trading_results_{strategy_name.replace(' ', '_')}_{timestamp}.png"
    plt.savefig(filename, dpi=300, bbox_inches='tight')

    print(f"Visualization saved as {filename}")

    # Display the figure, then release it so repeated runs do not pile up
    # open figures in memory
    plt.show()
    plt.close(fig)

# Format metrics for display
def format_metrics(metrics):
    """Render the metrics mapping as a multi-line, human-readable string.

    Float values are shown with two decimals; every other value is shown
    with its default string form. Each entry ends with a newline.
    """
    def render(value):
        return f"{value:.2f}" if isinstance(value, float) else f"{value}"

    body = "".join(f"{name}: {render(value)}\n" for name, value in metrics.items())
    return "Performance Metrics:\n\n" + body

# Main function to run the entire analysis
def run_sentiment_trading_simulation(sentiment_file, stock_price_file, strategy='threshold',
                                     threshold=0.6, initial_capital=10000):
    """
    Run the full pipeline: load, generate signals, backtest, report, plot, save.

    Parameters:
    - sentiment_file: Path to the sentiment analysis results
    - stock_price_file: Path to the stock price historical data
    - strategy: Trading strategy to use ('threshold', 'relative', or 'trend')
    - threshold: Sentiment threshold for buy/sell signals
    - initial_capital: Starting capital for the simulation

    Returns:
    - performance: Portfolio tracking frame returned by the backtest
    - trades: Frame of executed trades (possibly empty)
    - metrics: Dictionary of performance metrics

    Side effects: prints a report, shows and saves a figure, and writes
    timestamped CSV files into the working directory.
    """
    separator = "=" * 80
    print(separator)
    print("RUNNING SENTIMENT-BASED TRADING SIMULATION")
    print(f"Strategy: {strategy}, Threshold: {threshold}, Initial Capital: ${initial_capital}")
    print(separator)

    # Steps 1-3: load and merge, derive signals, simulate trading
    merged_data = load_data(sentiment_file, stock_price_file)
    signals_data = generate_trading_signals(merged_data, strategy, threshold)
    performance, trades, metrics = backtest_strategy(signals_data, initial_capital)

    # Step 4: print the metrics, floats with two decimals
    print("\nPERFORMANCE METRICS:")
    print("-" * 40)
    for name, value in metrics.items():
        shown = f"{value:.2f}" if isinstance(value, float) else value
        print(f"{name}: {shown}")

    # Step 5: plots
    visualize_results(
        signals_data, performance, trades, metrics,
        f"{strategy.capitalize()} Strategy (threshold={threshold})",
    )

    # Step 6: write detailed results; the trades file is skipped when no
    # trade was executed
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    outputs = [("signals", signals_data), ("performance", performance)]
    if not trades.empty:
        outputs.append(("trades", trades))
    for kind, frame in outputs:
        frame.to_csv(f"trading_{kind}_{strategy}_{timestamp}.csv", index=False)

    print(f"\nDetailed results saved with timestamp {timestamp}")

    return performance, trades, metrics

# Example usage
if __name__ == "__main__":
    # Input files: point these at your own sentiment and price CSVs
    sentiment_file = "/content/tesla_daily_sentiment_20250403-044037.csv"
    stock_price_file = "/content/TSLA_historical_prices.csv"

    # Baseline run with the default parameters
    performance, trades, metrics = run_sentiment_trading_simulation(
        sentiment_file,
        stock_price_file,
        strategy='threshold',
        threshold=0.6,
        initial_capital=10000,
    )

    # Strategy/threshold pairs to compare against each other
    strategies = [
        {'name': 'threshold', 'threshold': 0.4},
        {'name': 'relative', 'threshold': 0.3},
        {'name': 'trend', 'threshold': 0.5}
    ]

    # Collect the metrics of each run, keyed by strategy name
    results = {}
    for strategy in strategies:
        print(f"\nEvaluating {strategy['name']} strategy...")
        _, _, metrics = run_sentiment_trading_simulation(
            sentiment_file,
            stock_price_file,
            strategy=strategy['name'],
            threshold=strategy['threshold'],
            initial_capital=10000,
        )
        results[strategy['name']] = metrics

    # Side-by-side comparison, one section per metric
    print("\nSTRATEGY COMPARISON:")
    print("-" * 80)

    metrics_to_compare = ['Total Return (%)', 'Annualized Return (%)',
                          'Sharpe Ratio', 'Max Drawdown (%)', 'Win Rate (%)']

    for metric in metrics_to_compare:
        print(f"\n{metric}:")
        for strategy_name, strategy_metrics in results.items():
            value = strategy_metrics.get(metric)
            # Metrics that could not be computed are reported as N/A
            shown = f"{value:.2f}" if value is not None else "N/A"
            print(f"  {strategy_name}: {shown}")

In [None]:

# Example usage
if __name__ == "__main__":
    # Paths to the daily sentiment file and the price history; adjust as needed
    sentiment_file = "/content/tesla_daily_sentiment_20250403-044037.csv"
    stock_price_file = "/content/TSLA_historical_prices.csv"

    # Single run: threshold strategy at 0.4 with a starting capital of 100
    performance, trades, metrics = run_sentiment_trading_simulation(
        sentiment_file,
        stock_price_file,
        strategy='threshold',
        threshold=0.4,
        initial_capital=100,
    )

    # Configurations evaluated in the comparison below
    strategies = [
        {'name': 'threshold', 'threshold': 0.4},
        {'name': 'relative', 'threshold': 0.3},
        {'name': 'trend', 'threshold': 0.5}
    ]

    results = {}

    for strategy in strategies:
        print(f"\nEvaluating {strategy['name']} strategy...")
        # Only the metrics are kept; the frames of each run are discarded
        _, _, metrics = run_sentiment_trading_simulation(
            sentiment_file,
            stock_price_file,
            strategy=strategy['name'],
            threshold=strategy['threshold'],
            initial_capital=10000,
        )
        results[strategy['name']] = metrics

    # Print the selected metrics grouped by metric name
    print("\nSTRATEGY COMPARISON:")
    print("-" * 80)

    metrics_to_compare = ['Total Return (%)', 'Annualized Return (%)',
                          'Sharpe Ratio', 'Max Drawdown (%)', 'Win Rate (%)']

    for metric in metrics_to_compare:
        print(f"\n{metric}:")
        for strategy_name, strategy_metrics in results.items():
            value = strategy_metrics.get(metric)
            if value is None:
                print(f"  {strategy_name}: N/A")
                continue
            print(f"  {strategy_name}: {value:.2f}")

## Buy-only sentiment strategy

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os

# Load sentiment data (adjust file path as needed)
# This assumes you've already run the sentiment analysis script
def load_data(sentiment_file, stock_price_file):
    """
    Load sentiment and stock price CSV files and join them on the 'Date' column.

    Parameters:
    - sentiment_file: Path to the sentiment analysis results (daily aggregation);
      must contain a 'Date' column
    - stock_price_file: Path to the stock price historical data

    Returns:
    - merged_data: A dataframe with both sentiment and price data, sorted by
      'Date' in ascending order with a fresh 0..n-1 index

    Raises:
    - ValueError: if the inner join on 'Date' yields no rows
    - Any exception raised while reading or parsing either file is re-raised
      after a diagnostic message has been printed
    """
    print(f"Loading sentiment data from {sentiment_file}")

    try:
        sentiment_df = pd.read_csv(sentiment_file)

        # Ensure date is in datetime format
        sentiment_df['Date'] = pd.to_datetime(sentiment_df['Date'])

        print(f"Successfully loaded sentiment data with {len(sentiment_df)} rows")
        print(f"Sentiment data columns: {sentiment_df.columns.tolist()}")
    except Exception as e:
        print(f"Error loading sentiment data: {e}")
        _report_missing_file(sentiment_file)
        raise

    print(f"Loading stock price data from {stock_price_file}")
    try:
        prices_df = pd.read_csv(stock_price_file)

        # Pick the first well-known date column name that is present
        possible_date_cols = ['Date', 'date', 'DATE', 'Timestamp', 'timestamp', 'Time', 'time']
        date_col = next((col for col in possible_date_cols if col in prices_df.columns), None)

        if date_col is None:
            print(f"Warning: Could not find date column. Available columns: {prices_df.columns.tolist()}")
            # Try to use the first column as date
            date_col = prices_df.columns[0]
            print(f"Using {date_col} as date column")

        prices_df['Date'] = pd.to_datetime(prices_df[date_col])

        # If we used a different column for the date, keep it but store the parsed values
        if date_col != 'Date':
            prices_df[date_col] = prices_df['Date']

        print(f"Successfully loaded price data with {len(prices_df)} rows")
        print(f"Price data columns: {prices_df.columns.tolist()}")
    except Exception as e:
        print(f"Error loading stock price data: {e}")
        _report_missing_file(stock_price_file)
        raise

    # Check for required price columns
    required_price_cols = ['Open', 'High', 'Low', 'Close']
    available_price_cols = [col for col in required_price_cols if col in prices_df.columns]

    if not available_price_cols:
        print("Warning: No standard price columns found (Open, High, Low, Close)")
        print(f"Available columns: {prices_df.columns.tolist()}")

        # Guess price columns from common naming patterns. Each source column
        # maps to at most one standard name and an already-populated standard
        # name is never overwritten, so "adj close" cannot clobber "Close".
        for col in list(prices_df.columns):
            standard_name = _guess_price_column(col)
            if standard_name is None or standard_name in prices_df.columns:
                continue
            prices_df[standard_name] = prices_df[col]
            print(f"Using {col} as {standard_name}")

    # Merge sentiment data with stock price data
    print("Merging data on Date column")
    merged_data = pd.merge(sentiment_df, prices_df, on='Date', how='inner')

    if len(merged_data) == 0:
        print("WARNING: Merged data is empty! Check date formats and ranges.")
        print(f"Sentiment data date range: {sentiment_df['Date'].min()} to {sentiment_df['Date'].max()}")
        print(f"Price data date range: {prices_df['Date'].min()} to {prices_df['Date'].max()}")

        # Show one sample from each side; skipped when a side is empty so the
        # ValueError below is not masked by an IndexError.
        if not sentiment_df.empty and not prices_df.empty:
            print("\nDate format comparison:")
            print(f"Sentiment date example: {sentiment_df['Date'].iloc[0]} (type: {type(sentiment_df['Date'].iloc[0])})")
            print(f"Price date example: {prices_df['Date'].iloc[0]} (type: {type(prices_df['Date'].iloc[0])})")

        raise ValueError("Merged data is empty. Unable to continue.")

    # The backtest walks the rows in order, so guarantee chronological order
    # (stable sort keeps the original order of rows sharing a date).
    merged_data = merged_data.sort_values('Date', kind='mergesort').reset_index(drop=True)

    print(f"Data merged successfully. Found {len(merged_data)} days with both sentiment and price data.")
    return merged_data


# Substring patterns (lower case) -> standard price column name. The adjusted
# close variants come first so they are not captured by the plain 'close' pattern.
_PRICE_COLUMN_PATTERNS = (
    ('adjusted close', 'Adj Close'),
    ('adjusted_close', 'Adj Close'),
    ('adj close', 'Adj Close'),
    ('adj_close', 'Adj Close'),
    ('open', 'Open'),
    ('high', 'High'),
    ('low', 'Low'),
    ('close', 'Close'),
)


def _guess_price_column(column_name):
    """Return the standard price column name matching column_name, or None."""
    lowered = str(column_name).lower()
    for pattern, standard_name in _PRICE_COLUMN_PATTERNS:
        if pattern in lowered:
            return standard_name
    return None


def _report_missing_file(path):
    """Print a hint listing the current directory when path does not exist."""
    if not os.path.exists(path):
        print(f"The file {path} does not exist.")
        print("Available files in current directory:")
        print(os.listdir('.'))

# Create trading signals based on sentiment (for buy) and profit targets (for sell)
def generate_trading_signals(data, strategy='threshold', threshold=0.5, profit_target=0.25, stop_loss=0.05):
    """
    Flag the rows on which sentiment justifies opening a position.

    Only entries are decided here; exits are left to the backtest, which
    applies profit-target / stop-loss rules. `profit_target` and `stop_loss`
    are accepted for signature compatibility and are not read by this function.

    Parameters:
    - data: DataFrame with sentiment and price data
    - strategy: Strategy to use ('threshold', 'relative', or 'trend')
    - threshold: Sentiment threshold for buy decisions
    - profit_target: Target profit percentage for selling (e.g., 0.1 = 10%)
    - stop_loss: Optional stop loss percentage (e.g., 0.05 = 5%)

    Returns:
    - A copy of `data` with a 'Signal' column (0 = hold, 1 = buy) plus any
      helper columns the chosen strategy computes
    """
    # Work on a copy so the caller's frame is left untouched
    frame = data.copy()

    # Everything starts as "hold"; buys are switched on below
    frame['Signal'] = 0

    if strategy == 'threshold':
        # Positive share of sentiment meets or exceeds the threshold
        buy_mask = frame['Positive_Ratio'] >= threshold
    elif strategy == 'relative':
        # Positive share must lead the negative share by at least the threshold
        frame['Sentiment_Difference'] = frame['Positive_Ratio'] - frame['Negative_Ratio']
        buy_mask = frame['Sentiment_Difference'] >= threshold
    elif strategy == 'trend':
        # Compare today's positive share with its 3-row moving average
        lookback = 3
        frame['Positive_MA'] = frame['Positive_Ratio'].rolling(window=lookback).mean()
        frame['Positive_Trend'] = frame['Positive_Ratio'] > frame['Positive_MA']
        # Needs both a rising reading and a level strictly above the threshold
        buy_mask = (frame['Positive_Trend']) & (frame['Positive_Ratio'] > threshold)
    else:
        raise ValueError(f"Unknown strategy: {strategy}")

    frame.loc[buy_mask, 'Signal'] = 1

    # Summarise how many rows ended up in each bucket
    total_rows = len(frame)
    n_buy = (frame['Signal'] == 1).sum()
    n_hold = (frame['Signal'] == 0).sum()

    print(f"Strategy '{strategy}' generated:")
    print(f"- Buy signals: {n_buy} ({n_buy/total_rows*100:.1f}%)")
    print(f"- Hold signals: {n_hold} ({n_hold/total_rows*100:.1f}%)")
    print(f"- Sell signals will be generated based on profit targets during simulation")

    return frame

# Run a backtest simulation of the trading strategy with profit targets
def backtest_strategy(data, initial_capital=100, position_size=0.25, commission=0.001,
                      profit_target=0.25, stop_loss=0.05, max_holding_days=30):
    """
    Simulate trading based on:
    - Buy signals from sentiment
    - Sell signals based on profit targets or stop losses

    At most one position is open at a time. The first row is never traded; it
    only carries the starting capital. After every exit a cool-down (counted
    in rows) must elapse before a new buy is considered.

    Parameters:
    - data: DataFrame with 'Date', a 'Signal' column (1 = buy) and price data,
      expected in chronological order
    - initial_capital: Starting capital for the simulation
    - position_size: Fraction of the portfolio value to allocate per trade
    - commission: Commission rate per trade
    - profit_target: Target profit percentage for selling (e.g., 0.1 = 10%)
    - stop_loss: Stop loss percentage (e.g., 0.05 = 5%)
    - max_holding_days: Maximum rows to hold a position before forced selling

    Returns:
    - performance: Copy of `data` with 'Position', 'Capital', 'Portfolio_Value'
      and 'Trade_Type' columns added
    - trades_df: DataFrame with one row per executed trade (empty if none)
    - metrics: Dictionary with performance metrics

    Raises:
    - ValueError: if none of the known price columns is present
    """
    # Make a copy of the data
    df = data.copy()

    # Ensure we have price data (adjust column names if needed)
    price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
    available_columns = [col for col in price_columns if col in df.columns]

    if not available_columns:
        raise ValueError("No price data columns found. Need at least one of: Open, High, Low, Close, Adj Close")

    # Use the first available price column
    # NOTE(review): this picks 'Open' when present, while
    # calculate_performance_metrics benchmarks buy & hold on 'Close' -- confirm
    # the mismatch is intended.
    price_col = available_columns[0]
    print(f"Using '{price_col}' column for price data")

    n_rows = len(df)
    prices = df[price_col].tolist()
    buy_flags = (df['Signal'] == 1).tolist()
    dates = df['Date'].tolist()

    # Per-row tracking is collected in float lists and attached once at the
    # end. This avoids writing floats into int-initialised columns one cell at
    # a time and is independent of the frame's index labels.
    start_cash = float(initial_capital)
    position_history = [0.0] * n_rows
    capital_history = [start_cash] * n_rows
    value_history = [start_cash] * n_rows
    trade_labels = [''] * n_rows

    # Track trades
    trades = []

    position = 0.0  # Current position (number of shares)
    capital = start_cash  # Current cash
    entry_price = 0  # Price at which we bought
    days_held = 0  # Number of rows the position has been held
    waiting_period = 0  # Rows to wait after selling before buying again

    for i in range(1, n_rows):
        price = prices[i]

        # Portfolio value (cash + shares) before any trade on this row
        portfolio_value = capital + position * price

        if position > 0:
            # SELLING LOGIC: rules are checked in priority order
            days_held += 1
            current_return = (price / entry_price) - 1

            # (trade reason, Trade_Type label, cool-down rows after the exit)
            if current_return >= profit_target:
                exit_rule = ('PROFIT_TARGET', 'PROFIT_TAKE', 5)
            elif current_return <= -stop_loss:
                exit_rule = ('STOP_LOSS', 'STOP_LOSS', 10)  # Wait longer after a stop loss
            elif days_held >= max_holding_days:
                exit_rule = ('MAX_HOLDING', 'MAX_HOLDING', 3)  # Short waiting period
            else:
                exit_rule = None

            if exit_rule is not None:
                reason, label, cooldown = exit_rule

                trade_value = position * price
                commission_cost = trade_value * commission

                # Execute sell
                capital += (trade_value - commission_cost)

                trades.append({
                    'Date': dates[i],
                    'Action': 'SELL',
                    'Reason': reason,
                    'Price': price,
                    'Entry_Price': entry_price,
                    'Return_Pct': current_return * 100,
                    'Shares': position,
                    'Value': trade_value,
                    'Commission': commission_cost,
                    'Days_Held': days_held,
                    'Portfolio_Value': portfolio_value
                })

                # Reset position
                position = 0.0
                entry_price = 0
                days_held = 0
                waiting_period = cooldown
                trade_labels[i] = label

        elif position == 0 and waiting_period <= 0:
            # BUYING LOGIC: act on the sentiment signal. A non-positive or NaN
            # price is skipped because no share count can be derived from it.
            if buy_flags[i] and price > 0:
                trade_value = portfolio_value * position_size
                shares_to_buy = trade_value / price
                commission_cost = trade_value * commission

                if trade_value + commission_cost <= capital:
                    # Execute buy
                    position = shares_to_buy
                    entry_price = price
                    capital -= (trade_value + commission_cost)
                    days_held = 0

                    trades.append({
                        'Date': dates[i],
                        'Action': 'BUY',
                        'Reason': 'SENTIMENT',
                        'Price': price,
                        'Entry_Price': entry_price,
                        'Return_Pct': 0,
                        'Shares': shares_to_buy,
                        'Value': trade_value,
                        'Commission': commission_cost,
                        'Days_Held': 0,
                        'Portfolio_Value': portfolio_value
                    })

                    trade_labels[i] = 'BUY'
        else:
            # Flat and still cooling down: count the waiting period down
            waiting_period = max(0, waiting_period - 1)

        # Update tracking with the state after this row's trade (if any)
        position_history[i] = position
        capital_history[i] = capital
        value_history[i] = capital + position * price

    df['Position'] = position_history  # Number of shares held
    df['Capital'] = capital_history
    df['Portfolio_Value'] = value_history
    df['Trade_Type'] = trade_labels  # Reason for the trade on that row, '' if none

    # Create a trades dataframe
    trades_df = pd.DataFrame(trades) if trades else pd.DataFrame()

    # Calculate performance metrics
    metrics = calculate_performance_metrics(df, initial_capital, trades_df)

    return df, trades_df, metrics

# Calculate performance metrics for the strategy
def calculate_performance_metrics(performance_data, initial_capital, trades_df):
    """
    Calculate key performance metrics for the trading strategy.

    Parameters:
    - performance_data: DataFrame with 'Date' and 'Portfolio_Value' columns
      (and optionally 'Close' / 'Adj Close' for the buy & hold benchmark)
    - initial_capital: Starting capital amount
    - trades_df: DataFrame with trade details (may be empty or None)

    Returns:
    - metrics: Dictionary with performance metrics. Entries that cannot be
      computed (no benchmark price column, zero-length date span, zero or
      undefined volatility) are None.
    """
    df = performance_data.copy()

    # Basic return figures
    final_value = df['Portfolio_Value'].iloc[-1]
    total_return = (final_value - initial_capital) / initial_capital

    # Buy & hold return for comparison: 'Close' preferred, then 'Adj Close'
    benchmark_col = next((col for col in ('Close', 'Adj Close') if col in df.columns), None)
    if benchmark_col is not None:
        first_price = df[benchmark_col].iloc[0]
        last_price = df[benchmark_col].iloc[-1]
        buy_hold_return = (last_price - first_price) / first_price
    else:
        buy_hold_return = None

    # Annualized return over the calendar span of the data
    days = (df['Date'].iloc[-1] - df['Date'].iloc[0]).days
    years = days / 365.25
    annualized_return = (1 + total_return) ** (1 / years) - 1 if years > 0 else None

    # Volatility: standard deviation of row-to-row returns, annualized with 252
    daily_returns = df['Portfolio_Value'].pct_change()
    volatility = daily_returns.std() * (252 ** 0.5)

    # Sharpe Ratio (assuming risk-free rate of 0.02). It needs both an
    # annualized return and a positive volatility; a NaN volatility fails the
    # comparison and also yields None.
    risk_free_rate = 0.02
    if annualized_return is not None and volatility > 0:
        sharpe_ratio = (annualized_return - risk_free_rate) / volatility
    else:
        sharpe_ratio = None

    # Maximum drawdown relative to the running peak
    running_peak = df['Portfolio_Value'].cummax()
    max_drawdown = ((df['Portfolio_Value'] - running_peak) / running_peak).min()

    # Trade statistics (num_trades counts buys and sells alike)
    num_trades = len(trades_df) if trades_df is not None else 0
    if num_trades > 0:
        win_rate, avg_profit, avg_loss, profit_factor = _summarize_closed_trades(trades_df)
    else:
        win_rate, avg_profit, avg_loss, profit_factor = 0, 0, 0, 0

    # Combine all metrics
    return {
        'Initial Capital': initial_capital,
        'Final Portfolio Value': final_value,
        'Total Return (%)': total_return * 100,
        'Buy & Hold Return (%)': buy_hold_return * 100 if buy_hold_return is not None else None,
        'Strategy Outperformance (%)': (total_return - buy_hold_return) * 100 if buy_hold_return is not None else None,
        'Annualized Return (%)': annualized_return * 100 if annualized_return is not None else None,
        'Volatility (%)': volatility * 100,
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown (%)': max_drawdown * 100,
        'Number of Trades': num_trades,
        'Win Rate (%)': win_rate * 100,
        'Avg Profit ($)': avg_profit,
        'Avg Loss ($)': avg_loss,
        'Profit Factor': profit_factor
    }


def _summarize_closed_trades(trades_df):
    """
    Return (win_rate, avg_profit, avg_loss, profit_factor) for the sells in trades_df.

    Each sell is paired with the last buy row dated strictly before it; sells
    without such a buy are ignored. Profit is the price difference times the
    shares sold, minus the sell commission. All four values are 0 when no sell
    can be paired.
    """
    sells = trades_df[trades_df['Action'] == 'SELL'].copy()
    if sells.empty:
        return 0, 0, 0, 0

    buys = trades_df[trades_df['Action'] == 'BUY']
    buy_prices = []
    for sell_date in sells['Date']:
        earlier_buys = buys[buys['Date'] < sell_date]
        # NaN (rather than None) keeps the column numeric for the arithmetic below
        buy_prices.append(earlier_buys['Price'].iloc[-1] if not earlier_buys.empty else float('nan'))

    sells['Buy_Price'] = buy_prices
    sells = sells.dropna(subset=['Buy_Price'])
    if sells.empty:
        return 0, 0, 0, 0

    profit = (sells['Price'] - sells['Buy_Price']) * sells['Shares'] - sells['Commission']
    wins = profit[profit > 0]
    losses = profit[profit <= 0]

    win_rate = len(wins) / len(profit)
    avg_profit = wins.mean() if len(wins) > 0 else 0
    avg_loss = losses.mean() if len(losses) > 0 else 0

    # NOTE(review): this is the ratio of the average win to the average loss,
    # not gross profit over gross loss -- confirm the intended definition.
    profit_factor = abs(avg_profit / avg_loss) if avg_loss != 0 else float('inf')

    return win_rate, avg_profit, avg_loss, profit_factor

# Visualize results
def visualize_results(data, performance, trades_df, metrics, strategy_name):
    """
    Create visualizations of trading performance.

    Draws three stacked panels (price with executed trades, portfolio value,
    sentiment ratios), tries to save the figure as a PNG and then shows it.
    Plotting is best-effort: any failure is reported with a warning and the
    function returns without raising.

    Parameters:
    - data: Original data with sentiment and signals
    - performance: Portfolio performance data
    - trades_df: DataFrame with trade details (may be None or empty)
    - metrics: Dictionary with performance metrics
    - strategy_name: Name of the strategy for plot titles
    """
    fig = None
    try:
        # Create a figure with multiple subplots
        fig, axes = plt.subplots(3, 1, figsize=(8, 12), gridspec_kw={'height_ratios': [2, 1, 1]})

        # Evaluated once so every use below treats None and empty the same way
        has_trades = trades_df is not None and not trades_df.empty

        # Plot 1: Stock price with buy/sell markers
        ax1 = axes[0]

        price_col = 'Close' if 'Close' in data.columns else 'Adj Close' if 'Adj Close' in data.columns else None

        if price_col:
            ax1.plot(data['Date'], data[price_col], label='Stock Price', color='blue')

            # Markers come from the executed trades rather than the raw signals
            if has_trades:
                buy_trades = trades_df[trades_df['Action'] == 'BUY']
                if not buy_trades.empty:
                    ax1.scatter(buy_trades['Date'], buy_trades['Price'],
                               color='green', label='Buy', marker='^', s=100)

                sell_trades = trades_df[trades_df['Action'] == 'SELL']
                if not sell_trades.empty:
                    ax1.scatter(sell_trades['Date'], sell_trades['Price'],
                               color='red', label='Sell', marker='v', s=100)

            ax1.set_title(f'Stock Price with {strategy_name} Trading Signals', fontsize=14)
            ax1.set_ylabel('Price ($)', fontsize=12)
            ax1.legend()
            ax1.grid(True, alpha=0.3)

        # Plot 2: Portfolio value over time
        ax2 = axes[1]
        ax2.plot(performance['Date'], performance['Portfolio_Value'], label='Portfolio Value', color='purple')

        # Profit-target / stop-loss reference lines on the price panel. They
        # are drawn only when `metrics` carries 'profit_target' / 'stop_loss'
        # entries, one horizontal line per buy.
        if has_trades and 'Entry_Price' in trades_df.columns and price_col:
            for _, trade in trades_df.iterrows():
                if trade['Action'] == 'BUY':
                    if 'profit_target' in metrics:
                        profit_level = trade['Price'] * (1 + metrics['profit_target'])
                        ax1.axhline(y=profit_level, color='green', linestyle=':', alpha=0.3)

                    if 'stop_loss' in metrics:
                        stop_level = trade['Price'] * (1 - metrics['stop_loss'])
                        ax1.axhline(y=stop_level, color='red', linestyle=':', alpha=0.3)

        ax2.set_title('Portfolio Value Over Time', fontsize=14)
        ax2.set_ylabel('Value ($)', fontsize=12)
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Plot 3: Sentiment over time
        ax3 = axes[2]
        ax3.plot(data['Date'], data['Positive_Ratio'], label='Positive Sentiment', color='green')
        ax3.plot(data['Date'], data['Negative_Ratio'], label='Negative Sentiment', color='red')

        # Add a threshold line for buy signals
        if 'threshold' in strategy_name.lower():
            threshold = 0.4  # Default when the name carries no parsable value

            # A name such as "... (threshold=0.5)" overrides the default
            threshold_str = strategy_name.split('threshold=')
            if len(threshold_str) > 1:
                try:
                    threshold = float(threshold_str[1].split(')')[0])
                except ValueError:
                    # Not a number: keep the default
                    pass

            ax3.axhline(y=threshold, color='black', linestyle='--', alpha=0.5, label=f'Buy Threshold ({threshold})')

        ax3.set_title('Sentiment Ratios Over Time', fontsize=14)
        ax3.set_xlabel('Date', fontsize=8)
        ax3.set_ylabel('Sentiment Ratio', fontsize=8)
        ax3.legend()
        ax3.grid(True, alpha=0.7)

        # Add metrics as text
        plt.figtext(0.01, 0.01, format_metrics(metrics), fontsize=6,
                   bbox=dict(facecolor='white', alpha=0.8), verticalalignment='bottom')

        plt.tight_layout()

        # Try to save the figure in a safe way
        try:
            # Safe filename conversion
            safe_name = ''.join([c if c.isalnum() else '_' for c in strategy_name])
            timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            filename = f"trading_results_{safe_name}_{timestamp}.png"

            # Try different paths in case of permission issues
            try_paths = [
                filename,  # Current directory
                f"/tmp/{filename}",  # Temp directory
                f"/content/{filename}",  # Google Colab directory
            ]

            saved = False
            for path in try_paths:
                try:
                    plt.savefig(path, dpi=300, bbox_inches='tight')
                    print(f"Visualization saved as {path}")
                    saved = True
                    break
                except Exception as e:
                    print(f"Could not save to {path}: {e}")

            if not saved:
                print("Warning: Could not save visualization to file. Displaying only.")

        except Exception as e:
            print(f"Warning: Could not save visualization: {e}")

        # Display the figure
        plt.show()

    except Exception as e:
        print(f"Warning: Could not create visualization: {e}")
        print("Continuing without visualization...")
        # Discard the half-built figure so it does not stay registered with pyplot
        if fig is not None:
            plt.close(fig)


# Format metrics for display
def format_metrics(metrics):
    """Render the metrics dictionary as a multi-line text block.

    Float values are shown with two decimals; every other value is shown with
    its default string form. Each entry ends with a newline.
    """
    rendered = [
        f"{label}: {amount:.2f}" if isinstance(amount, float) else f"{label}: {amount}"
        for label, amount in metrics.items()
    ]
    return "Performance Metrics:\n\n" + "".join(entry + "\n" for entry in rendered)

# Main function to run the entire analysis
def run_sentiment_trading_simulation(sentiment_file, stock_price_file, strategy='threshold',
                                     threshold=0.5, initial_capital=100,
                                     profit_target=0.25, stop_loss=0.05, max_holding_days=30):
    """
    Run a complete sentiment-based trading simulation with profit targets.

    The run loads and merges the input data, generates buy signals, backtests
    them, prints the resulting metrics, visualizes the outcome, prints a
    breakdown of SELL reasons, and writes the signals, performance and trades
    tables to timestamped CSV files in the current working directory.

    Parameters:
    - sentiment_file: Path to the sentiment analysis results
    - stock_price_file: Path to the stock price historical data
    - strategy: Trading strategy to use ('threshold', 'relative', or 'trend')
    - threshold: Sentiment threshold for buy signals
    - initial_capital: Starting capital for the simulation
    - profit_target: Target profit percentage for selling (e.g., 0.1 = 10%)
    - stop_loss: Stop loss percentage (e.g., 0.05 = 5%)
    - max_holding_days: Maximum days to hold a position

    Returns:
    - (performance, trades, metrics) exactly as returned by backtest_strategy.
      NOTE(review): performance and trades are used here as pandas
      DataFrames and metrics as a dict-like mapping -- confirm against
      backtest_strategy.
    """
    print("=" * 80)
    print("RUNNING SENTIMENT-BASED TRADING SIMULATION WITH PROFIT TARGETS")
    print(f"Strategy: {strategy}, Sentiment Threshold: {threshold}, Initial Capital: ${initial_capital}")
    print(f"Profit Target: {profit_target*100:.1f}%, Stop Loss: {stop_loss*100:.1f}%, Max Holding: {max_holding_days} days")
    print("=" * 80)

    # Step 1: Load and merge data
    merged_data = load_data(sentiment_file, stock_price_file)

    # Step 2: Generate trading signals (only buy signals based on sentiment)
    signals_data = generate_trading_signals(merged_data, strategy, threshold, profit_target, stop_loss)

    # Step 3: Run backtest simulation with profit targets
    performance, trades, metrics = backtest_strategy(
        signals_data,
        initial_capital=initial_capital,
        profit_target=profit_target,
        stop_loss=stop_loss,
        max_holding_days=max_holding_days
    )

    # Step 4: Print performance metrics
    print("\nPERFORMANCE METRICS:")
    print("-" * 40)
    for key, value in metrics.items():
        if isinstance(value, float):
            print(f"{key}: {value:.2f}")
        else:
            print(f"{key}: {value}")

    # Step 5: Visualize results
    strategy_name = f"{strategy.capitalize()} Buy / {profit_target*100:.0f}% Profit Target"
    visualize_results(signals_data, performance, trades, metrics, strategy_name)

    # Step 6: Analyze trade types (why each SELL was triggered)
    if not trades.empty and 'Reason' in trades.columns:
        # Filter the SELL rows once and reuse them for both counts.
        sell_trades = trades[trades['Action'] == 'SELL']
        reason_counts = sell_trades['Reason'].value_counts()
        total_sells = len(sell_trades)
        print("\nSELL TRADE ANALYSIS:")
        print("-" * 40)
        for reason, count in reason_counts.items():
            print(f"{reason}: {count} trades ({count/total_sells*100:.1f}%)")

    # Step 7: Save detailed results
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

    # round() rather than int(): 0.29 * 100 evaluates to 28.999999999999996,
    # which int() would truncate to 28 and mislabel the output files.
    profit_label = round(profit_target * 100)
    file_suffix = f"{strategy}_profit{profit_label}_{timestamp}.csv"

    # Save signals data
    signals_data.to_csv(f"trading_signals_{file_suffix}", index=False)

    # Save performance data
    performance.to_csv(f"trading_performance_{file_suffix}", index=False)

    # Save trades data (skipped when no trade was executed)
    if not trades.empty:
        trades.to_csv(f"trading_trades_{file_suffix}", index=False)

    print(f"\nDetailed results saved with timestamp {timestamp}")

    return performance, trades, metrics

# Example usage
if __name__ == "__main__":
    # Replace these with your actual file paths
    sentiment_file = "tesla_daily_sentiment_20250403-044037.csv"
    stock_price_file = "TSLA_historical_prices.csv"

    # Baseline run of the profit-taking strategy
    performance, trades, metrics = run_sentiment_trading_simulation(
        sentiment_file=sentiment_file,
        stock_price_file=stock_price_file,
        strategy='threshold',
        threshold=0.4,        # Sentiment threshold for buying
        initial_capital=100,
        profit_target=0.25,   # Sell at 25% profit
        stop_loss=0.03,       # Cut losses at 3%
        max_holding_days=10   # Don't hold longer than 10 days
    )

    # Optionally, compare different profit targets and stop losses
    profit_scenarios = [
        {'profit': 0.05, 'stop': 0.03, 'name': 'Conservative'},
        {'profit': 0.10, 'stop': 0.05, 'name': 'Balanced'},
        {'profit': 0.15, 'stop': 0.08, 'name': 'Aggressive'},
        {'profit': 0.20, 'stop': 0.10, 'name': 'High Risk'}
    ]

    # Maps scenario name -> metrics returned by the simulation
    results = {}

    for scenario in profit_scenarios:
        print(f"\nEvaluating {scenario['name']} profit/loss scenario...")
        # Bind the scenario's metrics to their own name so the baseline
        # `metrics` above stays consistent with `performance` and `trades`.
        _, _, scenario_metrics = run_sentiment_trading_simulation(
            sentiment_file=sentiment_file,
            stock_price_file=stock_price_file,
            strategy='threshold',
            threshold=0.4,  # Keep sentiment threshold constant
            initial_capital=100,
            profit_target=scenario['profit'],
            stop_loss=scenario['stop'],
            max_holding_days=10
        )

        results[scenario['name']] = scenario_metrics

    # Compare performance across different profit/loss scenarios
    print("\nPROFIT TARGET COMPARISON:")
    print("-" * 80)

    metrics_to_compare = ['Total Return (%)', 'Annualized Return (%)',
                          'Sharpe Ratio', 'Max Drawdown (%)', 'Win Rate (%)',
                          'Number of Trades']

    # One section per metric, one line per scenario; metrics a scenario did
    # not report are shown as N/A.
    for metric in metrics_to_compare:
        print(f"\n{metric}:")
        for scenario_name, scenario_metrics in results.items():
            value = scenario_metrics.get(metric)
            if value is not None:
                print(f"  {scenario_name}: {value:.2f}")
            else:
                print(f"  {scenario_name}: N/A")

# Grid search

In [None]:
if __name__ == "__main__":
    import contextlib
    import itertools
    import os

    import matplotlib.pyplot as plt
    import pandas as pd

    def _fmt2(value):
        """Format a number with two decimals; return 'N/A' for anything else."""
        try:
            return f"{value:.2f}"
        except (TypeError, ValueError):
            return "N/A"

    # Replace these with your actual file paths
    sentiment_file = "tesla_daily_sentiment_20250403-044037.csv"
    stock_price_file = "TSLA_historical_prices.csv"

    # Define parameter ranges for grid search
    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    holding_days = [5, 10, 15, 20, 25, 30]
    profit_targets = [0.05, 0.1, 0.15, 0.2, 0.25]
    stop_losses = [0.03, 0.05, 0.07, 0.1]

    # Fixed parameters
    initial_capital = 100
    strategy = 'threshold'

    # Store all results (one dict per successfully evaluated combination)
    all_results = []

    # Track best performing parameter combinations
    best_return = -float('inf')
    best_params = None
    best_sharpe = -float('inf')
    best_sharpe_params = None

    # product() yields combinations in the same order as four nested loops
    # (threshold outermost, stop_loss innermost).
    param_grid = list(itertools.product(thresholds, holding_days, profit_targets, stop_losses))
    total_combinations = len(param_grid)
    current_combination = 0

    print(f"Running grid search with {total_combinations} parameter combinations...")
    print("=" * 80)

    # Disable interactive display for the duration of the grid search. The
    # try/finally guarantees plt.show is restored even if the search is
    # interrupted.
    original_show = plt.show
    plt.show = lambda *args, **kwargs: None
    try:
        for current_combination, combo in enumerate(param_grid, 1):
            threshold, max_holding, profit_target, stop_loss = combo
            print(f"Testing combination {current_combination}/{total_combinations}: "
                  f"threshold={threshold}, holding_days={max_holding}, "
                  f"profit_target={profit_target}, stop_loss={stop_loss}")

            try:
                # Suppress the simulation's detailed output. The context
                # managers restore stdout and close the sink even when the
                # simulation raises, so the error message below is visible.
                with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
                    _, _, metrics = run_sentiment_trading_simulation(
                        sentiment_file=sentiment_file,
                        stock_price_file=stock_price_file,
                        strategy=strategy,
                        threshold=threshold,
                        initial_capital=initial_capital,
                        profit_target=profit_target,
                        stop_loss=stop_loss,
                        max_holding_days=max_holding
                    )

                params = {
                    'Threshold': threshold,
                    'Max Holding Days': max_holding,
                    'Profit Target': profit_target,
                    'Stop Loss': stop_loss
                }

                # Store results
                all_results.append({**params, **metrics})

                # Check if this is the best return so far
                total_return = metrics.get('Total Return (%)', -float('inf'))
                if total_return > best_return:
                    best_return = total_return
                    best_params = dict(params)

                # Check if this is the best Sharpe ratio so far
                sharpe = metrics.get('Sharpe Ratio', -float('inf'))
                if sharpe > best_sharpe:
                    best_sharpe = sharpe
                    best_sharpe_params = dict(params)

                # Print abbreviated results for this combination; missing
                # metrics are shown as N/A instead of raising a format error.
                print(f"  Total Return: {_fmt2(metrics.get('Total Return (%)'))}% | "
                      f"Sharpe: {_fmt2(metrics.get('Sharpe Ratio'))} | "
                      f"Win Rate: {_fmt2(metrics.get('Win Rate (%)'))}% | "
                      f"Trades: {metrics.get('Number of Trades', 'N/A')}")

            except Exception as e:
                # Best effort: report the failure and move on to the next
                # combination.
                print(f"  Error with this combination: {e}")
            finally:
                # Release any figures created during this run so they do not
                # accumulate across hundreds of combinations.
                plt.close('all')
    finally:
        # Restore plt.show
        plt.show = original_show

    # Convert results to DataFrame for easier analysis
    results_df = pd.DataFrame(all_results)

    # Save all results to CSV
    results_df.to_csv("trading_strategy_grid_search_results.csv", index=False)
    print("\nComplete grid search results saved to trading_strategy_grid_search_results.csv")

    if results_df.empty:
        # Every combination failed; there is nothing to rank or summarize.
        print("\nNo parameter combination completed successfully; skipping analysis.")
    else:
        # Display top 10 strategies by total return
        print("\nTOP 10 STRATEGIES BY TOTAL RETURN:")
        print("=" * 80)

        top_by_return = results_df.sort_values('Total Return (%)', ascending=False).head(10)
        # to_dict('records') keeps each column's own type; iterrows() would
        # upcast integer columns to float and print e.g. "10.0" days.
        for i, row in enumerate(top_by_return.to_dict('records'), 1):
            print(f"{i}. Threshold: {row['Threshold']:.1f}, Max Hold: {row['Max Holding Days']}, "
                  f"Profit: {row['Profit Target']:.2f}, Stop: {row['Stop Loss']:.2f} → "
                  f"Return: {row['Total Return (%)']:.2f}%, Sharpe: {row['Sharpe Ratio']:.2f}, "
                  f"Win Rate: {row['Win Rate (%)']:.2f}%, Trades: {row['Number of Trades']}")

        # Display top 10 strategies by Sharpe ratio
        print("\nTOP 10 STRATEGIES BY SHARPE RATIO:")
        print("=" * 80)

        top_by_sharpe = results_df.sort_values('Sharpe Ratio', ascending=False).head(10)
        for i, row in enumerate(top_by_sharpe.to_dict('records'), 1):
            print(f"{i}. Threshold: {row['Threshold']:.1f}, Max Hold: {row['Max Holding Days']}, "
                  f"Profit: {row['Profit Target']:.2f}, Stop: {row['Stop Loss']:.2f} → "
                  f"Sharpe: {row['Sharpe Ratio']:.2f}, Return: {row['Total Return (%)']:.2f}%, "
                  f"Win Rate: {row['Win Rate (%)']:.2f}%, Trades: {row['Number of Trades']}")

        # Display best overall parameters
        print("\nBEST PARAMETERS SUMMARY:")
        print("=" * 80)
        # Either tracker can still be None when the metric was missing or
        # NaN for every combination.
        if best_params is not None:
            print(f"Best by Total Return: Threshold={best_params['Threshold']}, "
                  f"Max Holding Days={best_params['Max Holding Days']}, "
                  f"Profit Target={best_params['Profit Target']}, "
                  f"Stop Loss={best_params['Stop Loss']} → {best_return:.2f}%")
        else:
            print("Best by Total Return: N/A")

        if best_sharpe_params is not None:
            print(f"Best by Sharpe Ratio: Threshold={best_sharpe_params['Threshold']}, "
                  f"Max Holding Days={best_sharpe_params['Max Holding Days']}, "
                  f"Profit Target={best_sharpe_params['Profit Target']}, "
                  f"Stop Loss={best_sharpe_params['Stop Loss']} → {best_sharpe:.2f}")
        else:
            print("Best by Sharpe Ratio: N/A")

        # Parameter distribution analysis
        print("\nPARAMETER DISTRIBUTION ANALYSIS:")
        print("=" * 80)

        # Get top 20% of strategies by return (at least one row, so a small
        # result set still produces a distribution).
        top_count = max(1, len(results_df) // 5)
        top_fifth = results_df.sort_values('Total Return (%)', ascending=False).head(top_count)

        # Analyze distribution of parameters in top performers
        print("Parameter frequency in top 20% of strategies by return:")

        for param in ['Threshold', 'Max Holding Days', 'Profit Target', 'Stop Loss']:
            value_counts = top_fifth[param].value_counts().sort_index()
            print(f"\n{param} distribution:")
            for value, count in value_counts.items():
                print(f"  {value}: {count} strategies ({count/len(top_fifth)*100:.1f}%)")

In [None]:
# Write an extra copy of the grid search results under a scratch filename;
# the message reports the file that is actually written.
results_df.to_csv("aaaaaaa.csv", index=False)
print("\nComplete grid search results saved to aaaaaaa.csv")