In [1]:
import pandas as pd
import numpy as np

class BacktestEngine:
    def __init__(self, initial_capital=100000, transaction_cost=0.001):
        """
        Initialize backtester.
        
        Parameters:
        - initial_capital: Starting amount of money
        - transaction_cost: Percentage cost per trade (0.1% = 0.001)
        """
        self.initial_capital = initial_capital
        self.transaction_cost = transaction_cost
        self.portfolio_value = [initial_capital]
        self.cash = initial_capital
        self.shares = 0
        self.trades = []
    
    def process_signal(self, date, signal, price, sentiment_score):
        """
        Execute trade based on signal.
        
        Signals:
        - 1: Buy signal
        - -1: Sell signal
        - 0: Hold
        """
        if signal == 1 and self.cash > 0:
            # Buy: use all available cash
            transaction_cost_amount = self.cash * self.transaction_cost
            buy_amount = self.cash - transaction_cost_amount
            self.shares = buy_amount / price
            self.cash = 0
            
            self.trades.append({
                'date': date,
                'action': 'BUY',
                'price': price,
                'shares': self.shares,
                'sentiment': sentiment_score
            })
        
        elif signal == -1 and self.shares > 0:
            # Sell: liquidate all shares
            sell_proceeds = self.shares * price
            transaction_cost_amount = sell_proceeds * self.transaction_cost
            self.cash = sell_proceeds - transaction_cost_amount
            self.shares = 0
            
            self.trades.append({
                'date': date,
                'action': 'SELL',
                'price': price,
                'proceeds': sell_proceeds - transaction_cost_amount,
                'sentiment': sentiment_score
            })
    
    def calculate_portfolio_value(self, current_price):
        """Calculate current portfolio value (cash + stock holdings)."""
        stock_value = self.shares * current_price
        return self.cash + stock_value
    
    def run_backtest(self, df, signal_column):
        """
        Run full backtest on data.
        
        Parameters:
        - df: DataFrame with date, Close price, and signal column
        - signal_column: Column name containing buy/sell/hold signals
        
        Returns: Results DataFrame with metrics
        """
        daily_values = []
        
        for idx, row in df.iterrows():
            # Process signal
            self.process_signal(
                date=row['date'],
                signal=row[signal_column],
                price=row['Close'],
                sentiment_score=row.get('sentiment', 0)
            )
            
            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(row['Close'])
            daily_values.append({
                'date': row['date'],
                'portfolio_value': portfolio_value,
                'price': row['Close'],
                'cash': self.cash,
                'shares': self.shares
            })
        
        df_results = pd.DataFrame(daily_values)
        return df_results
    
    def calculate_metrics(self, df_results):
        """
        Calculate performance metrics.
        
        Returns: Dictionary with key metrics
        """
        returns = df_results['portfolio_value'].pct_change().dropna()
        
        total_return = (df_results['portfolio_value'].iloc[-1] / self.initial_capital - 1) * 100
        annual_return = (((df_results['portfolio_value'].iloc[-1] / self.initial_capital) ** (252 / len(df_results)) - 1)) * 100
        
        daily_volatility = returns.std()
        annual_volatility = daily_volatility * np.sqrt(252)
        
        sharpe_ratio = (returns.mean() * 252) / (daily_volatility * np.sqrt(252))
        
        # Max drawdown
        cummax = df_results['portfolio_value'].expanding().max()
        drawdown = (df_results['portfolio_value'] - cummax) / cummax
        max_drawdown = drawdown.min() * 100
        
        # Win rate (profitable days)
        profitable_days = (returns > 0).sum()
        win_rate = (profitable_days / len(returns)) * 100
        
        return {
            'total_return': total_return,
            'annual_return': annual_return,
            'annual_volatility': annual_volatility,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'win_rate': win_rate,
            'num_trades': len(self.trades),
            'final_portfolio_value': df_results['portfolio_value'].iloc[-1]
        }

In [14]:
df_combined = pd.read_csv("processed_financial_data_with_sentiment.csv")
def generate_signal(sentiment_score, threshold=0.3):
    if sentiment_score > threshold:
        return 1     # Buy
    elif sentiment_score < -threshold:
        return -1    # Sell
    else:
        return 0     # Hold


df_combined['signal_vader'] = df_combined['vader_sentiment'].apply(
    lambda x: generate_signal(x, threshold=0.3)
)

df_combined['signal_lr'] = df_combined['lr_sentiment'].apply(
    lambda x: generate_signal(x, threshold=0.3)
)

df_combined['signal_finbert'] = df_combined['finbert_sentiment'].apply(
    lambda x: generate_signal(x, threshold=0.3)
)


In [15]:

# Run backtests
backtester_finbert = BacktestEngine(initial_capital=100000, transaction_cost=0.001)
df_finbert_results = backtester_finbert.run_backtest(
    df=df_combined,
    signal_column='signal_finbert'
)

metrics_finbert = backtester_finbert.calculate_metrics(df_finbert_results)

print("FINBERT STRATEGY BACKTEST RESULTS")
print("="*60)
print(f"Total Return: {metrics_finbert['total_return']:.2f}%")
print(f"Annual Return: {metrics_finbert['annual_return']:.2f}%")
print(f"Annual Volatility: {metrics_finbert['annual_volatility']:.2f}%")
print(f"Sharpe Ratio: {metrics_finbert['sharpe_ratio']:.3f}")
print(f"Max Drawdown: {metrics_finbert['max_drawdown']:.2f}%")
print(f"Win Rate: {metrics_finbert['win_rate']:.2f}%")
print(f"Number of Trades: {metrics_finbert['num_trades']}")


backtester_vader = BacktestEngine(initial_capital=100000, transaction_cost=0.001)
df_vader_results = backtester_vader.run_backtest(df_combined, 'signal_vader')
metrics_vader = backtester_vader.calculate_metrics(df_vader_results)

backtester_lr = BacktestEngine(initial_capital=100000, transaction_cost=0.001)
df_lr_results = backtester_lr.run_backtest(df_combined, 'signal_lr')
metrics_lr = backtester_lr.calculate_metrics(df_lr_results)

FINBERT STRATEGY BACKTEST RESULTS
Total Return: 9.75%
Annual Return: 61.33%
Annual Volatility: 0.22%
Sharpe Ratio: 2.372
Max Drawdown: -5.33%
Win Rate: 60.42%
Number of Trades: 5


In [16]:
# Compare metrics
comparison = pd.DataFrame({
    'VADER': metrics_vader,
    'Logistic Regression': metrics_lr,
    'FinBERT': metrics_finbert
})
print(comparison)

                               VADER  Logistic Regression        FinBERT
total_return               10.330392            14.299207       9.746202
annual_return              65.796407            98.841934      61.330855
annual_volatility           0.217830             0.174334       0.217968
sharpe_ratio                2.501874             4.114671       2.372283
max_drawdown               -4.683039            -2.843778      -5.327757
win_rate                   62.500000            47.916667      60.416667
num_trades                  1.000000             1.000000       5.000000
final_portfolio_value  110330.391947        114299.207164  109746.202401
