In [19]:
import pandas as pd
import numpy as np
#!wget "https://raw.githubusercontent.com/Dhruba34/Data-Science-projects/refs/heads/main/assignments%20%26%20project/week%204/df.py"
import df
class BacktestEngine:
    def __init__(self, initial_capital=100000, transaction_cost=0.001):
        """
        Initialize backtester.
        
        Parameters:
        - initial_capital: Starting amount of money
        - transaction_cost: Percentage cost per trade (0.1% = 0.001)
        """
        self.initial_capital = initial_capital
        self.transaction_cost = transaction_cost
        self.portfolio_value = [initial_capital]
        self.cash = initial_capital
        self.shares = 0
        self.trades = []
    
    def process_signal(self, date, signal, price, sentiment_score):
        """
        Execute trade based on signal.
        
        Signals:
        - 1: Buy signal
        - -1: Sell signal
        - 0: Hold
        """
        if signal == 1 and self.cash > 0:
            # Buy: use all available cash
            transaction_cost_amount = self.cash * self.transaction_cost
            buy_amount = self.cash - transaction_cost_amount
            self.shares = buy_amount / price
            self.cash = 0
            
            self.trades.append({
                'date': date,
                'action': 'BUY',
                'price': price,
                'shares': self.shares,
                'sentiment': sentiment_score
            })
        
        elif signal == -1 and self.shares > 0:
            # Sell: liquidate all shares
            sell_proceeds = self.shares * price
            transaction_cost_amount = sell_proceeds * self.transaction_cost
            self.cash = sell_proceeds - transaction_cost_amount
            self.shares = 0
            
            self.trades.append({
                'date': date,
                'action': 'SELL',
                'price': price,
                'proceeds': sell_proceeds - transaction_cost_amount,
                'sentiment': sentiment_score
            })
    
    def calculate_portfolio_value(self, current_price):
        """Calculate current portfolio value (cash + stock holdings)."""
        stock_value = self.shares * current_price
        return self.cash + stock_value
    
    def run_backtest(self, df, signal_column):
        """
        Run full backtest on data.
        
        Parameters:
        - df: DataFrame with date, Close price, and signal column
        - signal_column: Column name containing buy/sell/hold signals
        
        Returns: Results DataFrame with metrics
        """
        daily_values = []
        
        for idx, row in df.iterrows():
            # Process signal
            self.process_signal(
                date=row['date'],
                signal=row[signal_column],
                price=row['Close'],
                sentiment_score=row.get('sentiment', 0)
            )
            
            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(row['Close'])
            daily_values.append({
                'date': row['date'],
                'portfolio_value': portfolio_value,
                'price': row['Close'],
                'cash': self.cash,
                'shares': self.shares
            })
        
        df_results = pd.DataFrame(daily_values)
        return df_results
    
    def calculate_metrics(self, df_results):
        """
        Calculate performance metrics.
        
        Returns: Dictionary with key metrics
        """
        returns = df_results['portfolio_value'].pct_change().dropna()
        time=(df_results['date'].iloc[-1]-df_results['date'].iloc[0]).days
        total_return = (df_results['portfolio_value'].iloc[-1] / self.initial_capital - 1) * 100
        annual_return = (((df_results['portfolio_value'].iloc[-1] / self.initial_capital) ** (252 / time) - 1)) * 100
        
        daily_volatility = returns.std()
        annual_volatility = daily_volatility * np.sqrt(252)
        
        sharpe_ratio = (returns.mean() * 252) / (daily_volatility * np.sqrt(252))
        
        # Max drawdown
        cummax = df_results['portfolio_value'].expanding().max()
        drawdown = (df_results['portfolio_value'] - cummax) / cummax
        max_drawdown = drawdown.min() * 100
        
        # Win rate (profitable days)
        profitable_days = (returns > 0).sum()
        win_rate = (profitable_days / len(returns)) * 100
        
        return {
            'total_return': total_return,
            'annual_return': annual_return,
            'annual_volatility': annual_volatility,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'win_rate': win_rate,
            'num_trades': len(self.trades),
            'final_portfolio_value': df_results['portfolio_value'].iloc[-1]
        }
    def summary_report(self, metrics):
        """Print formatted performance report."""
        report = f"""
        BACKTEST SUMMARY REPORT
        ═════════════════════════════════════════════════════════
        
        RETURNS:
          Total Return: {metrics['total_return']:.2f}%
          Annual Return: {metrics['annual_return']:.2f}%
        
        RISK:
          Annual Volatility: {metrics['annual_volatility']:.2f}%
          Maximum Drawdown: {metrics['max_drawdown']:.2f}%
          Sharpe Ratio: {metrics['sharpe_ratio']:.3f}
        
        TRADING:
          Number of Trades: {metrics['num_trades']}
          Win Rate: {metrics['win_rate']:.2f}%
          Final Portfolio Value: ${metrics['final_portfolio_value']:,.2f}
        """
        print(report)
        return report

In [20]:
obj=BacktestEngine()
obj2=df.RealDataPipeline(start_date='2018-01-01',end_date='2021-12-31')
obj2.download_stock_prices()
obj2.load_financial_news('apple_news_data.csv')
obj2.align_data()
df=obj2.df_combined.head()

Downloading AAPL prices from 2018-01-01 to 2021-12-31...
Downloaded 1007 days of price data
Loading news data from apple_news_data.csv...
Loaded 29752 news articles for AAPL
Combined dataset shape: (399, 7)
Date range: 2018-01-31 to 2021-12-30


In [21]:
df.head()

Unnamed: 0,date,Close,High,Low,Open,Volume,all_headlines
0,2018-01-31,39.210747,39.447283,38.99295,39.0796,129915600,Investor Expectations to Drive Momentum within...
1,2018-03-16,41.860817,42.119476,41.766756,42.008957,157618800,Top 100 Reputable Companies Around the Globe A...
2,2018-03-27,39.584591,41.185939,39.250683,40.840273,163690400,Universal Display Corporation Stock Is Way Und...
3,2018-04-16,41.343491,41.430494,41.110694,41.157723,86313600,Detailed Research: Economic Perspectives on Ge...
4,2018-06-22,43.651009,43.941353,43.599077,43.934272,108801600,"Apple iPhone Spared Tariffs, But Could Face Ch..."


In [22]:
df['sentiment']=0
df['sentiment'].iloc[0]=1
df['sentiment'].iloc[-1]=-1
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment']=0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versu

Unnamed: 0,date,Close,High,Low,Open,Volume,all_headlines,sentiment
0,2018-01-31,39.210747,39.447283,38.99295,39.0796,129915600,Investor Expectations to Drive Momentum within...,1
1,2018-03-16,41.860817,42.119476,41.766756,42.008957,157618800,Top 100 Reputable Companies Around the Globe A...,0
2,2018-03-27,39.584591,41.185939,39.250683,40.840273,163690400,Universal Display Corporation Stock Is Way Und...,0
3,2018-04-16,41.343491,41.430494,41.110694,41.157723,86313600,Detailed Research: Economic Perspectives on Ge...,0
4,2018-06-22,43.651009,43.941353,43.599077,43.934272,108801600,"Apple iPhone Spared Tariffs, But Could Face Ch...",-1


In [23]:
res=obj.run_backtest(df,'sentiment')
metric=obj.calculate_metrics(res)
report=obj.summary_report(metric)


        BACKTEST SUMMARY REPORT
        ═════════════════════════════════════════════════════════

        RETURNS:
          Total Return: 11.10%
          Annual Return: 20.54%

        RISK:
          Annual Volatility: 0.89%
          Maximum Drawdown: -5.44%
          Sharpe Ratio: 7.995

        TRADING:
          Number of Trades: 2
          Win Rate: 75.00%
          Final Portfolio Value: $111,101.56
        
