# ASG Quant Fund Project

#### Armaan Gandhara

## Week 1

### Data Loader Development

In [2]:
import yfinance as yf
import pandas as pd
from typing import List, Union

In [3]:
class data_loader:

    def __init__(self):
        pass

    def get_data(self, ticker: str, start: str, end: str) -> pd.DataFrame:
        ticker = ticker.replace('.', '-')

        data = yf.download(ticker, start=start, end=end, progress=False)
        if data.empty:
            print(f"[!] Failed to download {ticker}. Skipping.")
            return None
        data.dropna(inplace=True)
        if 'Adj Close' in data.columns:
            data.drop(columns=['Adj Close'], inplace=True)
        #data.rename(columns={'Open': 'open','High': 'high','Low': 'low','Close': 'close','Volume': 'volume'}, inplace=True)

        #required_cols = ['open', 'high', 'low', 'close', 'volume']
        #data = data[required_cols]
        data = data.droplevel('Ticker', axis=1)
        data.reset_index(inplace=True)
        data.index = data['Date']
        del data['Date']
        data.index.name = 'Date'

        return data


    def get_multiple_data(self, tickers: List[str], start: str, end: str) -> dict:
        data_dict = {}
        for ticker in tickers:
            df = self.get_data(ticker, start, end)
            if df is not None:
                data_dict[ticker] = df

        return data_dict

In [14]:
dt = data_loader()
tickers = ['AAPL', 'BRK.B', "JPM"]
data = dt.get_multiple_data(tickers, '2025-01-01', '2025-02-01')
type(data.keys())
if 'AAPL' in data.keys():
    print("YES")
else:
    print("NOO")

YES


In [19]:
data['AAPL']

Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-01-02,243.263199,248.500565,241.238085,248.330961,55740700
2025-01-03,242.774368,243.592387,241.307905,242.774368,40244100
2025-01-06,244.410416,246.73481,242.614744,243.722074,45045600
2025-01-07,241.627136,244.959095,240.769205,242.395272,40856000
2025-01-08,242.115952,243.123531,239.472335,241.33783,37628900
2025-01-10,236.280045,239.582077,232.439303,239.432429,61710900
2025-01-13,233.835922,234.105277,229.167192,232.968021,49630700
2025-01-14,232.718613,235.551775,231.910564,234.185076,39435300
2025-01-15,237.297562,238.38495,233.865838,234.075339,39832000
2025-01-16,227.710693,237.43723,227.481251,236.77883,71759100


### Strategy 1 Mean Reversion

#### Not Used

In [9]:
import pandas as pd
import pandas_ta as ta

  from pkg_resources import get_distribution, DistributionNotFound


In [None]:

class mean_reversion_strategy:
    def __init__(self, lookback: int = 20, std_dev: float = 2.0, threshold: float = 0.0):
        self.lookback = lookback
        self.std_dev = std_dev
        self.threshold = threshold

    def generate_signals(self, data: pd.DataFrame) -> pd.DataFrame:
        df = data.copy()

        bb = ta.bbands(close=df['Close'], length=self.lookback, std=self.std_dev)

        if bb is None or bb.empty:
            raise ValueError("Bollinger Bands calculation failed. Check input data.")

        df = df.join(bb)

        df['signal'] = 0  # Default to hold (0)
        
        df.loc[df['Close'] < df[f'BBL_{self.lookback}_{self.std_dev}'] * (1 - self.threshold), 'signal'] = 1
        
        df.loc[df['Close'] > df[f'BBU_{self.lookback}_{self.std_dev}'] * (1 + self.threshold), 'signal'] = -1

        return df[['signal']]


#### Used

In [None]:
from backtesting import Strategy, Backtest
import pandas as pd
import pandas_ta as pdt

class mean_reversion_strategy(Strategy):
    length = 20
    std = 2.0
    def init(self):
        price = pd.Series(self.data.Close)
        bb = ta.bbands(close=price, length=self.length, std=self.std)

        self.lower = self.I(lambda: bb[f'BBL_{self.length}_{self.std}'])
        self.upper = self.I(lambda: bb[f'BBU_{self.length}_{self.std}'])

    def next(self):
        price = self.data.Close[-1]
        if price < self.lower[-1] and not self.position:
            self.buy(size=int(self.equity / price))
        elif price > self.upper[-1] and not self.position:
            self.sell(size=int(self.equity / price))

        if self.position.is_long and price > self.data.Close[-2]:
            self.position.close()
        elif self.position.is_short and price < self.data.Close[-2]:
            self.position.close()



### Backtesting Engine

In [None]:
class GenericBacktestEngine:
    def __init__(self, strategy_cls, strategy_kwargs: dict = None, cash: float = 10000, commission: float = 0.002):
        self.strategy_cls = strategy_cls
        self.strategy_kwargs = strategy_kwargs or {}
        self.cash = cash
        self.commission = commission
        

    def run(self, data: pd.DataFrame):
        bt = Backtest(
            data,
            self.strategy_cls,
            cash=self.cash,
            commission=self.commission
        )
        stats = bt.run(**self.strategy_kwargs)
        return stats

    def plot(self, data: pd.DataFrame):
        bt = Backtest(
            data,
            self.strategy_cls,
            cash=self.cash,
            commission=self.commission
        )
        bt.run(**self.strategy_kwargs)
        bt.plot()

    def batch_backtest(self, data_dict: dict):
        results = {}
        for ticker, data in data_dict.items():
            try:
                stats = self.run(data)
                results[ticker] = stats
            except Exception as e:
                print(f"Failed on {ticker}: {e}")
        return results


### Improving the Mean Reversion Strategy

In [36]:
from backtesting import Strategy, Backtest
import pandas_ta as ta
class mean_reversion_strategy(Strategy):
    length = 20
    std = 2.0
    
    def init(self):
        price = pd.Series(self.data.Close)
        bb = ta.bbands(close=price, length=self.length, std=self.std)
        self.lower = self.I(lambda: bb[f'BBL_{self.length}_{self.std}'])
        self.upper = self.I(lambda: bb[f'BBU_{self.length}_{self.std}'])

    def next(self):
        price = self.data.Open[-1]
        # Entry
        if price < self.lower[-1]*1.2 and not self.position:
            self.buy(size=int(self.equity / price), sl=(price*0.90), limit=price*0.95)
        elif price > self.upper[-1] and not self.position:
            self.sell(size=int(self.equity / price))

        # Exit
        if self.position.is_long and price >= self.upper[-1]:
            self.position.close()
        elif self.position.is_short and price <= self.lower[-1]:
            self.position.close()

In [37]:
dt = data_loader()
data = dt.get_data("TSLA", '2023-01-01', '2025-06-01')

#Fix index issues
#data.index = data['Date']
#del data['Date']
#data.index.name = 'Date'

engine = GenericBacktestEngine(
    strategy_cls=mean_reversion_strategy,
    strategy_kwargs={'length': 20, 'std': 2.0},
    cash=10000,
    commission=0.000
)

results = engine.run(data)
engine.plot(data)
print(results)




Start                     2023-01-03 00:00:00
End                       2025-05-30 00:00:00
Duration                    878 days 00:00:00
Exposure Time [%]                     76.3245
Equity Final [$]                  24054.21955
Equity Peak [$]                   24894.85929
Return [%]                           140.5422
Buy & Hold Return [%]               100.01154
Return (Ann.) [%]                    44.22484
Volatility (Ann.) [%]                87.73044
CAGR [%]                             28.64946
Sharpe Ratio                           0.5041
Sortino Ratio                         1.30548
Calmar Ratio                          1.00383
Alpha [%]                           130.00837
Beta                                  0.10533
Max. Drawdown [%]                    -44.0561
Avg. Drawdown [%]                    -9.63726
Max. Drawdown Duration      326 days 00:00:00
Avg. Drawdown Duration       42 days 00:00:00
# Trades                                   32
Win Rate [%]                      

In [1]:
sp500 = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
sp500 = sp500['Symbol'].to_list()
tickers = []
sorter = True
for ticker in sp500:
    if sorter:
        tickers.append(ticker)
        
    #sorter = not sorter


NameError: name 'pd' is not defined

In [105]:
data_loader = data_loader()
data = data_loader.get_multiple_data(tickers, '2023-01-01', '2025-06-01')

# Run batch test
engine = GenericBacktestEngine(mean_reversion_strategy)
results = engine.batch_backtest(data)

# Example: Print best performing stocks
sorted_results = sorted(results.items(), key=lambda x: x[1]['Return [%]'], reverse=True)
for ticker, stat in sorted_results[:10]:
    print(f"{ticker}: {stat['Return [%]']:.2f}%")




MPWR: 132.47%
LII: 110.41%
TPL: 95.95%
CRWD: 94.00%
INTU: 92.07%
NOW: 83.37%
MCK: 77.49%
AZO: 72.43%
ISRG: 68.93%
NFLX: 67.76%


In [106]:
num = 0
total = 0
for ticker, stat in sorted_results:
    if stat['Return [%]']!=0:
        total += stat['Return [%]']
        num += 1
        print(f"{ticker}: {stat['Return [%]']:.2f}%")

MPWR: 132.47%
LII: 110.41%
TPL: 95.95%
CRWD: 94.00%
INTU: 92.07%
NOW: 83.37%
MCK: 77.49%
AZO: 72.43%
ISRG: 68.93%
NFLX: 67.76%
MCO: 66.08%
SNPS: 61.07%
META: 58.01%
CEG: 57.25%
RCL: 55.87%
MSCI: 54.77%
KLAC: 52.94%
CPAY: 52.43%
BLK: 52.31%
GS: 51.76%
ORLY: 48.30%
MA: 45.82%
MSFT: 45.79%
HD: 45.58%
GE: 45.31%
AXP: 43.98%
CDNS: 43.90%
TT: 43.87%
MSI: 41.83%
ADSK: 40.44%
RL: 39.92%
ANSS: 38.81%
BKNG: 36.41%
ZBRA: 35.77%
BRK.B: 35.24%
ROP: 34.13%
NOC: 32.87%
VMC: 32.50%
ACN: 31.27%
VRTX: 28.54%
CB: 28.36%
ESS: 28.05%
AXON: 26.73%
GWW: 25.75%
PH: 25.18%
NVR: 25.15%
VRSK: 25.09%
TDY: 24.29%
EQIX: 23.40%
DPZ: 23.05%
PODD: 22.57%
IDXX: 22.54%
AAPL: 22.43%
ETN: 22.18%
CMI: 22.04%
WDAY: 20.84%
GEV: 18.19%
DE: 18.16%
PAYC: 17.28%
WAT: 16.07%
FFIV: 16.06%
ECL: 15.88%
SYK: 15.45%
MOH: 15.30%
RMD: 15.09%
LH: 15.08%
PSA: 14.79%
TDG: 14.40%
CME: 14.05%
MTD: 13.80%
CHTR: 13.75%
CAT: 13.71%
POOL: 13.60%
PWR: 13.48%
UHS: 12.10%
TRV: 11.74%
ANET: 11.46%
GRMN: 11.29%
GD: 10.94%
COIN: 10.80%
RSG: 10.53%
CRM

In [None]:
ave = total/num
ave

9.671420751152958

## Week 2

### Adding RSI to Strategy

In [53]:
from backtesting import Strategy, Backtest
import pandas_ta as ta
class mean_reversion_strategy(Strategy):
    length = 20
    std = 2.0
    RSI_lower = 30
    RSI_upper = 70
    RSI_length = 14
    
    def init(self):
        price = pd.Series(self.data.Close)
        bb = ta.bbands(close=price, length=self.length, std=self.std)
        self.lower = self.I(lambda: bb[f'BBL_{self.length}_{self.std}'])
        self.upper = self.I(lambda: bb[f'BBU_{self.length}_{self.std}'])
        self.rsi = self.I(lambda: ta.rsi(close=price, length=self.RSI_length))



    def next(self):
        price = self.data.Open[-1]
        # Entry
        if price < self.lower[-1]*1.2 and not self.position:
            if self.rsi < self.RSI_lower:
                self.buy(size=int(self.equity / price), sl=(price*0.90), limit=price*0.95)
        elif price > self.upper[-1] and not self.position:
            if self.rsi > self.RSI_upper:
                self.sell(size=int(self.equity / price))

        # Exit
        if self.position.is_long and price >= self.upper[-1]:
            self.position.close()
        elif self.position.is_short and price <= self.lower[-1]:
            self.position.close()

In [56]:
dt = data_loader()
data = dt.get_data("TSLA", '2023-01-01', '2025-06-01')

#Fix index issues
#data.index = data['Date']
#del data['Date']
#data.index.name = 'Date'

engine = GenericBacktestEngine(
    strategy_cls=mean_reversion_strategy,
    strategy_kwargs={'length': 20, 'std': 2.0, 'RSI_upper': 0, 'RSI_lower': 30, 'RSI_length': 70},
    cash=10000,
    commission=0.000
)

results = engine.run(data)
#engine.plot(data)
#print(results)

### Optimizing

In [None]:
# Load data
dt = data_loader()
data = dt.get_data("TSLA", '2023-01-01', '2025-06-01')

# Set up backtest object (no need to pass kwargs yet — the optimizer does this)
bt = Backtest(data, mean_reversion_strategy, cash=10000, commission=0.000)

# Optimize parameters
# Unpack the tuple returned by optimize
best_stats, heatmap = bt.optimize(
    length=[10, 15, 20, 25],
    std=[1.5, 2.0, 2.5],
    RSI_length=[10, 14, 20],
    RSI_upper=[0, 100, 50],
    RSI_lower=[0, 100, 50],
    maximize='Return [%]',
    method='grid',
    return_heatmap=True,
    random_state=42
)

# Now extract best parameters correctly
best_params = best_stats._strategy  # This is a namedtuple

In [81]:
best_params_list = [float(best_params.length), float(best_params.std), float(best_params.RSI_length), float(best_params.RSI_upper), float(best_params.RSI_lower)]
best_params_list


[20.0, 2.0, 10.0, 0.0, 100.0]

### Benchmark Development

In [353]:
from backtesting import Strategy

class BenchmarkBuyHold(Strategy):
    def init(self):
        self.buy_executed = False
        self.current_bar = 0
        self.total_bars = len(self.data)  # Full dataset length

    def next(self):
        # Buy once on the first bar
        if not self.buy_executed:
            self.buy(size=int(self.equity / self.data.Close[-1]))
            self.buy_executed = True
            print("BUY executed")


        self.current_bar += 1
        print(self.current_bar)
        print(self.total_bars)
        if self.current_bar+1 == self.total_bars and self.position:
            self.position.close()
            print("CLOSE executed")


In [393]:
import pandas as pd
import numpy as np
import datetime
from scipy.stats import linregress

class benchmark:

    def __init__(self, start, end, ticker: str = '^GSPC'):
        self.ticker = ticker
        self.start = datetime.datetime.strptime(start, "%Y-%m-%d")
        self.end = datetime.datetime.strptime(end, "%Y-%m-%d")
        dt = data_loader()
        self.benchmark_data = dt.get_data(self.ticker, self.start, self.end)
        self.daily_returns = self.get_daily_returns()
        self.returns_df = self.benchmark_data[['Close']].copy()
        self.returns_df['Returns'] = self.daily_returns
        self.returns_df.dropna(inplace=True)

    def get_daily_returns(self):
        returns = self.benchmark_data['Close'].pct_change().dropna()
        return returns  # Series

    def get_total_return(self):
        start_price = self.benchmark_data['Close'].iloc[0]
        end_price = self.benchmark_data['Close'].iloc[-1]
        return (end_price / start_price) - 1

    def get_metrics(self, risk_free_rate=0.0):
        df = self.returns_df.copy()
        df['Cumulative'] = (1 + df['Returns']).cumprod()
        df['Peak'] = df['Cumulative'].cummax()
        df['Drawdown'] = df['Cumulative'] / df['Peak'] - 1

        duration = self.end - self.start
        total_return = self.get_total_return()
        annualized_return = (1 + total_return) ** (252 / len(df)) - 1
        volatility = df['Returns'].std() * np.sqrt(252)
        cagr = (df['Cumulative'].iloc[-1]) ** (1 / (len(df) / 252)) - 1
        sharpe = (df['Returns'].mean() * 252 - risk_free_rate) / (df['Returns'].std() * np.sqrt(252))
        downside_returns = df[df['Returns'] < 0]['Returns']
        sortino = (df['Returns'].mean() * 252) / (downside_returns.std() * np.sqrt(252)) if not downside_returns.empty else np.nan
        max_dd = df['Drawdown'].min()
        avg_dd = df['Drawdown'][df['Drawdown'] < 0].mean()

        # Drawdown duration
        drawdown_durations = []
        current_dd_duration = 0
        for dd in df['Drawdown']:
            if dd < 0:
                current_dd_duration += 1
            else:
                if current_dd_duration > 0:
                    drawdown_durations.append(current_dd_duration)
                    current_dd_duration = 0
        if current_dd_duration > 0:
            drawdown_durations.append(current_dd_duration)
        max_dd_duration = max(drawdown_durations) if drawdown_durations else 0
        avg_dd_duration = np.mean(drawdown_durations) if drawdown_durations else 0

        # Alpha & Beta vs market (self vs itself here, but you could modify this to compare with another benchmark)
        market_returns = df['Returns']
        slope, intercept, r_value, p_value, std_err = linregress(market_returns, df['Returns'])
        beta = slope
        alpha = (annualized_return - risk_free_rate) - beta * (annualized_return - risk_free_rate)

        return pd.Series({
            'Start': self.start,
            'End': self.end,
            'Duration': duration,
            'Exposure Time [%]': 100.0,  # Always exposed
            'Equity Final [$]': df['Cumulative'].iloc[-1] * 10000,
            'Equity Peak [$]': df['Peak'].max() * 10000,
            'Return [%]': total_return * 100,
            'Buy & Hold Return [%]': total_return * 100,
            'Return (Ann.) [%]': annualized_return * 100,
            'Volatility (Ann.) [%]': volatility * 100,
            'CAGR [%]': cagr * 100,
            'Sharpe Ratio': sharpe,
            'Sortino Ratio': sortino,
            'Calmar Ratio': cagr / abs(max_dd) if max_dd != 0 else np.nan,
            'Alpha [%]': alpha * 100,
            'Beta': beta,
            'Max. Drawdown [%]': max_dd * 100,
            'Avg. Drawdown [%]': avg_dd * 100,
            'Max. Drawdown Duration': pd.Timedelta(days=int(max_dd_duration)),
            'Avg. Drawdown Duration': pd.Timedelta(days=int(avg_dd_duration)),
        })


In [390]:
benchmark = benchmark('2025-01-01', '2025-06-09')
daily_returns = benchmark.daily_returns
total_returns = benchmark.get_total_return()

In [391]:
data = benchmark.benchmark_data
price = pd.Series(data['Close'])
beginning_price = price.iloc[0]
ending_price = price.iloc[-1]
print(beginning_price)
print(ending_price)

5868.5498046875
6000.35986328125


In [396]:
b = benchmark('2025-01-01', '2025-06-8')
print(b.get_metrics())


Start                     2025-01-01 00:00:00
End                       2025-06-08 00:00:00
Duration                    158 days, 0:00:00
Exposure Time [%]                       100.0
Equity Final [$]                 10224.604141
Equity Peak [$]                   10469.62215
Return [%]                           2.246041
Buy & Hold Return [%]                2.246041
Return (Ann.) [%]                    5.422472
Volatility (Ann.) [%]               26.014657
CAGR [%]                             5.422472
Sharpe Ratio                         0.330548
Sortino Ratio                        0.444881
Calmar Ratio                          0.28687
Alpha [%]                                 0.0
Beta                                      1.0
Max. Drawdown [%]                  -18.902206
Avg. Drawdown [%]                    -6.07445
Max. Drawdown Duration       75 days 00:00:00
Avg. Drawdown Duration       32 days 00:00:00
dtype: object


### Risk Management

In [89]:
class RiskManagement:
    """A class to manage risk allocations and strategy distribution based on user's profile."""
    
    # Class-level constants for better maintainability
    TOLERANCE_MAPPING = {
        'low': 0.2,
        'medium': 0.5,
        'high': 0.8
    }
    
    TIME_MAPPING = {
        'long': 0.2,
        'medium': 0.5,
        'short': 0.8
    }
    
    MAX_RISK_SCORE = TOLERANCE_MAPPING['high'] * TIME_MAPPING['short']

    def __init__(self, user_tolerance: str, user_time: str):
        """Initialize with user's risk tolerance and time horizon.
        
        Args:
            user_tolerance: Risk tolerance ('low', 'medium', or 'high')
            user_time: Time horizon ('long', 'medium', or 'short')
        """
        self.user_tolerance = user_tolerance.lower()
        self.user_time = user_time.lower()
        
        # Validate inputs
        self._validate_inputs()
        self._risk_score = self._get_risk_score()  # Calculate risk score at initialization

    def _validate_inputs(self):
        """Validate that inputs are within expected values."""
        if self.user_tolerance not in self.TOLERANCE_MAPPING:
            raise ValueError(f"Invalid risk tolerance. Expected one of: {list(self.TOLERANCE_MAPPING.keys())}")
            
        if self.user_time not in self.TIME_MAPPING:
            raise ValueError(f"Invalid time horizon. Expected one of: {list(self.TIME_MAPPING.keys())}")

    def _get_risk_score(self) -> float:
        """Calculate and return the risk allocation ratio.
        
        Returns:
            float: Risk allocation ratio between 0 and 1
        """
        tol_num = self.TOLERANCE_MAPPING[self.user_tolerance]
        tim_num = self.TIME_MAPPING[self.user_time]
        
        risk_score = (tol_num * tim_num) / self.MAX_RISK_SCORE
        
        # Ensure the result is between 0 and 1
        return max(0.0, min(1.0, risk_score))

    def get_strategy_allocation(self) -> dict:
        """Calculate allocation percentages for each strategy using curved relationships.
        
        Returns:
            dict: {'mean_reversion': x%, 'momentum': y%, 'factor_investing': z%}
        """
        # Strategy weights based on risk score
        mean_rev = self._risk_score * 0.7  # Short-term strategy scales with risk
        momentum = 0.5 - (0.5 - self._risk_score)**2  # Medium-term peaks in middle
        factor_inv = (1 - self._risk_score) * 0.7  # Long-term scales inversely
        
        # Normalize to 100%
        total = mean_rev + momentum + factor_inv
        return {
            'mean_reversion': mean_rev / total,
            'momentum': momentum / total,
            'factor_investing': factor_inv / total
        }

    def get_risk_profile(self) -> dict:
        """Return comprehensive risk profile including score and allocations.
        
        Returns:
            dict: {
                'risk_score': float,
                'allocations_advanced': dict,
                'allocations_simple': dict
            }
        """
        return {
            'risk_score': self._risk_score,
            'allocations_advanced': self.get_strategy_allocation(),
        }

In [93]:
# Create risk profile for medium risk, medium time horizon
rm = RiskManagement('High', 'Long')

# Get complete profile
profile = rm.get_risk_profile()
print(f"Risk Score: {profile['risk_score']:.2f}")
print("Advanced Allocation:", profile['allocations_advanced'])

# Or get individual components
allocations = rm.get_strategy_allocation()
print(f"Mean Reversion: {allocations['mean_reversion']:.1%}")

Risk Score: 0.25
Advanced Allocation: {'mean_reversion': 0.15384615384615385, 'momentum': 0.38461538461538464, 'factor_investing': 0.46153846153846145}
Mean Reversion: 15.4%
