<a href="https://colab.research.google.com/github/Pankaj-2003/Strategy_Performance_Python/blob/main/Quant_Analyst_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime , timedelta

## Dataframe columns
### Date (datetime64[ns]): The date and time of the trade
### Symbol (string): The ticker symbol of the traded security
### Side (string): Either 'buy' or 'sell'
### Size (float, optional): The number of shares traded (default to 1 if not provided)
### Price (float): The price at which the trade was executed

In [2]:
def getTickerPrice(ticker: str, date: datetime) -> float:
    # This function returns the price of the security 'ticker' at the given 'date'
    # For the purpose of this exercise, assume it returns a random number
    return random.uniform(1, 100)  # Example implementation

In [None]:
metrics = ["Sharpe" , "max-drawdown" ,  "profit factor" , "total net profit" , "Percent Profitable" , "average win\loss Ratio" , "Expectancy" , "Recovery factor" , "average r multiple" , "ROI"]

In [3]:
def calculate_sharpe(daily_returns, risk_free_rate=0.01):
    average_daily_return = daily_returns.mean()
    std_daily_return = daily_returns.std()

    # Avoid division by zero
    if std_daily_return == 0:
        return np.inf

    sharpe_ratio = (average_daily_return - risk_free_rate) / std_daily_return
    return sharpe_ratio

In [4]:
def calculate_max_drawdown(daily_returns):
    cumulative_return = (1 + daily_returns).cumprod()
    cumulative_max = cumulative_return.cummax()
    drawdown = cumulative_return - cumulative_max
    max_drawdown = drawdown.min()

    return max_drawdown

In [5]:
def calculate_profit_factor(trades):
    winning_trades = trades[trades['Return'] > 0]
    losing_trades = trades[trades['Return'] < 0]

    total_profit = winning_trades['Return'].sum()
    total_loss = -losing_trades['Return'].sum()  # Negate to get positive loss value

    if total_loss == 0:
        return np.inf if total_profit > 0 else 0  # Handle edge cases

    profit_factor = total_profit / total_loss
    return profit_factor


In [6]:
def percent_profitable(trades):
  winning_trades = trades[trades["Return"] > 0]
  total_trades = len(trades)
  percent_profitable = (len(winning_trades) / total_trades) * 100 if total_trades > 0 else 0
  return percent_profitable

In [7]:
def average_win_loss_ratio(trades):
    average_win = trades[trades["Return"] > 0]["Return"].mean() if len(trades[trades["Return"] > 0]) > 0 else 0
    average_loss = trades[trades["Return"] < 0]["Return"].mean() if len(trades[trades["Return"] < 0]) > 0 else 0

    average_win_loss_ratio = average_win / average_loss if average_loss  != 0 else 0
    return average_win_loss_ratio


In [8]:
def expectancy(trades):
  average_profit = trades[trades["Return"] > 0]["Return"].mean() if len(trades[trades["Return"] > 0]) > 0 else 0
  average_loss = trades[trades["Return"] < 0]["Return"].mean() if len(trades[trades["Return"] < 0]) > 0 else 0
  winning_trades = trades[trades["Return"] > 0]
  losing_trades = trades[trades["Return"] < 0]
  total_trades = len(trades)
  win_rate = len(winning_trades) / total_trades if total_trades > 0 else 0
  loss_rate = len(losing_trades) / total_trades if total_trades > 0 else 0

  expectancy = (win_rate * average_profit) - (loss_rate * average_loss)
  return expectancy

In [9]:
def recovery_factor(trades  ,daily_returns):
  total_profit = trades[trades['Return'] > 0]['Return'].sum()
  total_loss = -trades[trades['Return'] < 0]['Return'].sum()  # Negate to get positive loss value
  total_net_profit = total_profit - total_loss
  max_dd = calculate_max_drawdown(daily_returns)
  recovery_factor = total_net_profit / abs(max_dd) if max_dd != 0 else np.inf
  return recovery_factor

In [10]:
def trade_perf(trades : pd.DataFrame) -> pd.Series:
  # if dataframe is empty
  if trades.empty:
    return pd.Series({
            'Sharpe': 0,
            'max-drawdown': 0,
            'profit factor': 0,
            'total net profit': 0,
            'Percent Profitable': 0,
            'average win\loss Ratio': 0,
            'Expectancy': 0,
            'Recovery factor': 0,
            'volatility': 0,
            'ROI': 0})

  # handling missing values

  trades['Size'] = trades['Size'].fillna(1)
  trades = trades[trades['Symbol'].notna()]
  trades = trades[trades['Side'].notna()]

  # seperate way to fill price
  def fill_missing_prices(row):
    if pd.isna(row['Price']):
        row['Price'] = getTickerPrice(row['Symbol'], row['Date'])
    return row
  trades = trades.apply(fill_missing_prices, axis=1)




  # calculating returns
  def calculate_return(row):
      # Get the current date as a string in the format 'YYYY-MM-DD'
      current_date = datetime.now().strftime('%Y-%m-%d')
      # Fetch the current price using the current date
      current_price = getTickerPrice(row['Symbol'], current_date)
      if row['Side'] == 'buy':
          return (current_price - row['Price']) * row['Size']
      else:
          return (row['Price'] - current_price) * row['Size']

  trades['Return'] = trades.apply(calculate_return, axis=1 )  # Applying the function to each row of the df
  daily_returns = trades.groupby('Date')['Return'].sum() # daily return for sharpe


  # 1- calculating metrics

  sharpe_ratio = calculate_sharpe(daily_returns)
  print(trades)

  # 2 - max_drawdown
  maxdd = calculate_max_drawdown(daily_returns)

  # 3 - profit factor
  profit_factor = calculate_profit_factor(trades)

  # 4 - total net profit
  total_profit = trades[trades['Return'] > 0]['Return'].sum()
  total_loss = -trades[trades['Return'] < 0]['Return'].sum()  # Negate to get positive loss value
  total_net_profit = total_profit - total_loss

  # 5 - percent profitable
  perc_profitable = percent_profitable(trades)
  # 6 - average win/loss ratio
  avg_win_loss_ratio = average_win_loss_ratio(trades)


  # 7 - expectancy
  expectncy = expectancy(trades)

  # 8 - recovery factor
  recovery_factr = recovery_factor(trades  ,daily_returns)

  # 9 - volatility
  volatility = trades['Return'].std() #we can calculate stock wise volatility using  trades.groupby('Symbol')['Return'].std()


  # 10 - roi
  total_investement = (trades["Price"]* trades["Size"]).sum()
  roi = (total_net_profit / total_investement) * 100 if total_investement != 0 else 0


  metrics = pd.Series({
            'Sharpe': sharpe_ratio,
            'max-drawdown': maxdd,
            'profit factor': profit_factor,
            'total net profit': total_net_profit,
            'Percent Profitable': perc_profitable,
            'average win\loss Ratio': avg_win_loss_ratio,
            'Expectancy': expectncy,
            'Recovery factor': recovery_factr,
            'volatility': volatility,
            'ROI': roi})
  metrics_rounded = metrics.round(2)
  metrics = metrics_rounded.apply(lambda x: f"{x:.2f}")

  return metrics

In [212]:
symbols = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
sides = ['buy', 'sell']
base_date = datetime(2024, 7, 15)
dates = [base_date + timedelta(days=i//4) for i in range(20)]
data = {
    'Date': dates,
    'Symbol': [random.choice(symbols) for _ in range(20)],
    'Side': [random.choice(sides) for _ in range(20)],
    'Size': [random.randint(1, 100) for _ in range(20)],
    'Price': [random.uniform(50, 300) for _ in range(20)],
}
trades = pd.DataFrame(data)




In [213]:
# Introduce missing values randomly
for col in ['Symbol', 'Size', 'Price']:
    trades.loc[random.sample(range(len(trades)), k=5), col] = np.nan  # Set 5 random entries in each column to NaN

In [214]:
trades

Unnamed: 0,Date,Symbol,Side,Size,Price
0,2024-07-15,,sell,58.0,
1,2024-07-15,GOOGL,buy,73.0,245.181768
2,2024-07-15,,sell,,
3,2024-07-15,GOOGL,buy,1.0,174.587931
4,2024-07-16,,sell,26.0,
5,2024-07-16,AMZN,sell,4.0,78.810939
6,2024-07-16,GOOGL,buy,12.0,110.556298
7,2024-07-16,AAPL,sell,,
8,2024-07-17,,sell,,167.581858
9,2024-07-17,GOOGL,sell,64.0,208.570899


In [215]:
trade_perf(trades)

         Date Symbol  Side  Size       Price        Return
1  2024-07-15  GOOGL   buy  73.0  245.181768 -14594.983138
3  2024-07-15  GOOGL   buy   1.0  174.587931   -154.364512
5  2024-07-16   AMZN  sell   4.0   78.810939    276.574466
6  2024-07-16  GOOGL   buy  12.0  110.556298   -231.984775
7  2024-07-16   AAPL  sell   1.0   60.648286     26.545737
9  2024-07-17  GOOGL  sell  64.0  208.570899   9189.684453
10 2024-07-17   MSFT   buy  31.0  215.539693  -5428.847539
11 2024-07-17  GOOGL   buy  52.0  293.503963 -11633.979243
12 2024-07-18  GOOGL  sell   1.0    2.184768    -54.546046
13 2024-07-18   MSFT  sell   1.0  105.265148     63.645962
14 2024-07-18   MSFT  sell  19.0   58.926310    708.178156
16 2024-07-19  GOOGL   buy   2.0  169.361099   -260.465310
17 2024-07-19   MSFT   buy  92.0  111.229284  -6064.959611
18 2024-07-19  GOOGL  sell   7.0  264.888247   1686.529149
19 2024-07-19   MSFT  sell  10.0  247.043750   1803.575840


Sharpe                                    -0.77
max-drawdown              -17056094760869744.00
profit factor                              0.36
total net profit                      -24669.40
Percent Profitable                        46.67
average win\loss Ratio                    -0.41
Expectancy                              3478.59
Recovery factor                           -0.00
volatility                              5765.20
ROI                                      -34.65
dtype: object

### How did i compute returns in case of long short strategies
## i first get the current price of stock using auxiliary function , then for buy side we do current price - (buy_price * size) and vice versa for sell side

In [18]:
test = pd.read_csv('testData.csv')

In [22]:
# preprocessing
test['transactionDate'] = test['transactionDate'].str.replace('-', '/')
test['transactionDate'] = pd.to_datetime(test['transactionDate'], format='%m/%d/%Y')
test['ticker'] = test['ticker'].astype(str)
test['type'] = test['type'].map({'Purchase': 'buy', 'Sale (Full)': 'sell', 'Sale (Partial)': 'sell'})

In [24]:
test.dtypes

disclosureYear                     int64
disclosureDate                    object
transactionDate           datetime64[ns]
owner                             object
ticker                            object
assetDescription                  object
type                              object
amount                            object
representative                    object
district                          object
capitalGainsOver200USD            object
option_symbol                     object
dtype: object

In [25]:
# extracting lower bound
test['amount'] = test['amount'].str.replace('$', '').str.replace(',', '')
test['amount'] = test['amount'].apply(lambda x: float(x.split(' - ')[0]) if isinstance(x, str) else x)

In [26]:
test

Unnamed: 0,disclosureYear,disclosureDate,transactionDate,owner,ticker,assetDescription,type,amount,representative,district,capitalGainsOver200USD,option_symbol
0,2023,6/15/2023,2023-05-20,Spouse,AAPL,Apple Inc. Stock,buy,100001.0,Nancy Pelosi,CA-12,Yes,
1,2023,05-12-2023,2023-04-10,Self,GOOGL,Alphabet Inc. Stock,sell,50001.0,Nancy Pelosi,CA-12,No,
2,2023,12/29/2023,2022-12-06,Dependent,AMZN,AMZN Stock,buy,100001.0,Nancy Pelosi,NY-14,Yes,
3,2023,2/13/2022,2022-04-14,Self,AMZN,AMZN Stock,buy,100001.0,Nancy Pelosi,FL-9,No,
4,2023,6/23/2023,2022-03-11,Joint,MSFT,MSFT Stock,buy,1001.0,Nancy Pelosi,CA-12,Yes,
...,...,...,...,...,...,...,...,...,...,...,...,...
97,2023,3/15/2023,2023-07-26,Dependent,AAPL,AAPL Stock,sell,1001.0,Nancy Pelosi,FL-9,Yes,
98,2022,06-08-2022,2023-05-03,Joint,TSLA,TSLA Stock,sell,50001.0,Nancy Pelosi,CA-12,No,
99,2023,7/24/2022,2023-09-16,Self,GOOGL,GOOGL Stock,sell,1001.0,Nancy Pelosi,TX-7,No,
100,2023,12/25/2022,2022-06-25,Spouse,AAPL,AAPL Stock,sell,100001.0,Nancy Pelosi,NY-14,Yes,


In [230]:
# Get price for each trade using getTickerPrice function
test['price'] = test.apply(lambda row: getTickerPrice(row['ticker'], row['transactionDate']), axis=1)

In [232]:
# Calculate size based on amount and price
test['size'] = test['amount'] / test['price']

In [232]:
# Select required columns and rename them
test = test[['transactionDate', 'ticker', 'type', 'size', 'price']]
test.columns = ['Date', 'Symbol', 'Side', 'Size', 'Price']

In [None]:
# Sort by date
test = test.sort_values('Date')

In [None]:
# Calculate trade performance
performance_metrics = trade_perf(test)

In [None]:
# Print and interpret the results
print("Nancy Pelosi's Trading Performance Metrics:")
for metric, value in performance_metrics.items():
    print(f"{metric}: {value}")

In [None]:
# Interpretation
print("\nInterpretation:")