In [1]:
##### Importing the libraries
import pandas as pd 
import numpy as np
import os
from tqdm.notebook import tqdm

# **Task 1**


In [2]:
# Fetch the names of the close-position files from the data folder.
# os.listdir returns entries in arbitrary order, so the listing is sorted to keep
# the merge order (and everything derived from it) reproducible between runs.
all_files = sorted(os.listdir("SampleData"))
cp_files = [file for file in all_files if "closePosition" in file]

In [3]:
# Extract the needed columns from every close-position file and merge them.
# Frames are collected in a list and concatenated once: calling pd.concat inside
# the loop re-copies the accumulated frame on every iteration.
trade_columns = ['Key', 'ExitTime', 'Symbol', 'EntryPrice', 'Quantity', 'Pnl']
frames = []
for file in tqdm(cp_files, total=len(cp_files)):
    df = pd.read_csv(f"SampleData/{file}",parse_dates=True)
    df = df[trade_columns]
    # Calendar date of the exit, used for the per-day statistics
    df['Date'] = pd.to_datetime(df.ExitTime).dt.date
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame with the expected columns
main_df = pd.concat(frames) if frames else pd.DataFrame(columns=trade_columns + ['Date'])

  0%|          | 0/1110 [00:00<?, ?it/s]

In [4]:
# Remove fully identical rows; the first occurrence of each is the one kept (pandas default)
main_df = main_df.drop_duplicates()

In [5]:
# Independent (deep) copy of the de-duplicated frame, taken before later cells add columns
main_df_1 = main_df.copy(deep=True)

# **Task 2**

In [6]:
# Headline statistics of the merged trade log, one per output line.
# A trade counts as profitable only when Pnl > 0; zero-Pnl trades fall in the loss bucket.
print(
    f"The total number of trades in the final dataframe {len(main_df)}",
    f"The number of unique dates in the Date column are {len(np.unique(main_df['Date']))} ",
    f"Average trades are {len(main_df) / len(np.unique(main_df['Date'])):.3f}",
    f"Total pnl: {np.sum(main_df['Pnl']):.3f}",
    f"Number of Profitable Trades {main_df.loc[main_df['Pnl'] > 0, 'Pnl'].count()}",
    f"Number of loss Trades {main_df.loc[main_df['Pnl'] <= 0, 'Pnl'].count()}",
    sep="\n",
)

The total number of trades in the final dataframe 1675
The number of unique dates in the Date column are 141 
Average trades are 11.879
Total pnl: 635766.250
Number of Profitable Trades 558
Number of loss Trades 1117


In [7]:
# Headline statistics of the merged trade log, saved as the text form of a dict.
# NumPy scalars are cast to plain Python numbers first: str(dict) uses repr() on the
# values, and NumPy >= 2 reprs scalars as e.g. "np.int64(558)", which would end up in the file.
combined_stats = {"Total trades":len(main_df),
                 "Unique days":len(np.unique(main_df.Date)),
                 "Average trades": len(main_df)/len(np.unique(main_df.Date)),
                 "Total pnl": float(np.sum(main_df.Pnl)),
                  "Profit Trades": int(main_df[main_df.Pnl>0].Pnl.count()),
                  "Loss Trades": int(main_df[main_df.Pnl<=0].Pnl.count())
                 }

with open('combined_stats.txt','w') as data:
      data.write(str(combined_stats))

# **Task 3** 

In [8]:
# Order the trades by exit time so that neighbouring rows are consecutive exits
main_df = main_df.sort_values("ExitTime",ascending=True)

# 1 for a profitable trade (Pnl > 0), 0 otherwise (zero or missing Pnl included).
# Vectorized comparison instead of a row-by-row iterrows loop; `score` is kept as a
# plain list and assigned positionally, exactly as before.
score = (main_df['Pnl'] > 0).astype(int).tolist()
main_df['score'] = score

In [9]:
# Flag the start of every run: True where a trade's outcome differs from the previous trade's
main_df["streak"] = main_df["score"] != main_df["score"].shift()

# Running total of the run starts gives each streak its own sequential id
main_df["streak_number"] = main_df["streak"].cumsum()

# 1-based position of each trade inside its streak
main_df["streak_count"] = main_df.groupby("streak_number").cumcount() + 1

# Move the current row labels into a column and renumber the rows from 0
main_df = main_df.reset_index()

In [10]:
# Total Pnl per streak, ranked from the most profitable streak to the most loss-making one
topn_streaks = (
    main_df
    .groupby("streak_number", as_index=False)["Pnl"]
    .sum()
    .sort_values("Pnl", ascending=False)
)

In [11]:
def _summarise_streaks(trades, streak_numbers):
    """Build one summary row per streak, in the order the streak ids are given."""
    records = []
    for number in streak_numbers:
        streak_trades = trades.loc[trades["streak_number"] == number]
        exit_times = streak_trades["ExitTime"]
        records.append({
            "total_trades": len(streak_trades),
            # First and last exit of the streak, in the row order of `trades`
            "duration": f"{exit_times.iloc[0]} to {exit_times.iloc[-1]}",
            "pnl_of_streaks": streak_trades["Pnl"].sum(),
        })
    return pd.DataFrame(records, columns=["total_trades", "duration", "pnl_of_streaks"])


def top_n_streaks(n=None, trades=None, streak_pnl=None,
                  win_path="win.csv", loss_path="loss.csv"):
    """Summarise the n best and n worst streaks by total Pnl and save both tables.

    Parameters
    ----------
    n : int or str, optional
        Number of streaks to report on each side. When omitted, the value is
        read interactively with ``input()`` (the original behaviour).
    trades : pandas.DataFrame, optional
        Trade log with ``streak_number``, ``ExitTime`` and ``Pnl`` columns.
        Defaults to the notebook-level ``main_df``.
    streak_pnl : pandas.DataFrame, optional
        One row per streak with ``streak_number`` and ``Pnl`` columns.
        Defaults to the notebook-level ``topn_streaks``.
    win_path, loss_path : str, optional
        CSV files the two tables are written to.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(win_streak, loss_streak)``, each with ``total_trades``, ``duration``
        and ``pnl_of_streaks`` columns. Wins are ordered from highest Pnl down,
        losses from lowest Pnl up.

    Raises
    ------
    ValueError
        If ``n`` cannot be converted to an integer or is negative.
    """
    if n is None:
        n = input()
    n = int(n)
    if n < 0:
        raise ValueError("n must be a non-negative integer")

    if trades is None:
        trades = main_df
    if streak_pnl is None:
        streak_pnl = topn_streaks

    # Rank here so the result does not depend on the caller pre-sorting; a stable
    # sort leaves an already-ranked frame (ties included) in its existing order.
    ranked = streak_pnl.sort_values("Pnl", ascending=False, kind="stable")

    # head/tail instead of iloc slices: iloc[-0:] would select every streak when n == 0
    win_numbers = ranked["streak_number"].head(n).to_numpy()
    loss_numbers = ranked["streak_number"].tail(n).to_numpy()[::-1]  # worst streak first

    win_streak = _summarise_streaks(trades, win_numbers)
    win_streak.to_csv(win_path)

    loss_streak = _summarise_streaks(trades, loss_numbers)
    # The loss table gets its own file; it used to overwrite the win table in "win.csv"
    loss_streak.to_csv(loss_path)

    return win_streak, loss_streak

In [12]:
a,b = top_n_streaks()

12


In [13]:
a

Unnamed: 0,total_trades,duration,pnl_of_streaks
0,6,2021-02-01 11:29:00 to 2021-02-02 09:27:00,27826.25
1,7,2020-12-09 13:56:00 to 2020-12-10 10:26:00,22266.25
2,6,2020-11-17 11:16:00 to 2020-11-17 14:56:00,19653.75
3,6,2021-02-23 10:25:00 to 2021-02-23 11:38:00,18990.0
4,7,2021-01-25 09:40:00 to 2021-01-25 10:05:00,18515.0
5,4,2020-09-18 14:21:00 to 2020-09-18 14:21:00,16755.0
6,4,2020-11-24 14:35:00 to 2020-11-24 15:14:00,16452.5
7,6,2020-12-17 13:30:00 to 2020-12-18 09:57:00,16095.0
8,5,2020-09-21 13:37:00 to 2020-09-22 09:46:00,15813.75
9,4,2020-10-14 14:21:00 to 2020-10-14 14:22:00,15062.5


In [14]:
b

Unnamed: 0,total_trades,duration,pnl_of_streaks
0,10,2021-01-29 14:12:00 to 2021-02-01 10:44:00,-15396.25
1,11,2021-02-18 09:26:00 to 2021-02-18 11:29:00,-12970.0
2,15,2020-08-06 09:25:00 to 2020-08-06 12:26:00,-12872.5
3,8,2020-11-04 09:30:00 to 2020-11-04 10:32:00,-12730.0
4,13,2021-01-27 12:17:00 to 2021-01-28 11:04:00,-12317.5
5,7,2020-11-25 11:43:00 to 2020-11-26 09:45:00,-12093.75
6,13,2020-10-27 10:24:00 to 2020-10-28 10:22:00,-11358.75
7,9,2020-12-22 10:46:00 to 2020-12-23 10:27:00,-11306.25
8,7,2020-11-20 09:57:00 to 2020-11-23 09:40:00,-10026.25
9,8,2021-01-20 09:32:00 to 2021-01-20 10:20:00,-9538.75


In [15]:
main_df.iloc[:10]

Unnamed: 0,index,Key,ExitTime,Symbol,EntryPrice,Quantity,Pnl,Date,score,streak,streak_number,streak_count
0,0,2020-08-03 09:20:00,2020-08-03 10:16:00,BANKNIFTY06AUG2021200CE,441.15,-25,-775.0,2020-08-03,0,True,1,1
1,1,2020-08-03 10:26:00,2020-08-03 10:34:00,BANKNIFTY06AUG2021200CE,439.2,-25,-545.0,2020-08-03,0,False,1,2
2,0,2020-08-03 10:25:00,2020-08-03 10:49:00,NIFTY06AUG2011000PE,113.0,-75,-371.25,2020-08-03,0,False,1,3
3,0,2020-08-03 11:38:00,2020-08-03 11:41:00,BANKNIFTY06AUG2021400PE,394.05,-25,-221.25,2020-08-03,0,False,1,4
4,1,2020-08-03 11:44:00,2020-08-03 12:08:00,BANKNIFTY06AUG2021300PE,325.9,-25,-692.5,2020-08-03,0,False,1,5
5,2,2020-08-03 12:24:00,2020-08-03 12:26:00,BANKNIFTY06AUG2021300PE,344.95,-25,-423.75,2020-08-03,0,False,1,6
6,0,2020-08-03 10:16:00,2020-08-03 12:26:00,NIFTY06AUG2011000PE,117.45,-75,-513.75,2020-08-03,0,False,1,7
7,0,2020-08-03 10:05:00,2020-08-03 12:26:00,BANKNIFTY06AUG2021400PE,376.8,-25,-1016.25,2020-08-03,0,False,1,8
8,1,2020-08-03 10:52:00,2020-08-03 12:26:00,NIFTY06AUG2010950PE,88.7,-75,-307.5,2020-08-03,0,False,1,9
9,0,2020-08-03 09:20:00,2020-08-03 12:33:00,NIFTY06AUG2010950CE,123.6,-75,2602.5,2020-08-03,1,True,2,1


In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns