## Steps of code
1. Extracting data from tvdatafeed
2. Using a statistical approach to generate the labels(1->buy , 0->nothing , -1->sell)
3. dividing the data into train and test data
4. Training the data
5. Comparing the models
6. choosing best model
7. merge the long and short trades in one file
8. Backtesting the result based on the merged file

#### Assumptions
1. When computing returns, we assume each trade is entered at the open of the entry bar and squared off at the close of the exit bar.
2. Because we want one solution that covers both long and short trades, no stop-loss is applied for now, but we will definitely work on it in the future.

## Data Extraction from TradingView using python module tvDatafeed

In [1]:
from tvDatafeed import TvDatafeed, Interval


ModuleNotFoundError: No module named 'tvDatafeed'

In [2]:
with open('Credendials.txt','r') as file:
    content = file.read().split()

FileNotFoundError: [Errno 2] No such file or directory: 'Credendials.txt'

In [None]:
username = content[0]
password = content[1]

In [None]:
tv = TvDatafeed(username, password)

In [None]:
data = tv.get_hist(symbol='BANKNIFTY',exchange='NSE',interval=Interval.in_5_minute,n_bars=10000)

In [None]:
data.head()

In [None]:
data.to_csv('BANKNIFTY_till_14thMarch_5min.csv')

### Dividing the data into the training and testing data
We cannot use a plain random 80%/20% split for training and testing here: this is time-series data, where each bar depends on the bars before it, so no fixed ratio can be applied blindly.
Instead, the dataset is split chronologically at a point where the training data is around 80% of the total and the dependency between the two sets is minimal.

In [4]:
import pandas as pd
final_df = pd.read_csv('Final_data_5min.csv')

In [5]:
len(final_df)

8108

In [6]:
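# The split index 6509 used in the cells below is approximately (not exactly) 80% of the rows, because the cut is placed on a trading-day boundary: train ends on 2024-02-13 15:25 and test starts on 2024-02-14 09:15.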

In [7]:
train_data = final_df.iloc[:6509]

In [8]:
test_data = final_df.iloc[6509:]

In [9]:
len(test_data)

1599

In [10]:
train_data.tail()

Unnamed: 0,datetime,symbol,open,high,low,close,volume
6504,2024-02-13 15:05:00,NSE:BANKNIFTY,45504.9,45529.5,45489.25,45521.0,4875497.0
6505,2024-02-13 15:10:00,NSE:BANKNIFTY,45524.6,45565.95,45501.55,45521.45,5860964.0
6506,2024-02-13 15:15:00,NSE:BANKNIFTY,45521.95,45564.5,45500.75,45500.75,6589835.0
6507,2024-02-13 15:20:00,NSE:BANKNIFTY,45506.0,45517.0,45437.25,45453.0,6617515.0
6508,2024-02-13 15:25:00,NSE:BANKNIFTY,45454.5,45454.5,45383.15,45419.85,4611935.0


In [11]:
test_data.head()

Unnamed: 0,datetime,symbol,open,high,low,close,volume
6509,2024-02-14 09:15:00,NSE:BANKNIFTY,45095.45,45140.1,44988.7,45034.3,10394636.0
6510,2024-02-14 09:20:00,NSE:BANKNIFTY,45030.25,45119.75,44965.75,44987.65,4113601.0
6511,2024-02-14 09:25:00,NSE:BANKNIFTY,44988.0,44988.0,44880.75,44940.1,6873600.0
6512,2024-02-14 09:30:00,NSE:BANKNIFTY,44942.65,44951.65,44860.75,44913.85,5835451.0
6513,2024-02-14 09:35:00,NSE:BANKNIFTY,44912.7,45070.5,44902.85,45059.6,5188373.0


#### Hence, there is no overlapping in the data between them

In [12]:
train_data.to_csv('Data/Train/train_data.csv')

OSError: Cannot save file into a non-existent directory: 'Data\Train'

In [None]:
test_data.to_csv('Data/Test/test_data.csv')

### Generating buy signals in the dataset 

In [None]:
import pandas as pd
import talib as ta
import matplotlib.pyplot as plt
import warnings
import numpy as np

In [None]:
warnings.filterwarnings('ignore')

In [None]:
df_train = pd.read_csv('Data/Train/train_data.csv')

In [None]:
df_test = pd.read_csv('Data/Test/test_data.csv')

In [None]:
def Indicators(df):
    """Add RSI-derived indicator columns and an ATR column to ``df`` in place.

    Columns written:
        rsi  -- ``ta.RSI`` of ``close`` with period 9
        ema3 -- ``ta.EMA`` of the ``rsi`` column with period 6
                (the column is called ``ema3`` although the period is 6)
        wma  -- ``ta.WMA`` of the ``rsi`` column with period 21
        atr  -- ``ta.ATR`` of ``high`` / ``low`` / ``close`` with period 7

    Returns:
        The same DataFrame object that was passed in.
    """
    closes = df['close']

    # The two moving averages smooth the RSI itself, not the price.
    df['rsi'] = ta.RSI(closes, 9)
    rsi_values = df['rsi']
    df['ema3'] = ta.EMA(rsi_values, 6)
    df['wma'] = ta.WMA(rsi_values, 21)

    df['atr'] = ta.ATR(df['high'], df['low'], closes, 7)
    return df

In [None]:
df_train = Indicators(df_train)
df_test = Indicators(df_test)

In [None]:
df_train.dropna(inplace=True)
df_test.dropna(inplace=True)

In [None]:
df_train.head()

In [None]:
df_train.drop('Unnamed: 0',axis=1,inplace=True)

In [None]:
df_test.drop('Unnamed: 0',axis=1,inplace=True)

In [None]:
df_train.head()

In [None]:
df_test.head()

In [None]:
len(df_train),len(df_test)

In [None]:
df_train['rsi'].iloc[0]

In [None]:
def Generate_buy_signal(df):
    """Label every row with a long-entry signal derived from the RSI stack.

    A row gets ``buy_signal = 1`` when ``rsi >= ema3 >= wma`` and 0 otherwise.
    The first row is always left at 0, matching the earlier row-by-row loop,
    which started at position 1.

    The label is computed as one vectorised expression and assigned once.
    The previous ``df['buy_signal'].iloc[i] = 1`` was chained assignment: it
    triggers SettingWithCopyWarning and does not write to ``df`` at all when
    pandas Copy-on-Write is active.

    Args:
        df: DataFrame with numeric ``rsi``, ``ema3`` and ``wma`` columns.

    Returns:
        The same DataFrame, with an int64 ``buy_signal`` column added.
    """
    bullish_stack = (df['rsi'] >= df['ema3']) & (df['ema3'] >= df['wma'])
    labels = bullish_stack.astype('int64')
    if len(labels) > 0:
        # Keep the original convention: the first row never carries a signal.
        labels.iloc[0] = 0
    df['buy_signal'] = labels
    return df

In [None]:
df_train = Generate_buy_signal(df_train)

In [None]:
df_train.head(10)

In [None]:
df_test = Generate_buy_signal(df_test)

In [None]:
df_test.head(10)

In [None]:
len(df_train) , len(df_test)

In [None]:
df_train.to_csv('Buy_Train_data_with_label.csv')

In [None]:
df_test.to_csv('Buy_Test_data_with_label.csv')

## Generating short sell signals for dataset

In [None]:
df_train = pd.read_csv('Data/Train/train_data.csv')

In [None]:
df_test = pd.read_csv('Data/Test/test_data.csv')

In [None]:
def Indicators(df):
    """Attach the indicator columns used for signal generation to ``df``.

    The frame is modified in place and also returned. Four columns are added:
    ``rsi`` (RSI of ``close``, period 9), ``ema3`` (EMA of ``rsi``, period 6),
    ``wma`` (WMA of ``rsi``, period 21) and ``atr`` (ATR of
    ``high``/``low``/``close``, period 7).
    """
    price_close = df['close']
    df['rsi'] = ta.RSI(price_close, 9)

    # Both smoothers take the freshly written RSI column as their input.
    rsi_column = df['rsi']
    df['ema3'] = ta.EMA(rsi_column, 6)
    df['wma'] = ta.WMA(rsi_column, 21)

    df['atr'] = ta.ATR(df['high'], df['low'], price_close, 7)
    return df

In [None]:
df_train = Indicators(df_train)

In [None]:
df_test = Indicators(df_test)

In [None]:
df_train.dropna(inplace=True)
df_test.dropna(inplace=True)

In [None]:
df_train.head()

In [None]:
df_train.drop('Unnamed: 0',axis=1,inplace=True)

In [None]:
df_test.drop('Unnamed: 0',axis=1,inplace=True)

In [None]:
def Generate_Short_Sell_signals(df):
    """Label every row with a short-entry signal derived from the RSI stack.

    A row gets ``Sell_signal = -1`` when ``wma >= ema3 >= rsi`` and 0
    otherwise. The first row is always left at 0, matching the earlier
    row-by-row loop, which started at position 1.

    The label is computed as one vectorised expression and assigned once.
    The previous ``df['Sell_signal'].iloc[i] = -1`` was chained assignment: it
    triggers SettingWithCopyWarning and does not write to ``df`` at all when
    pandas Copy-on-Write is active.

    Args:
        df: DataFrame with numeric ``rsi``, ``ema3`` and ``wma`` columns.

    Returns:
        The same DataFrame, with an int64 ``Sell_signal`` column added.
    """
    bearish_stack = (df['wma'] >= df['ema3']) & (df['ema3'] >= df['rsi'])
    labels = -bearish_stack.astype('int64')
    if len(labels) > 0:
        # Keep the original convention: the first row never carries a signal.
        labels.iloc[0] = 0
    df['Sell_signal'] = labels
    return df

In [None]:
df_train = Generate_Short_Sell_signals(df_train)

In [None]:
df_test = Generate_Short_Sell_signals(df_test)

In [None]:
df_train.to_csv('Sell_Train_data_with_label.csv')

In [None]:
df_test.to_csv('Sell_Test_data_with_label.csv')

In [None]:
df_test.head(50)

## Now testing the models

In [None]:
df1 = pd.read_csv('Buy_Train_data_with_label.csv')

In [None]:
df1.head()

In [None]:
df2 = pd.read_csv('Sell_Train_data_with_label.csv')

In [None]:
df3 = pd.read_csv('Buy_Test_data_with_label.csv')

In [None]:
df4 = pd.read_csv('Sell_Test_data_with_label.csv')

In [None]:
x1=df1.iloc[:,3:12]
y1=df1.iloc[:,12]

In [None]:
x1.head()

In [None]:
y1

In [None]:
x2 = df2.iloc[:,3:12]
y2 = df2.iloc[:,12]

In [None]:
x3 = df3.iloc[:,3:12]
y3 = df3.iloc[:,12]

In [None]:
x4 = df4.iloc[:,3:12]
y4 = df4.iloc[:,12]

In [None]:
data={}

## Tried different models to see which one fits the dataset best

## SVC

In [None]:
from sklearn.svm import SVC #SVC- support vector classifier
svc_b=SVC()
svc_s=SVC()

In [None]:
len(x1) , len(y1)

In [None]:
y1.head()

In [None]:
svc_b.fit(x1,y1)

In [None]:
svc_b.score(x1,y1)

In [None]:
svc_s.fit(x2,y2)

In [None]:
data['svc']=[svc_b.score(x3,y3),svc_s.score(x4,y4)]

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
lr_b=LogisticRegression()
lr_s=LogisticRegression()

In [None]:
lr_b.fit(x1,y1)

In [None]:
lr_s.fit(x2,y2)

In [None]:
data['lr']=[lr_b.score(x3,y3),lr_s.score(x4,y4)]

In [None]:
lr_b.score(x3,y3)

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtc_b=DecisionTreeClassifier()
dtc_s=DecisionTreeClassifier()

In [None]:
dtc_b.fit(x1,y1)

In [None]:
dtc_s.fit(x2,y2)

In [None]:
data['dtc']=[dtc_b.score(x3,y3),dtc_s.score(x4,y4)]

In [None]:
dtc_b.score(x3,y3)

In [None]:
data

In [None]:
type(data.keys())

## Visualizing the accuracy comparison between different models

In [None]:
import matplotlib.pyplot as plt

# `data` maps a model name to [buy-model score, sell-model score] on the test sets.
models = data.keys()
performances = data.values()

bar_width = 0.35
index = range(len(models))

# Split the score pairs into one series per bar group.
buy_scores = [pair[0] for pair in performances]
sell_scores = [pair[1] for pair in performances]

# Sell bars sit one bar-width to the right; ticks are centred between the two.
sell_positions = [slot + bar_width for slot in index]
tick_positions = [slot + bar_width / 2 for slot in index]

fig, ax = plt.subplots()
bars1 = ax.bar(index, buy_scores, bar_width, label='Buy')
bars2 = ax.bar(sell_positions, sell_scores, bar_width, label='Sell')

ax.set_title('Accuracy of different models on buy and sell dataset')
ax.set_xlabel('Models')
ax.set_ylabel('Performance')
ax.set_xticks(tick_positions)
ax.set_xticklabels(models)
ax.set_ylim(0, 1)
ax.legend()

plt.show()

In [None]:
list_buy = dtc_b.predict(x3)

In [None]:
len(list_buy)

In [None]:
list_sell = dtc_s.predict(x4)

In [None]:
len(list_sell)

## Merging the Buy and Sell signals

In [None]:
# The file is written as 'Buy_Test_data_with_label.csv' (capital T); the
# lowercase spelling fails with FileNotFoundError on case-sensitive filesystems.
df1 = pd.read_csv('Buy_Test_data_with_label.csv')

In [None]:
df1.head()

In [None]:
df_temp = pd.read_csv('Sell_Test_data_with_label.csv')

In [None]:
len(df_temp)

In [None]:
df_temp.head()

In [None]:
df1 = pd.DataFrame()

In [None]:
df2 = pd.DataFrame()

In [None]:
df1['buy_signal'] = list_buy

In [None]:
len(df1) , len(df_temp)

In [None]:
df1['datetime'] = df_temp['datetime']

In [None]:
df2['Sell_signal'] = list_sell

In [None]:
def merge(df1,df2):
    """Combine per-bar buy and sell predictions into one entry/exit stream.

    The function walks the bars in order while tracking a single position:

    * Flat: a lone buy (``buy_signal == 1`` without ``Sell_signal == -1``)
      opens a long and emits ``1``; a lone sell (``Sell_signal == -1`` without
      ``buy_signal == 1``) opens a short and emits ``-1``. Conflicting or
      absent signals emit ``0``.
    * Long: the position is closed (emitting ``-1``) as soon as the buy signal
      drops to 0 or a sell signal appears.
    * Short: the position is closed (emitting ``1``) as soon as a buy signal
      appears or the sell signal drops to 0.

    Rows are matched by position, not by index label, so frames whose index
    is not a default ``RangeIndex`` are handled as well.

    Args:
        df1: DataFrame with a ``buy_signal`` column (values 0 / 1).
        df2: DataFrame with a ``Sell_signal`` column (values 0 / -1).

    Returns:
        A list with one entry per row of ``df1``: ``1``, ``-1`` or ``0``.

    Raises:
        ValueError: if ``df2`` has fewer rows than ``df1``.
    """
    buys = df1['buy_signal'].tolist()
    sells = df2['Sell_signal'].tolist()
    if len(sells) < len(buys):
        raise ValueError("df2 must have at least as many rows as df1")

    merged = [0] * len(buys)
    position = None  # None = flat, 'long' or 'short' while a trade is open
    for i, (buy, sell) in enumerate(zip(buys, sells)):
        if position is None:
            if buy == 1 and sell != -1:
                position = 'long'
                merged[i] = 1
            elif sell == -1 and buy != 1:
                position = 'short'
                merged[i] = -1
        elif position == 'long':
            if buy == 0 or sell == -1:
                position = None
                merged[i] = -1
        else:
            if buy == 1 or sell == 0:
                position = None
                merged[i] = 1
    return merged

In [None]:
# the cell below calls the merge function (defined above) to merge the buy and sell signals

In [None]:
signals = merge(df1,df2)

In [None]:
signals

In [None]:
df1.head()

In [None]:
df2.head()

In [None]:
df = pd.DataFrame()

In [None]:
df['datetime'] = df1['datetime']

In [None]:
df['buy_signal'] = df1['buy_signal']

In [None]:
df['sell_signal'] = df2['Sell_signal']

In [None]:
df['open'] = df_temp['open']

In [None]:
df['close'] = df_temp['close']

In [None]:
df['high'] = df_temp['high']

In [None]:
df['low'] = df_temp['low']

In [None]:
df['volume'] = df_temp['volume']

In [None]:
df['signals'] = signals

In [None]:
df.head()

In [None]:
df.to_csv('Long-Short-Signals.csv')

In [None]:
df.head(50)

## Checking the backtested results
The actual results would be far greater than the results shown by this backtesting code, because the rate at which money increases in practice is at least 10x the result shown.
The actual calculation involves concepts such as option Greeks and strike prices, which would shift the focus of the project.

1. Number of Trades (done)
2. Number of long Trades (done)
3. Number of Short Trades (done)
4. Win Rate (done)
5. Number of profitable Trades (done)
6. Number of long profitable Trades (done)
7. Number of short profitable Trades (done)
8. Total returns   (done)
9. Returns from long trades (done)
10. Returns from short trades (done)

In [None]:
df = pd.read_csv('Long-Short-Signals.csv')

In [None]:
len(df)

In [None]:
df.head()

In [None]:
df.set_index('datetime',inplace=True)

In [None]:
df.head()

In [None]:
def SumColumn(df):
    """Add a ``sum`` column holding the running total of ``signals``.

    With entries and exits encoded as +1 / -1, the running total is the net
    position after each bar (1 = long, -1 = short, 0 = flat).

    ``cumsum`` replaces the earlier ``df['sum'][i] = ...`` loop, which relied
    on chained assignment and on integer keys being treated as positions on a
    non-integer index; it also works on an empty frame.

    Args:
        df: DataFrame with a numeric ``signals`` column.

    Returns:
        The same DataFrame, with the ``sum`` column added.
    """
    df['sum'] = df['signals'].cumsum()
    return df

In [None]:
df = SumColumn(df)

In [None]:
df.head(50)

In [None]:
def NumOfTrades(df):
    """Count the long and short trades opened in ``df``.

    A long entry is a row where ``signals == 1`` and ``sum == 1``; a short
    entry is a row where ``signals == -1`` and ``sum == -1``. Rows are
    compared column-wise, so the result does not depend on the index type.

    Args:
        df: DataFrame with ``signals`` and ``sum`` columns.

    Returns:
        Tuple ``(long_trades, short_trades, total_trades)`` of ints.
    """
    signals = df['signals']
    net_position = df['sum']
    long = int(((signals == 1) & (net_position == 1)).sum())
    short = int(((signals == -1) & (net_position == -1)).sum())
    return long, short, long + short

In [None]:
long_trades , short_trades , total_trades = NumOfTrades(df)

In [None]:
long_trades,short_trades,total_trades

In [None]:
df_full = pd.read_csv('Buy_Train_data_with_label.csv')

In [None]:
df_full.head(2)

In [None]:
df_full.isna().sum()

In [None]:
len(df) , len(df_full)

In [None]:
# df['open'] = df_full['open']
# df['close'] = df_full['close']
# df['low'] = df_full['low']
# df['high'] = df_full['high']
# df['volume'] = df_full['volume']

In [None]:
df.head()

In [None]:
# `df` already holds the open/close prices that were saved together with the
# signals in 'Long-Short-Signals.csv'. The loop that used to live here copied
# the first len(df) open/close values from `df_full`, which is loaded from
# 'Buy_Train_data_with_label.csv', i.e. bars from the training period. That
# priced every entry/exit with bars unrelated to the signals (and left
# high/low inconsistent with open/close), so the prices in `df` are kept as-is.

In [None]:
# `initial` is the amount of capital notionally committed to every trade
def long_return(df , initial):
    """Compute the profit of every long trade and record its return in ``df``.

    A long trade is opened on a row where ``sum == 1`` and ``signals == 1``
    (entry at that row's ``open``) and closed on a row where ``sum == 0`` and
    ``signals == -1`` (exit at that row's ``close``).

    The per-row results are collected in a plain list and written to
    ``df['long_returns']`` in one assignment. The earlier
    ``df['long_returns'][i] = ...`` was chained assignment with integer keys
    on a non-integer index and wrote floats into an integer column.

    Args:
        df: DataFrame with ``sum``, ``signals``, ``open`` and ``close`` columns.
        initial: capital committed to each trade.

    Returns:
        Tuple ``(profit, long_profit_trades, max_profit, trade_profits, df)``:
        total profit, number of profitable long trades, largest single-trade
        profit (0 if no trade was profitable), the list of per-trade profits
        and the same DataFrame with a float ``long_returns`` column that holds
        the fractional return on exit rows and -1 everywhere else.
    """
    signals = df['signals'].tolist()
    net_position = df['sum'].tolist()
    opens = df['open'].tolist()
    closes = df['close'].tolist()

    profit = 0
    entry = -1
    long_profit_trades = 0
    max_profit = 0
    trade_profits = []
    returns = [-1.0] * len(signals)  # -1 marks "no long trade closed on this row"

    rows = zip(signals, net_position, opens, closes)
    for i, (signal, net, open_price, close_price) in enumerate(rows):
        if net == 1 and signal == 1:
            entry = open_price
        elif net == 0 and signal == -1:
            final = (close_price / entry) * initial
            trade_profit = final - initial
            returns[i] = trade_profit / initial
            profit += trade_profit
            trade_profits.append(trade_profit)
            max_profit = max(max_profit, trade_profit)
            if final > initial:
                long_profit_trades += 1

    df['long_returns'] = returns
    return profit , long_profit_trades , max_profit , trade_profits ,  df

In [None]:
total_long_profit , Num_long_profit_trades , max_long_profit , list ,  df = long_return(df , 1000)

In [None]:
total_long_profit , Num_long_profit_trades , max_long_profit

In [None]:
list

In [None]:
def short_return(df , initial):
    """Compute the profit of every short trade and record its return in ``df``.

    A short trade is opened on a row where ``sum == -1`` and ``signals == -1``
    (entry at that row's ``open``) and closed on a row where ``sum == 0`` and
    ``signals == 1`` (exit at that row's ``close``). A short gains when the
    exit price is below the entry price, so the profit is ``initial - final``.

    ``short_returns`` stores the return of the trade closed on that row,
    mirroring ``long_return``. It previously stored the cumulative
    ``profit / initial`` of all short trades so far. Results are written to
    the frame in one assignment instead of chained per-row assignment.

    Args:
        df: DataFrame with ``sum``, ``signals``, ``open`` and ``close`` columns.
        initial: capital committed to each trade.

    Returns:
        Tuple ``(profit, short_profit_trades, max_profit, trade_profits, df)``:
        total profit, number of profitable short trades, largest single-trade
        profit (0 if no trade was profitable), the list of per-trade profits
        and the same DataFrame with a float ``short_returns`` column that holds
        the fractional return on exit rows and -1 everywhere else.
    """
    signals = df['signals'].tolist()
    net_position = df['sum'].tolist()
    opens = df['open'].tolist()
    closes = df['close'].tolist()

    profit = 0
    entry = -1
    short_profit_trades = 0
    max_profit = 0
    trade_profits = []
    returns = [-1.0] * len(signals)  # -1 marks "no short trade closed on this row"

    rows = zip(signals, net_position, opens, closes)
    for i, (signal, net, open_price, close_price) in enumerate(rows):
        if net == -1 and signal == -1:
            entry = open_price
        elif net == 0 and signal == 1:
            final = (close_price / entry) * initial
            trade_profit = initial - final
            profit += trade_profit
            # Per-trade return, not the running total.
            returns[i] = trade_profit / initial
            trade_profits.append(trade_profit)
            max_profit = max(max_profit, trade_profit)
            if initial > final:
                short_profit_trades += 1

    df['short_returns'] = returns
    return profit , short_profit_trades , max_profit , trade_profits , df
            

In [None]:
total_short_profit , Num_short_profit_trades , short_max_profit , list , df = short_return(df , 1000)

In [None]:
total_short_profit , Num_short_profit_trades , short_max_profit 

In [None]:
def win_rate(long_trades , short_trades , Num_long_profit_trades , Num_short_profit_trades ):
    """Return the long, short and overall win rates as fractions.

    A rate whose denominator is zero (no trades on that side, or no trades at
    all) is reported as 0.0 instead of raising ZeroDivisionError.

    Args:
        long_trades: number of long trades taken.
        short_trades: number of short trades taken.
        Num_long_profit_trades: number of profitable long trades.
        Num_short_profit_trades: number of profitable short trades.

    Returns:
        Tuple ``(long_win_rate, short_win_rate, overall_win_rate)``.
    """
    def _ratio(wins, trades):
        # Guard the empty case so a one-sided backtest still produces a report.
        return wins / trades if trades else 0.0

    long_win_rate = _ratio(Num_long_profit_trades, long_trades)
    short_win_rate = _ratio(Num_short_profit_trades, short_trades)
    overall_win_rate = _ratio(
        Num_long_profit_trades + Num_short_profit_trades,
        long_trades + short_trades,
    )
    return long_win_rate , short_win_rate , overall_win_rate

In [None]:
long_win_rate , short_win_rate , win_rate = win_rate(long_trades , short_trades , Num_long_profit_trades , Num_short_profit_trades )

In [None]:
long_win_rate , short_win_rate , win_rate

In [None]:
df.head()

In [None]:
df.to_csv('Data_for_backtesting.csv')

## Using Backtesting module

In [None]:
from backtesting import Backtest, Strategy

In [None]:
df = pd.read_csv('Data_for_backtesting.csv')

In [None]:
# Mirror the lowercase OHLCV columns under capitalised names
# (presumably the column names the Backtest call below expects -- confirm).
for source_name in ('close', 'open', 'low', 'high', 'volume'):
    df[source_name.capitalize()] = df[source_name]

# Scale the four price columns down by 1e6; Volume is left untouched.
# NOTE(review): presumably done so that whole units are affordable with
# cash=1000 -- confirm.
for price_name in ('Open', 'High', 'Low', 'Close'):
    df[price_name] = df[price_name] / 1e6

In [None]:
df.head()

In [None]:
class MyStrat(Strategy):
    """Replay the precomputed ``signals`` / ``sum`` columns as orders.

    ``sum`` is the net position after each bar (0 = flat) and ``signals``
    marks the entry and exit bars, so the strategy only mirrors what those
    columns already say.
    """

    def init(self):
        # Nothing to precompute: all signals are already columns of the data.
        pass

    def next(self):
        # self.data[...] returns the whole series revealed so far, so the
        # current bar is its last element. Comparing the full array in an
        # `if` (as before) is ambiguous and raises once more than one bar is
        # visible.
        net_position = self.data['sum'][-1]
        signal = self.data['signals'][-1]

        if net_position == 0:
            self.position.close()
        elif signal == -1 and net_position == -1:
            self.sell()
        elif signal == 1 and net_position == 1:
            self.buy()


In [None]:
bt = Backtest(df, MyStrat, cash=1000, commission=0.001,trade_on_close=True)
result = bt.run()
bt.plot(superimpose=True)