In [23]:
import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request

def DataFetcher(symbol,interval='1d'):
    suffixes = ['.NS', '.BO']
    for suffix in suffixes:
        try:
            data = yf.download(symbol + suffix, interval=interval,progress=False)
            data.drop(columns='Adj Close', inplace=True)
            data['Time'] = data.index
            data.rename(columns={'Open': 'Open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'Volume'}, inplace=True)
            return data
        except Exception as e:
            print(f"Failed to fetch data for {symbol+suffix}: {e}")
    return pd.DataFrame()  # Return an empty DataFrame if both fetches fail


def rma(close, length):
    alpha = 1 / length
    rma_values = np.zeros_like(close, dtype=float)
    rma_values[length - 1] = np.mean(close[:length])  # Initial SMA calculation

    for i in range(length, len(close)):
        rma_values[i] = alpha * close[i] + (1 - alpha) * rma_values[i - 1]

    return rma_values

def calculate_wma(values, length):
    wma_values = []
    for i in range(len(values)):
        if i+1 < length:
            wma_values.append(None)  # Insufficient data to calculate WMA
        else:
            window = values[i-length+1:i+1]
            weights = np.arange(1, length + 1)
            wma = np.dot(window, weights) / weights.sum()
            wma_values.append(wma)
    return wma_values
def calculate_supertrend(df, length=10, factor=3.0, ma_type='WMA'):
    df = df.reset_index(drop=True)
    df['cv']=df['close'] * df['Volume']
    df['cv_wma']= calculate_wma(df['cv'].tolist(), length)
    df['v_wma']=calculate_wma(df['Volume'].tolist(),length)
    df['vwma']= df['cv_wma'] / df['v_wma']

    # Calculate True Range and ATR
    df['custom_true_range'] = np.where(np.isnan(df['high'].shift(1)), df['high'] - df['low'],
                                       np.maximum(np.maximum(df['high'] - df['low'], np.abs(df['high'] - df['close'].shift(1))),
                                                  np.abs(df['low'] - df['close'].shift(1))))
    atr = rma(df['custom_true_range'].values, length)
    df['atr'] = atr

    # Calculate upper and lower bands
    df['upperBand'] = df['vwma'] + factor * df['atr']
    df['lowerBand'] = df['vwma'] - factor * df['atr']

    # Initialize columns to avoid forward reference
    df['prevLowerBand'] = df['lowerBand'].shift(1)
    df['prevUpperBand'] = df['upperBand'].shift(1)

    df['direction'] = np.nan
    df['superTrend'] = np.nan



    for i in range(1, len(df)):
        df.loc[i, 'lowerBand'] = df.loc[i, 'lowerBand'] if df.loc[i, 'lowerBand'] > df.loc[i, 'prevLowerBand'] or df.loc[i-1, 'close'] < df.loc[i, 'prevLowerBand'] else df.loc[i, 'prevLowerBand']
        df.loc[i, 'upperBand'] = df.loc[i, 'upperBand'] if df.loc[i, 'upperBand'] < df.loc[i, 'prevUpperBand'] or df.loc[i-1, 'close'] > df.loc[i, 'prevUpperBand'] else df.loc[i, 'prevUpperBand']

        if pd.isna(df.loc[i-1, 'atr']):
            df.loc[i, 'direction'] = 1
        elif df.loc[i-1, 'superTrend'] == df.loc[i, 'prevUpperBand']:
            df.loc[i, 'direction'] = -1 if df.loc[i, 'close'] > df.loc[i, 'upperBand'] else 1
        else:
            df.loc[i, 'direction'] = 1 if df.loc[i, 'close'] < df.loc[i, 'lowerBand'] else -1

        df.loc[i, 'superTrend'] = df.loc[i, 'lowerBand'] if df.loc[i, 'direction'] == -1 else df.loc[i, 'upperBand']
    df.drop(columns=['cv', 'cv_wma', 'v_wma', 'custom_true_range', 'vwma', 'prevLowerBand', 'prevUpperBand'], inplace=True)
    #df.dropna(inplace=True)
    return df

def price_sp_wma (df):
  df['price_WMA']=calculate_wma(df['close'], 20)
  df['superTrend_WMA']=calculate_wma(df['superTrend'], 100)
  df.dropna(inplace=True)
  return df

def calculate_data_point(df,n
                         ):
    data_points = []
    label = []
    for i in range(len(df)-1, len(df) - 1 - n, -1):
        data_points.append(df['superTrend'].iloc[i])
        label_i = 1 if df['price_WMA'].iloc[i] > df['superTrend_WMA'].iloc[i] else 0
        label.append(label_i)
    return data_points, label

def calculate_data_points(df,window_size=10):
  data = []
  labels = []
  for i in range(window_size, len(df)):
    data_point,label=calculate_data_point(df[i-window_size:i],window_size)
    data.append(data_point)
    labels.append(label)
  return np.array(data), np.array(labels)


def knn_weighted_series(data, labels, k, x):
    # Ensure data and labels are pandas Series
    if not isinstance(data, pd.Series) or not isinstance(labels, pd.Series):
        raise ValueError("Data and labels must be Pandas Series.")

    # Calculate distances using numpy for vectorized operations
    distances = np.abs(data - x)

    # Sort the distances and get the indices of the sorted items
    sorted_indices = distances.argsort()

    # Select the k nearest neighbors
    nearest_indices = sorted_indices[:k]

    # Calculate weights, which are inversely proportional to the distance
    weights = 1 / (distances.iloc[nearest_indices] + 1e-6)
    weighted_labels = weights * labels.iloc[nearest_indices]

    weighted_sum = weighted_labels.sum()
    total_weight = weights.sum()

    return weighted_sum / total_weight if total_weight else 0

def apply_corrected_trading_logic(df):
    df = df.reset_index(drop=True)
    last_signal = 'none'
    signals = ['none'] * len(df)  # Initialize all signals to 'none'
    
    for i in range(1, len(df)):
        if last_signal != 'long' and ((df.loc[i, 'label_'] == 1 and (df.loc[i-1, 'label_'] != 1 or df.loc[i-1, 'label_'] not in [1,0] )) or (df.loc[i, 'direction'] == -1 and df.loc[i-1, 'direction'] == 1 and df.loc[i, 'label_'] == 1)):
            signals[i] = 'Buy'
            last_signal = 'long'
        elif last_signal == 'long' and ((df.loc[i, 'close'] < df.loc[i, 'longTrailingStop']) or (df.loc[i, 'label_'] == 1 and df.loc[i, 'direction']== 1) or (df.loc[i, 'label_'] == 0 and df.loc[i, 'direction']==  -1 ) ):
            signals[i] = 'Sell'
            last_signal = 'none'
        # Add additional elif blocks here to handle other conditions, such as short entry and exit

    df['signal'] = signals
    return df[df['signal'] != 'none']

def fetch_last_n_days_data(df, n):
    return df[df['Time'] >=  pd.Timestamp.now() - pd.Timedelta(days=n)]

def get_market_cap(symbol):
    link = f'https://www.screener.in/company/{symbol}'
    hdr = {'User-Agent':'Mozilla/5.0'}
    req = Request(link,headers=hdr)
    market_cap = 0.0

    try:
        page = urlopen(req)
        soup = BeautifulSoup(page, 'html.parser')

        div_html = soup.find('div',{'class': 'company-ratios'})
        ul_html = div_html.find('ul',{'id': 'top-ratios'})

        for li in ul_html.find_all("li"):
            name_span = li.find('span',{'class':'name'})
            if 'Market Cap' in name_span.text:
                num_span = li.find('span',{'class':'number'})
                num_span = num_span.text.replace(',', '')
                market_cap = float(num_span) if (num_span != '') else 0.0
                break

        return market_cap

    except Exception as e:
        return -1



def main(df):
  df=calculate_supertrend(df)
  df=price_sp_wma(df)
  data,labels =calculate_data_points(df)
  df=df[10:]
  data = [pd.Series(arr) for arr in data]
  lables = [pd.Series(arr) for arr in labels]
  df['data']=data
  df['labels']=lables
  df['label_'] = df.apply(lambda row: knn_weighted_series(row['data'], row['labels'], 3, row['superTrend']), axis=1)
  df.drop(columns=['data','labels','price_WMA','superTrend_WMA'], inplace=True)
  df['longTrailingStop'] = df['superTrend'] - (df['atr'] * 3)
  df = apply_corrected_trading_logic(df)
  df=df[['Time','close','signal']][df['signal'] != 'none']
  df= fetch_last_n_days_data(df,1)
  return df



In [25]:
stocks_df=pd.read_csv('STOCKS_1.csv')
stocks_df = stocks_df.dropna(subset=['Symbol'])
stocks=stocks_df['Symbol']

In [None]:
all_stocks_data = []  # Initialize an empty list to store individual stock data
failed_stocks=[]
total_stocks = len(stocks)

for stock in tqdm(stocks[922:1383], desc="Fetching data", unit="stock"):
  try:
    short_df=DataFetcher(stock)
  except:
    failed_stocks.append(stock)
    continue
  if get_market_cap(stock) >=970 or get_market_cap(stock) == -1:
    try:
      short_df=main(short_df)
      short_df['Symbol'] = stock
      all_stocks_data.append(short_df)
    except:
      continue

all_stocks_df = pd.concat(all_stocks_data)  # Concatenate all stock dataframes

Fetching data:   1%|▏         | 33/2302 [09:37<9:19:41, 14.80s/stock]ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['MM.NS']: Exception('%ticker%: No timezone found, symbol may be delisted')
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data']=data
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data']=data
Fetching data:  10%|▉         | 230/2302 [55:11<7:39:00, 13.29s/stock]

In [None]:
all_stocks_df['Market Cap cr'] = all_stocks_df['Symbol'].apply(get_market_cap)
all_stocks_df.to_csv('knn_all_stocks_3.csv', index=False)