In [7]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.cluster import KMeans
from tqdm import tqdm
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request

def DataFetcher(symbol,interval ='1d'):
    suffixes = ['.NS', '.BO']
    for suffix in suffixes:
        try:
            data = yf.download(symbol + suffix, interval=interval,progress=False)
            data.drop(columns='Adj Close', inplace=True)
            data['Time'] = data.index
            data.rename(columns={'Open': 'Open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'Volume'}, inplace=True)
            return data
        except Exception as e:
            print(f"Failed to fetch data for {symbol+suffix}: {e}")
    return pd.DataFrame()  # Return an empty DataFrame if both fetches fail

def rma(close, length):
    alpha = 1 / length
    rma_values = np.zeros_like(close)
    rma_values[0] = close[0]  # Initialize with the first value for continuity
    for i in range(1, len(close)):
        rma_values[i] = alpha * close[i] + (1 - alpha) * rma_values[i - 1]
    return rma_values

def calculate_custom_atr(df, length=10):
    df['custom_true_range'] = np.where(np.isnan(df['high'].shift(1)),
                                       df['high'] - df['low'],
                                       np.maximum(np.maximum(df['high'] - df['low'],
                                                             np.abs(df['high'] - df['close'].shift(1))),
                                                  np.abs(df['low'] - df['close'].shift(1))))
    df['atr'] = rma(df['custom_true_range'].values, length)
    return df

def calculate_supertrends(df, length=10, min_mult=1, max_mult=5, step=0.5, perfAlpha=10):
    """Calculate Supertrend for multiple factors and prepare for clustering."""
    factors = np.arange(min_mult, max_mult + step, step)
    df['hl2'] = (df['high'] + df['low']) / 2

    for factor in factors:
        # Initialize columns for calculations
        df[f'up_{factor}'] = df['hl2'] + (factor * df['atr'])
        df[f'dn_{factor}'] = df['hl2'] - (factor * df['atr'])
        df[f'upper_{factor}'] = df['hl2'].copy()
        df[f'lower_{factor}'] = df['hl2'].copy()
        df[f'trend_{factor}'] = 0
        df[f'perf_{factor}'] = 0

        # Initialize output with hl2 as default, will be updated at the end
        df[f'output_{factor}'] = 0

        for i in range(1, len(df)):
            # Trend determination logic
            df[f'trend_{factor}'].iat[i] = 1 if df['close'].iat[i] > df[f'upper_{factor}'].iat[i-1] else 0 if df['close'].iat[i] < df[f'lower_{factor}'].iat[i-1] else df[f'trend_{factor}'].iat[i-1]

            # Update upper and lower based on the trend
            df[f'upper_{factor}'].iat[i] = min(df[f'up_{factor}'].iat[i],df[f'upper_{factor}'].iat[i-1]) if (df['close'].iat[i-1] < df[f'upper_{factor}'].iat[i-1]) else df[f'up_{factor}'].iat[i]
            df[f'lower_{factor}'].iat[i] = max(df[f'dn_{factor}'].iat[i],df[f'lower_{factor}'].iat[i-1]) if df['close'].iat[i-1] > df[f'lower_{factor}'].iat[i-1] else df[f'dn_{factor}'].iat[i]

            # Calculate diff for performance evaluation
            diff = np.sign(df['close'].iat[i] - df[f'output_{factor}'].iat[i-1])

            # Update perf using diff and close price change
            close_change = df['close'].iat[i] - df['close'].iat[i-1]
            df[f'perf_{factor}'].iat[i] = df[f'perf_{factor}'].iat[i-1] + 2 / (perfAlpha + 1) * (close_change * diff - df[f'perf_{factor}'].iat[i-1])

            # Update output based on the updated trend
            df[f'output_{factor}'].iat[i] = df[f'lower_{factor}'].iat[i] if df[f'trend_{factor}'].iat[i] == 1 else df[f'upper_{factor}'].iat[i]

    return df, factors


def calculate_target_factor_and_supertrend(df, factors, length=10, n_clusters=3):
    target_factors = []
    for i, row in df.iterrows():
        # Gather performance indices for this row across all factors
        perf_indices = np.array([row[f'perf_{factor}'] for factor in factors])

        # Apply k-means clustering to these performance indices
        if len(set(perf_indices)) > 1:  # Ensure we have more than one unique value for meaningful clustering
            kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(perf_indices.reshape(-1, 1))
            labels = kmeans.labels_
            centroids = kmeans.cluster_centers_.flatten()

            # Identify the best-performing cluster (e.g., cluster with the highest centroid value)
            best_cluster_idx = np.argmax(centroids)

            # Calculate the average factor of the best-performing cluster
            best_factors = [factors[j] for j, label in enumerate(labels) if label == best_cluster_idx]
            target_factor = np.mean(best_factors)
        else:
            # If all performance indices are the same, just use the first factor as the default
            target_factor = factors[0]

        target_factors.append(target_factor)

    # Add target factor to DataFrame
    df['target_factor'] = target_factors

    up = df['hl2'] + (df['target_factor'] * df['atr'])
    dn = df['hl2'] - (df['target_factor'] * df['atr'])
    df['up'], df['dn'] = up, dn
    df['upper'], df[f'lower'] = up, dn
    df['superTrend'] = 0
    df['os']=0
    for i in range(1, len(df)):
      df['upper'].iat[i] = min(up.iat[i],df['hl2'].iat[i]) if (df['close'].iat[i-1] < df['upper'].iat[i]) else up.iat[i]
      df['lower'].iat[i] = max(dn.iat[i],df['hl2'].iat[i]) if df['close'].iat[i-1] > df['lower'].iat[i-1] else dn.iat[i]
      df['os'].iat[i] = 1 if df['close'].iat[i] > df['hl2'].iat[i] else 0 if df['close'].iat[i] < df['hl2'].iat[i] else 0
      df['superTrend'].iat[i]= df['lower'].iat[i] if df['os'].iat[i]==1 else df['upper'].iat[i]
    return df

def generate_signals(df):
    # Initialize the signal column with None
    df['signal'] = 'None'

    # Loop through the DataFrame and generate signals
    for i in range(1, len(df)):
        if df['os'].iat[i] == 1 and df['os'].iat[i-1] == 0:
            df['signal'].iat[i] = 'Buy'
        elif df['os'].iat[i] == 0 and df['os'].iat[i-1] == 1:
            df['signal'].iat[i] = 'Sell'

    return df[['Time','close','signal']]

# Assuming your DataFrame 'df' is already processed and contains the 'os' column


def get_current_close_price(ticker_symbol):
    suffixes = ['.NS', '.BO']
    for suffix in suffixes:
      try:
        ticker = yf.Ticker(ticker_symbol + suffix)
        data = ticker.history(period='1d')
        current_close_price = data['Close'].iloc[-1]
        return current_close_price
      except :
        continue

def calculate_profit_percentage(ticker_symbol, df):
    initial_principal = 100000  # Starting with 1 lakh
    cash = initial_principal
    shares = 0

    # Loop through the DataFrame to process buy and sell signals
    for i, row in df.iterrows():
        if row['signal'] == 'Buy' and cash > 0:
            possible_shares = cash // row['close']
            if possible_shares > 0:
                shares += possible_shares
                cash -= possible_shares * row['close']
        elif row['signal'] == 'Sell' and shares > 0:
            cash += shares * row['close']
            shares = 0

    # Retrieve the current close price for the final calculation
    last_price = get_current_close_price(ticker_symbol)

    if shares > 0:  # Still holding shares at the end
        cash += shares * last_price  # Convert shares back to cash for the final total

    final_principal = cash  # Final amount after the last sell or the end of the period
    profit = final_principal - initial_principal  # Total profit or loss
    profit_percentage = (profit / initial_principal) * 100  # Profit percentage

    return profit_percentage


def get_market_cap(symbol):
    link = f'https://www.screener.in/company/{symbol}'
    hdr = {'User-Agent':'Mozilla/5.0'}
    req = Request(link,headers=hdr)
    market_cap = 0.0

    try:
        page = urlopen(req)
        soup = BeautifulSoup(page, 'html.parser')

        div_html = soup.find('div',{'class': 'company-ratios'})
        ul_html = div_html.find('ul',{'id': 'top-ratios'})

        for li in ul_html.find_all("li"):
            name_span = li.find('span',{'class':'name'})
            if 'Market Cap' in name_span.text:
                num_span = li.find('span',{'class':'number'})
                num_span = num_span.text.replace(',', '')
                market_cap = float(num_span) if (num_span != '') else 0.0
                break

        return market_cap

    except Exception as e:
        return -1

def fetch_last_n_days_data(df,n):
  return df[df['Time'] >=  pd.Timestamp.now() - pd.Timedelta(days=n)]

def main(symbol):
  df= DataFetcher(symbol)
  df=calculate_custom_atr(df)
  df , factors = calculate_supertrends(df)
  df = calculate_target_factor_and_supertrend(df, factors)
  df = generate_signals(df)
  df=df[df['signal'] !='None']
  percentage = calculate_profit_percentage(symbol,df)
  return df,percentage

In [9]:
stocks_df=pd.read_csv('STOCKS_1.csv')
stocks_df = stocks_df.dropna(subset=['Symbol'])
stocks=stocks_df['Symbol']

In [10]:
all_stocks_data = []  # Initialize an empty list to store individual stock data
failed_stocks=[]
total_stocks = len(stocks)

for stock in tqdm(stocks[801:1201], desc="Fetching data", unit="stock"):
  try:
    if get_market_cap(stock) >=970 or get_market_cap(stock) == -1:
      df,percentage = main(stock)
      short_df=fetch_last_n_days_data(df,1)
      short_df['Symbol'] = stock
      short_df['Market Cap cr']= get_market_cap(stock)
      short_df['Profit %']= str(percentage)+ '%'
      all_stocks_data.append(short_df)
  except:
    failed_stocks.append(stock)
    continue

all_stocks_df = pd.concat(all_stocks_data)  # Concatenate all stock dataframes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_df['Symbol'] = stock
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_df['Market Cap cr']= get_market_cap(stock)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_df['Profit %']= str(percentage)+ '%'
Fetching data: 100%|██████████| 10/10 [01:56<00:00, 11.70s/stock]


In [11]:
all_stocks_df.to_csv('kMeans_all_stocks_3.csv', index=False)