In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
import time
from datetime import datetime, timedelta
from dataclasses import dataclass
from typing import Tuple, Optional, Dict, Any, List
import pytz
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

# Run from the project folder so the relative paths used further down
# ('source/STOCKS.csv' and 'signal/today_signal_data.csv') resolve.
# NOTE(review): this is a machine-specific absolute path and the chdir happens
# at import time; os.chdir raises if the folder does not exist.
default_directory = r'C:\Users\Bhuvanesh\Desktop\ALGO_TRADING_STRATEGIES\KNN_STOCK_ALERT'
os.chdir(default_directory)

@dataclass
class TradingConfig:
    """Tunable parameters shared by the data fetcher and the back-test."""
    # Starting cash of the simulated account (read by back_test_metrice).
    initial_capital: float = 100000
    # Minimum number of price rows a symbol needs to be analysed (checked by DataFetcher).
    min_data_points: int = 100
    # Proportional fee applied to every simulated buy and sell: 0.002 = 0.2%.
    transaction_cost: float = 0.002

# Module-wide settings instance read by DataFetcher and back_test_metrice.
config = TradingConfig()

def DataFetcher(symbol: str, interval: str = '1d', min_years: int = 2):
    """Download and clean OHLCV history for one symbol.

    The ``.NS`` ticker is tried first and the ``.BO`` ticker second. The first
    listing that still has at least ``config.min_data_points`` rows after
    cleaning is returned.

    Args:
        symbol: Symbol without the Yahoo Finance exchange suffix.
        interval: Bar interval forwarded to ``Ticker.history``.
        min_years: Look-back window in (365-day) years; older bars are dropped.

    Returns:
        ``(data, ticker_symbol)``. ``data`` has the columns ``open``, ``high``,
        ``low``, ``close``, ``Volume`` and ``Time`` (the bar's calendar date)
        on a fresh 0..n-1 index. When neither listing qualifies the result is
        ``(pd.DataFrame(), None)``.
    """
    tz = pytz.timezone('Asia/Kolkata')
    suffixes = ['.NS', '.BO']
    min_date = datetime.now(tz) - timedelta(days=min_years * 365)
    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

    for suffix in suffixes:
        ticker_symbol = symbol + suffix
        try:
            ticker = yf.Ticker(ticker_symbol)

            # Fetch with up to three attempts, pausing one second between them.
            data = None
            for attempt in range(3):
                try:
                    data = ticker.history(period='max', interval=interval)
                    break
                except Exception:
                    if attempt == 2:
                        raise  # bare raise keeps the original traceback
                    time.sleep(1)

            if data is None or data.empty or len(data) < config.min_data_points:
                continue

            if not all(col in data.columns for col in required_cols):
                continue

            # Compare like with like: a tz-aware cutoff cannot be compared with
            # a tz-naive index, so strip the zone when the index carries none.
            cutoff = pd.Timestamp(min_date)
            if getattr(data.index, 'tz', None) is None:
                cutoff = cutoff.tz_localize(None)
            data = data[data.index >= cutoff]

            if len(data) < config.min_data_points:
                continue

            # Narrow to OHLCV *before* dropping NaNs so that gaps in unrelated
            # columns of the download do not discard valid bars.
            data = data[required_cols].dropna()
            data = data[data['Volume'] > 0]
            data = data[(data['High'] >= data['Low']) & (data['High'] >= data['Close']) &
                        (data['Low'] <= data['Close']) & (data['Open'] > 0)]

            if len(data) < config.min_data_points:
                continue

            # Standardize column names; 'Volume' intentionally keeps its capital V
            # because the indicator code reads df['Volume'].
            data = data.rename(columns={
                'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close',
            })

            data['Time'] = data.index.date
            data.reset_index(drop=True, inplace=True)

            return data, ticker_symbol

        except Exception:
            # Best effort: any failure for this listing falls through to the
            # next exchange suffix.
            continue

    return pd.DataFrame(), None

def rma(close, length):
    """Recursive moving average with smoothing factor ``1 / length``.

    The value at position ``length - 1`` is seeded with the simple mean of the
    first ``length`` inputs; every later value is
    ``alpha * x[i] + (1 - alpha) * previous``.

    Args:
        close: One-dimensional sequence of numbers (list, ndarray or Series).
        length: Smoothing length, must be at least 1.

    Returns:
        A float ndarray shaped like the input. Positions before ``length - 1``
        are NaN (not 0.0), so callers can detect the warm-up period with
        ``isnan``. If the input is shorter than ``length`` every position is NaN.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")

    # Work on a plain float array so indexing is positional even when a
    # pandas Series with a non-default index is passed in.
    values = np.asarray(close, dtype=float)
    rma_values = np.full_like(values, np.nan)
    if len(values) < length:
        return rma_values

    alpha = 1 / length
    rma_values[length - 1] = np.mean(values[:length])  # initial SMA seed

    for i in range(length, len(values)):
        rma_values[i] = alpha * values[i] + (1 - alpha) * rma_values[i - 1]

    return rma_values

def calculate_wma(values, length):
    """Linearly weighted moving average over a trailing window.

    Inside each window the oldest element gets weight 1 and the newest gets
    weight ``length``. Positions that do not yet have ``length`` elements
    behind them are reported as ``None``.

    Returns:
        A list with one entry per input element.
    """
    weights = np.arange(1, length + 1)
    weight_total = weights.sum()

    averaged = []
    for end in range(1, len(values) + 1):
        if end < length:
            # Not enough history for a full window yet.
            averaged.append(None)
            continue
        window = values[end - length:end]
        averaged.append(np.dot(window, weights) / weight_total)
    return averaged
def calculate_supertrend(df, length=10, factor=3.0, ma_type='WMA'):
    """Add SuperTrend columns built around a volume-weighted WMA of the close.

    The centre line is ``WMA(close * Volume) / WMA(Volume)``; the bands sit
    ``factor`` times the smoothed true range above and below it.

    Args:
        df: Frame with ``open``/``high``/``low``/``close``/``Volume`` columns.
            It is not modified; the work happens on a re-indexed copy.
        length: Window for the weighted averages and the true-range smoothing.
        factor: Band width as a multiple of the smoothed true range.
        ma_type: Accepted for interface compatibility; not used by the body.

    Returns:
        A copy of ``df`` with the extra columns ``atr``, ``upperBand``,
        ``lowerBand``, ``direction`` (-1 when the trend line is the lower
        band, 1 when it is the upper band) and ``superTrend``. Rows that
        contain any NaN (the warm-up rows) are dropped.
    """
    df = df.reset_index(drop=True)
    n = len(df)

    close = df['close'].to_numpy(dtype=float)

    # Volume-weighted centre line. calculate_wma marks warm-up slots with
    # None, which the float Series conversion turns into NaN.
    cv_wma = pd.Series(calculate_wma((df['close'] * df['Volume']).tolist(), length), dtype=float).to_numpy()
    v_wma = pd.Series(calculate_wma(df['Volume'].tolist(), length), dtype=float).to_numpy()
    vwma = cv_wma / v_wma

    # True range: plain high-low on the first bar, otherwise the largest of
    # high-low, |high - previous close| and |low - previous close|.
    prev_close = df['close'].shift(1)
    high_low = df['high'] - df['low']
    true_range = np.where(
        np.isnan(df['high'].shift(1)),
        high_low,
        np.maximum(np.maximum(high_low, np.abs(df['high'] - prev_close)),
                   np.abs(df['low'] - prev_close)),
    )
    atr = np.array(rma(true_range, length), dtype=float)
    # Mark the warm-up explicitly so the "no ATR yet" test below is live
    # whether rma() reports those slots as NaN or as 0.
    atr[:max(length - 1, 0)] = np.nan

    upper = vwma + factor * atr
    lower = vwma - factor * atr
    direction = np.full(n, np.nan)
    super_trend = np.full(n, np.nan)

    for i in range(1, n):
        # Previous bar's *final* bands. Reading them here, rather than from a
        # pre-shifted copy of the raw bands, is what carries the ratchet on.
        prev_lower = lower[i - 1]
        prev_upper = upper[i - 1]

        # The lower band may only rise unless the previous close broke below
        # it; the upper band may only fall unless the previous close broke
        # above it. A NaN previous band (warm-up) imposes no constraint.
        if not (np.isnan(prev_lower) or lower[i] > prev_lower or close[i - 1] < prev_lower):
            lower[i] = prev_lower
        if not (np.isnan(prev_upper) or upper[i] < prev_upper or close[i - 1] > prev_upper):
            upper[i] = prev_upper

        if np.isnan(atr[i - 1]):
            direction[i] = 1
        elif super_trend[i - 1] == prev_upper:
            direction[i] = -1 if close[i] > upper[i] else 1
        else:
            direction[i] = 1 if close[i] < lower[i] else -1

        super_trend[i] = lower[i] if direction[i] == -1 else upper[i]

    df['atr'] = atr
    df['upperBand'] = upper
    df['lowerBand'] = lower
    df['direction'] = direction
    df['superTrend'] = super_trend
    df.dropna(inplace=True)
    return df

def price_sp_wma(df):
    """Attach the 20-bar WMA of ``close`` and the 100-bar WMA of ``superTrend``.

    The frame is modified in place: the two columns ``price_WMA`` and
    ``superTrend_WMA`` are added and every row containing a NaN (the warm-up
    rows of the averages) is removed. The same frame is returned.
    """
    # Plain lists make the window slicing inside calculate_wma positional.
    close_values = df['close'].tolist()
    trend_values = df['superTrend'].tolist()

    df['price_WMA'] = calculate_wma(close_values, 20)
    df['superTrend_WMA'] = calculate_wma(trend_values, 100)
    df.dropna(inplace=True)
    return df

def calculate_data_point(df, n):
    """Collect the last ``n`` rows of ``df`` as one sample, newest row first.

    Returns:
        ``(data_points, label)``: ``data_points`` holds the ``superTrend``
        values and ``label`` holds 1 where ``price_WMA`` is above
        ``superTrend_WMA`` on that row and 0 otherwise.
    """
    newest = len(df) - 1
    positions = range(newest, newest - n, -1)

    trend = df['superTrend']
    price_avg = df['price_WMA']
    trend_avg = df['superTrend_WMA']

    data_points = [trend.iloc[pos] for pos in positions]
    label = [1 if price_avg.iloc[pos] > trend_avg.iloc[pos] else 0 for pos in positions]
    return data_points, label

def calculate_data_points(df, window_size=10):
    """Build one sample per row from the ``window_size`` rows that precede it.

    The first ``window_size`` rows have no complete window behind them and
    yield no sample, so the result has ``len(df) - window_size`` entries.

    Returns:
        Two NumPy arrays: the per-window ``superTrend`` values and the
        matching per-window 0/1 labels, as produced by calculate_data_point.
    """
    samples = [
        calculate_data_point(df[start:start + window_size], window_size)
        for start in range(len(df) - window_size)
    ]
    features = [sample[0] for sample in samples]
    targets = [sample[1] for sample in samples]
    return np.array(features), np.array(targets)


def knn_weighted_series(data, labels, k, x):
    """Distance-weighted k-nearest-neighbour score of ``x`` against ``data``.

    The ``k`` entries of ``data`` closest to ``x`` (absolute difference) vote
    with their label, each weighted by ``1 / (distance + 1e-6)``.

    ``data`` and ``labels`` are matched by position, not by index label, so
    the two Series may carry different indexes.

    Args:
        data: Series of reference values.
        labels: Series of labels, positionally parallel to ``data``.
        k: Number of neighbours to use.
        x: Query value.

    Returns:
        The weighted mean of the neighbours' labels, or 0 when there is no
        usable neighbour (empty input, ``k == 0``, or no finite distance).

    Raises:
        ValueError: If ``data`` or ``labels`` is not a pandas Series.
    """
    if not isinstance(data, pd.Series) or not isinstance(labels, pd.Series):
        raise ValueError("Data and labels must be Pandas Series.")

    distances = np.abs(data.to_numpy(dtype=float) - x)
    label_values = labels.to_numpy(dtype=float)

    # Rank by distance and ignore neighbours whose distance is not a finite
    # number (NaN in the data or in the query).
    order = np.argsort(distances, kind='stable')
    nearest = order[np.isfinite(distances[order])][:k]

    # Weights are inversely proportional to distance; the epsilon keeps an
    # exact match from dividing by zero.
    weights = 1 / (distances[nearest] + 1e-6)
    total_weight = weights.sum()
    if not total_weight:
        return 0

    # nansum: a missing label contributes nothing instead of poisoning the score.
    return np.nansum(weights * label_values[nearest]) / total_weight

def apply_corrected_trading_logic(df):
    """Derive a long-only ``signal`` column ('Buy', 'Sell' or 'none').

    Works on a re-indexed copy of ``df`` and walks it from the second row on.

    While flat, a 'Buy' is emitted when ``label_`` is 1 and either the
    previous ``label_`` was not 1, or ``direction`` has just switched from 1
    to -1.

    While long, a 'Sell' is emitted when ``close`` is below
    ``longTrailingStop``, or ``label_`` is 1 with ``direction`` 1, or
    ``label_`` is 0 with ``direction`` -1.
    """
    df = df.reset_index(drop=True)
    signals = ['none'] * len(df)
    in_long = False

    for i in range(1, len(df)):
        if in_long:
            stop_hit = df.loc[i, 'close'] < df.loc[i, 'longTrailingStop']
            if (
                stop_hit
                or (df.loc[i, 'label_'] == 1 and df.loc[i, 'direction'] == 1)
                or (df.loc[i, 'label_'] == 0 and df.loc[i, 'direction'] == -1)
            ):
                signals[i] = 'Sell'
                in_long = False
            continue

        # Flat: look for an entry.
        label_turned_on = df.loc[i, 'label_'] == 1 and df.loc[i - 1, 'label_'] != 1
        if label_turned_on or (
            df.loc[i, 'direction'] == -1
            and df.loc[i - 1, 'direction'] == 1
            and df.loc[i, 'label_'] == 1
        ):
            signals[i] = 'Buy'
            in_long = True
        # Short entries/exits are not implemented.

    df['signal'] = signals
    return df


def _close_position(trade, shares, price, exit_date, reason, fee_rate):
    """Sell ``shares`` at ``price``; return ``(net proceeds, trade record)``."""
    revenue = shares * price
    net_revenue = revenue - revenue * fee_rate
    # NOTE(review): the entry-side fee is not part of initial_investment, so
    # per-trade profit only reflects the exit fee. Kept as in the original.
    initial_investment = shares * trade['entry_price']
    profit = net_revenue - initial_investment
    record = {
        'entry_date': trade['entry_date'],
        'exit_date': exit_date,
        'entry_price': trade['entry_price'],
        'exit_price': price,
        'shares': shares,
        'initial_investment': initial_investment,
        'final_value': net_revenue,
        'profit': profit,
        'profit_pct': profit / initial_investment * 100,
        'reason': reason,
        'holding_days': (exit_date - trade['entry_date']).days,
    }
    return net_revenue, record


def _summarize_trades(trades):
    """Aggregate closed trades into summary statistics (all zero when empty)."""
    stats = {
        'winning_trades': 0,
        'losing_trades': 0,
        'win_rate': 0,
        'total_wins': 0,
        'total_losses': 0,
        'avg_win_pct': 0,
        'avg_loss_pct': 0,
        'max_win': 0,
        'max_loss': 0,
        'profit_factor': 0,
        'avg_holding_days': 0,
        'max_holding_days': 0,
        'min_holding_days': 0,
        'max_consecutive_wins': 0,
        'max_consecutive_losses': 0,
    }
    if not trades:
        return stats

    profits = [t['profit'] for t in trades]
    holding = [t['holding_days'] for t in trades]
    # A break-even trade (profit == 0) counts as a loss.
    winners = [t for t in trades if t['profit'] > 0]
    losers = [t for t in trades if t['profit'] <= 0]
    total_wins = sum(t['profit'] for t in winners)
    total_losses = sum(t['profit'] for t in losers)

    # Longest runs of consecutive winning / losing trades.
    win_run = loss_run = best_win_run = best_loss_run = 0
    for profit in profits:
        if profit > 0:
            win_run += 1
            loss_run = 0
            best_win_run = max(best_win_run, win_run)
        else:
            loss_run += 1
            win_run = 0
            best_loss_run = max(best_loss_run, loss_run)

    stats.update({
        'winning_trades': len(winners),
        'losing_trades': len(losers),
        'win_rate': len(winners) / len(trades) * 100,
        'total_wins': total_wins,
        'total_losses': total_losses,
        'avg_win_pct': np.mean([t['profit_pct'] for t in winners]) if winners else 0,
        'avg_loss_pct': np.mean([t['profit_pct'] for t in losers]) if losers else 0,
        'max_win': max(profits),
        'max_loss': min(profits),
        'profit_factor': abs(total_wins / total_losses) if total_losses != 0 else float('inf'),
        'avg_holding_days': np.mean(holding),
        'max_holding_days': max(holding),
        'min_holding_days': min(holding),
        'max_consecutive_wins': best_win_run,
        'max_consecutive_losses': best_loss_run,
    })
    return stats


def back_test_metrice(df: pd.DataFrame):
    """Replay the ``signal`` column as an all-in, long-only back-test.

    Starting from ``config.initial_capital``, every 'Buy' while flat invests
    99% of the available cash in whole shares, and every 'Sell' while long
    liquidates the position. ``config.transaction_cost`` is charged
    proportionally on both sides. A position still open on the last row is
    closed at the last ``close`` with the reason 'End of Data'.

    Args:
        df: Frame with ``close``, ``signal`` and ``Time`` columns. ``Time``
            values must support subtraction yielding an object with ``.days``.

    Returns:
        A dict with the overall return, win/loss statistics, holding-period
        statistics, streak lengths and the list of trade records under
        ``trades_detail``. With no trades every statistic is 0.
    """
    initial_capital = config.initial_capital
    fee_rate = config.transaction_cost
    cash = initial_capital
    shares = 0
    trades = []
    current_trade = None

    for _, row in df.iterrows():
        current_price = row['close']

        if row['signal'] == 'Buy' and cash > 0 and shares == 0:
            # Keep 1% of cash back so the purchase fee can be paid.
            gross_investment = cash * 0.99
            shares_to_buy = int(gross_investment / current_price)

            if shares_to_buy > 0:
                cost = shares_to_buy * current_price
                total_cost = cost + cost * fee_rate

                if total_cost <= cash:
                    shares = shares_to_buy
                    cash -= total_cost
                    current_trade = {
                        'entry_price': current_price,
                        'entry_date': row['Time'],
                        'shares': shares,
                    }

        elif shares > 0 and row['signal'] == 'Sell':
            proceeds, record = _close_position(
                current_trade, shares, current_price, row['Time'], 'Signal', fee_rate
            )
            cash += proceeds
            trades.append(record)
            shares = 0
            current_trade = None

    # Close any remaining position at the end of the data.
    if shares > 0 and not df.empty:
        proceeds, record = _close_position(
            current_trade, shares, df['close'].iloc[-1], df['Time'].iloc[-1],
            'End of Data', fee_rate
        )
        cash += proceeds
        trades.append(record)
        shares = 0

    total_return_pct = (cash - initial_capital) / initial_capital * 100
    stats = _summarize_trades(trades)

    return {
        # Portfolio performance
        'total_return_pct': total_return_pct,

        # Trade statistics
        'total_trades': len(trades),
        'winning_trades': stats['winning_trades'],
        'losing_trades': stats['losing_trades'],
        'win_rate': stats['win_rate'],

        # Profit/loss analysis (amounts expressed as % of initial capital)
        'total_wins_pct': (stats['total_wins'] / initial_capital) * 100,
        'total_losses_pct': abs(stats['total_losses'] / initial_capital) * 100,
        'avg_win_pct': stats['avg_win_pct'],
        'avg_loss_pct': abs(stats['avg_loss_pct']),
        'max_win_pct': (stats['max_win'] / initial_capital) * 100,
        'max_loss_pct': abs(stats['max_loss'] / initial_capital) * 100,
        'profit_factor': stats['profit_factor'],

        # Trading behaviour
        'avg_holding_days': stats['avg_holding_days'],
        'max_holding_days': stats['max_holding_days'],
        'min_holding_days': stats['min_holding_days'],
        'max_consecutive_wins': stats['max_consecutive_wins'],
        'max_consecutive_losses': stats['max_consecutive_losses'],

        # Detailed trades for further analysis
        'trades_detail': trades,
    }

def get_market_data(symbol: str):
    """Look up market cap, sector and a few ratios for ``symbol``.

    ``symbol`` is passed to ``yf.Ticker`` unchanged. The market cap is
    divided by 1e7 and reported under ``market_cap_cr``. If anything in the
    lookup raises, a dict of neutral placeholders (0 / 'Unknown') is returned
    instead.
    """
    fallback = {
        'market_cap_cr': 0,
        'sector': 'Unknown',
        'industry': 'Unknown',
        'pe_ratio': 0,
        'pb_ratio': 0,
        'debt_to_equity': 0,
        'roe': 0,
    }

    try:
        info = yf.Ticker(symbol).info
        cap = info.get('marketCap', 0)
        return {
            'market_cap_cr': cap / 1e7 if cap else 0,
            'sector': info.get('sector', 'Unknown'),
            'industry': info.get('industry', 'Unknown'),
            'pe_ratio': info.get('trailingPE', 0),
            'pb_ratio': info.get('priceToBook', 0),
            'debt_to_equity': info.get('debtToEquity', 0),
            'roe': info.get('returnOnEquity', 0),
        }
    except Exception:
        return fallback

def get_res(symbol):
    """Run the full pipeline for one symbol and report today's signal, if any.

    Steps: fetch five years of daily bars, compute SuperTrend and the two
    WMAs, score each row with a 3-neighbour weighted KNN over the preceding
    window, derive Buy/Sell signals, and back-test them.

    Returns:
        A dict with the ticker, the last bar's date, close and signal, the
        back-test metrics and the market metadata, or ``None`` when no data
        could be fetched or the last bar carries no signal.
    """
    # Must be the same window for the sample builder and for the row offset
    # below, otherwise samples and rows fall out of step.
    window_size = 10

    df, ticker_symbol = DataFetcher(symbol, interval='1d', min_years=5)
    if df.empty:
        return None

    df = calculate_supertrend(df)
    df = price_sp_wma(df)
    data, labels = calculate_data_points(df, window_size)

    # The first window_size rows have no sample; copy so the column
    # assignments below act on an independent frame, not on a slice.
    df = df.iloc[window_size:].copy()
    if df.empty:
        return None

    df['label_'] = [
        knn_weighted_series(pd.Series(features), pd.Series(targets), 3, trend)
        for features, targets, trend in zip(data, labels, df['superTrend'])
    ]
    df.drop(columns=['price_WMA', 'superTrend_WMA'], inplace=True)
    df['longTrailingStop'] = df['superTrend'] - (df['atr'] * 3)
    df = apply_corrected_trading_logic(df)

    # Nothing to report unless the most recent bar carries a signal; return
    # before the back-test and the network metadata lookup in that case.
    latest_signal = df['signal'].iloc[-1]
    if latest_signal == 'none':
        return None

    back_test_met = back_test_metrice(df)
    stock_info = get_market_data(ticker_symbol)
    return {
        'symbol': ticker_symbol,
        'Date': df['Time'].iloc[-1],
        'Current_price': df['close'].iloc[-1],
        'signal': latest_signal,
        **back_test_met,
        **stock_info,
    }
  
def process_stock(stock):
    """Run get_res for one symbol without letting an exception escape.

    Returns:
        ``(stock, result)`` where ``result`` is whatever get_res returned, or
        ``None`` if it raised. A failure is logged as a warning so the cause
        is not lost.
    """
    import logging  # local import: only this function needs it

    try:
        return (stock, get_res(stock))
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "Analysis failed for %s: %s: %s", stock, type(exc).__name__, exc
        )
        return (stock, None)
if __name__ == "__main__":
    # Scan every symbol listed in source/STOCKS.csv and save today's signals.
    stocks_df=pd.read_csv('source/STOCKS.csv')
    stocks=stocks_df['Symbol']

    sucecss_results=[]  # not used below
    failed_stocks=[]
    all_results=[]

    # os.cpu_count() can return None, and "count - 1" is 0 on a single-core
    # machine; ThreadPoolExecutor rejects max_workers <= 0, so clamp to 1.
    worker_count = max(1, (os.cpu_count() or 1) - 1)

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        futures = {executor.submit(process_stock, stock): stock for stock in stocks}

        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing stocks", unit="stock"):
            stock, res = future.result()
            if res:
                all_results.append(res)
            else:
                # Covers both real failures and symbols whose latest bar has
                # no signal (get_res returns None for those as well).
                failed_stocks.append(stock)


    print('Total stocks : ',len(stocks))
    print('Analysis sucecss : ',len(all_results))
    print('Analysis failed : ',len(failed_stocks))
    results_df=pd.DataFrame(all_results)
    # Create the output folder on first run instead of failing in to_csv.
    os.makedirs('signal', exist_ok=True)
    results_df.to_csv('signal/today_signal_data.csv',index=False)

Processing stocks:   2%|▏         | 40/2225 [02:25<2:47:59,  4.61s/stock]$FINPIPE.NS: possibly delisted; no price data found  (1d 1926-06-26 -> 2025-06-01)
Processing stocks:  17%|█▋        | 382/2225 [20:11<1:05:04,  2.12s/stock]$MOCAPITAL.BO: possibly delisted; no timezone found
Processing stocks:  17%|█▋        | 385/2225 [20:15<52:32,  1.71s/stock]  $BPCL.NS: possibly delisted; no price data found  (1d 1926-06-26 -> 2025-06-01)
Processing stocks:  25%|██▍       | 556/2225 [28:03<41:49,  1.50s/stock]  NEXT30ADD.BO: Period 'max' is invalid, must be of the format 1d, 5d, etc.
Processing stocks:  28%|██▊       | 623/2225 [31:04<1:03:16,  2.37s/stock]$GROWWN200.BO: possibly delisted; no timezone found
Processing stocks:  31%|███       | 693/2225 [34:15<51:32,  2.02s/stock]  $TATASTEEL.NS: possibly delisted; no price data found  (1d 1926-06-26 -> 2025-06-01)
Processing stocks:  32%|███▏      | 701/2225 [34:38<1:01:48,  2.43s/stock]$MSCIINDIA.BO: possibly delisted; no timezone found
Proce

Total stocks :  2225
Analysis sucecss :  4
Analysis failed :  2221



