In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
from ta.momentum import RSIIndicator, ROCIndicator, StochasticOscillator
from ta.trend import MACD, SMAIndicator, EMAIndicator, ADXIndicator, PSARIndicator
from ta.volatility import BollingerBands, AverageTrueRange
from ta.volume import VolumeWeightedAveragePrice, OnBalanceVolumeIndicator, AccDistIndexIndicator
from datetime import datetime, timedelta
import warnings
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from joblib import dump, load
import os

warnings.simplefilter(action='ignore', category=FutureWarning)

class MLTechnicalScorer:
    def __init__(self, model_path='ta_model.joblib', retrain_days=30,
                 short_period=14, medium_period=26, long_period=50):
        self.model_path = model_path
        self.retrain_days = retrain_days
        self.short_period = short_period
        self.medium_period = medium_period
        self.long_period = long_period
        self.model = None
        self.last_trained = None
        self.scaler = StandardScaler()
        
        # Try to load existing model
        if os.path.exists(model_path):
            self.model = load(model_path)
            self.last_trained = datetime.fromtimestamp(os.path.getmtime(model_path))
    
    def prepare_features(self, df, lookforward_days=5):
        """
        Prepare feature matrix from historical data using configurable timeframes
        Returns:
            X (DataFrame): Features
            y (Series): Target (future returns)
        """
        # Calculate all technical indicators with configurable periods
        indicators = self._calculate_all_indicators(df)
        
        # Create features DataFrame
        features = pd.DataFrame()
        
        # Price features
        features['price'] = df['Close']
        features[f'returns_{self.short_period}d'] = df['Close'].pct_change(self.short_period)
        features[f'returns_{self.medium_period}d'] = df['Close'].pct_change(self.medium_period)
        features[f'returns_{self.long_period}d'] = df['Close'].pct_change(self.long_period)
        
        # Add all technical indicators
        for name, values in indicators.items():
            if isinstance(values, (pd.Series, pd.DataFrame)):
                features[name] = values
            
        # Calculate target (future returns)
        features['target'] = df['Close'].pct_change(lookforward_days).shift(-lookforward_days)
        
        # Drop rows with missing values
        features = features.dropna()
        
        if len(features) == 0:
            return None, None
            
        X = features.drop('target', axis=1)
        y = features['target']
        
        return X, y
    
    def _calculate_all_indicators(self, df):
        """Calculate all technical indicators using configurable timeframes"""
        indicators = {}
        
        # Momentum indicators for all timeframes
        for period, suffix in [(self.short_period, 'short'), 
                             (self.medium_period, 'medium'), 
                             (self.long_period, 'long')]:
            indicators[f'rsi_{suffix}'] = RSIIndicator(close=df['Close'], window=period).rsi()
            indicators[f'roc_{suffix}'] = ROCIndicator(close=df['Close'], window=period).roc()
            
            # Stochastic only for short timeframe
            if suffix == 'short':
                indicators[f'stoch_{suffix}'] = StochasticOscillator(
                    high=df['High'], low=df['Low'], close=df['Close'], 
                    window=period, smooth_window=3).stoch()
        
        # Trend indicators
        for period, suffix in [(self.short_period, 'short'), 
                             (self.medium_period, 'medium'), 
                             (self.long_period, 'long')]:
            indicators[f'sma_{suffix}'] = SMAIndicator(close=df['Close'], window=period).sma_indicator()
            indicators[f'ema_{suffix}'] = EMAIndicator(close=df['Close'], window=period).ema_indicator()
            
            # MACD only for medium timeframe (standard settings)
            if suffix == 'medium':
                macd = MACD(close=df['Close'], window_slow=26, window_fast=12, window_sign=9)
                indicators['macd'] = macd.macd()
                indicators['macd_diff'] = macd.macd_diff()
                
            # ADX only for medium timeframe
            if suffix == 'medium':
                indicators['adx'] = ADXIndicator(
                    high=df['High'], low=df['Low'], close=df['Close'], window=period).adx()
        
        # Volatility indicators
        for period, suffix in [(self.short_period, 'short'), 
                             (self.medium_period, 'medium'), 
                             (self.long_period, 'long')]:
            bb = BollingerBands(close=df['Close'], window=period, window_dev=2)
            indicators[f'bb_upper_{suffix}'] = bb.bollinger_hband()
            indicators[f'bb_middle_{suffix}'] = bb.bollinger_mavg()
            indicators[f'bb_lower_{suffix}'] = bb.bollinger_lband()
            
            # ATR for all timeframes
            indicators[f'atr_{suffix}'] = AverageTrueRange(
                high=df['High'], low=df['Low'], close=df['Close'], window=period).average_true_range()
        
        # Volume indicators
        for period, suffix in [(self.short_period, 'short'), 
                             (self.medium_period, 'medium'), 
                             (self.long_period, 'long')]:
            indicators[f'vwap_{suffix}'] = VolumeWeightedAveragePrice(
                high=df['High'], low=df['Low'], close=df['Close'], 
                volume=df['Volume'], window=period).volume_weighted_average_price()
        
        # Volume indicators that don't use timeframes
        indicators['obv'] = OnBalanceVolumeIndicator(close=df['Close'], volume=df['Volume']).on_balance_volume()
        indicators['adi'] = AccDistIndexIndicator(
            high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume']).acc_dist_index()
        indicators['volume_ma'] = df['Volume'].rolling(self.short_period).mean()
        indicators['volume_spike'] = (df['Volume'] > (df['Volume'].rolling(self.short_period).mean() * 2)).astype(int)
        
        # PSAR indicators for all timeframes with different parameters
        psar_params = {
            'short': (0.02, 0.2),
            'medium': (0.015, 0.15),
            'long': (0.01, 0.1)
        }
        
        for suffix, (step, max_step) in psar_params.items():
            period = getattr(self, f'{suffix}_period')
            psar = PSARIndicator(
                high=df['High'], low=df['Low'], close=df['Close'],
                step=step, max_step=max_step).psar()
            indicators[f'psar_{suffix}'] = psar
            indicators[f'psar_bullish_{suffix}'] = (df['Close'] > psar).astype(int)
        
        return indicators
    
    def train_model(self, tickers, start_date, end_date):
        """Train model on historical data for multiple tickers"""
        all_X = []
        all_y = []
        
        for ticker in tickers:
            try:
                # Download historical data - need enough for longest timeframe
                stock = yf.Ticker(ticker)
                df = stock.history(start=start_date - timedelta(days=self.long_period*3),
                                  end=end_date, 
                                  interval='1d')
                #print(len(df), self.long_period * 3)
                if len(df) < self.long_period * 3:  # Need at least 3x longest period
                    continue
                    
                # Prepare features
                X, y = self.prepare_features(df)
                
                if X is not None and y is not None:
                    all_X.append(X)
                    all_y.append(y)
            except Exception as e:
                print(f"Error processing {ticker}: {str(e)}")
                continue
        
        if not all_X:
            raise ValueError("No valid data found for training")
            
        # Combine all data
        X_combined = pd.concat(all_X)
        y_combined = pd.concat(all_y)
        
        # Train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            X_combined, y_combined, test_size=0.2, random_state=42)
        
        # Create pipeline with scaling and model
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('model', RandomForestRegressor(
                n_estimators=100,
                max_depth=10,
                min_samples_leaf=5,
                random_state=42,
                n_jobs=-1))
        ])
        
        # Train model
        pipeline.fit(X_train, y_train)
        
        # Evaluate
        train_score = pipeline.score(X_train, y_train)
        test_score = pipeline.score(X_test, y_test)
        print(f"Model trained - Train R²: {train_score:.3f}, Test R²: {test_score:.3f}")
        
        # Save model
        self.model = pipeline
        self.last_trained = datetime.now()
        dump(pipeline, self.model_path)
        
        return pipeline
    
    def should_retrain(self):
        """Check if model needs retraining"""
        if self.last_trained is None:
            return True
        return (datetime.now() - self.last_trained).days >= self.retrain_days
    
    def calculate_score_and_signals(self, ticker, current_date):
        """Calculate ML-based score and generate trading signals"""
        if self.model is None or self.should_retrain():
            print("Model not loaded or needs retraining - training now...")
            # You would replace this with your actual training tickers
            self.train_model(["BEL.NS","DLF.NS"], current_date - timedelta(days=365*3),current_date)
        
        try:
            # Download data - need enough for longest timeframe
            end_date = current_date
            start_date = end_date - timedelta(days=self.long_period*3)
            stock = yf.Ticker(ticker)
            df = stock.history(start=start_date, end=end_date, interval='1d')
            
            if len(df) < self.long_period:  # Minimum data required
                return None
                
            # Prepare features (without target)
            X, _ = self.prepare_features(df)
            if X is None or len(X) == 0:
                return None
                
            # Get most recent features
            latest_features = X.iloc[-1:].copy()
            
            # Make prediction
            predicted_return = self.model.predict(latest_features)[0]
            
            # Convert return prediction to score (0-100)
            score = 50 + (predicted_return * 500)  # Scale to make meaningful differences
            score = max(0, min(100, score))  # Clamp to 0-100
            
            # Generate signals based on indicators and score
            signals = self._generate_signals(df, latest_features.iloc[0], score)
            
            # Prepare result dictionary
            result = {
                'Ticker': ticker,
                'Date': current_date.strftime('%Y-%m-%d'),
                'Price': f"{df['Close'].iloc[-1]:.2f}",
                f'Change_{self.short_period}D': f"{df['Close'].iloc[-1]/df['Close'].iloc[-self.short_period-1]-1:.1%}",
                f'Change_{self.medium_period}D': f"{df['Close'].iloc[-1]/df['Close'].iloc[-self.medium_period-1]-1:.1%}",
                f'Change_{self.long_period}D': f"{df['Close'].iloc[-1]/df['Close'].iloc[-self.long_period-1]-1:.1%}",
                f'RSI_{self.short_period}': f"{latest_features[f'rsi_short'].iloc[0]:.1f}",
                f'RSI_{self.medium_period}': f"{latest_features[f'rsi_medium'].iloc[0]:.1f}",
                f'Stoch_%K_{self.short_period}': f"{latest_features[f'stoch_short'].iloc[0]:.1f}",
                f'MACD_diff_{self.medium_period}': f"{latest_features['macd_diff'].iloc[0]:.3f}",
                'BB_%_short': f"{(df['Close'].iloc[-1] - latest_features['bb_lower_short'].iloc[0])/(latest_features['bb_upper_short'].iloc[0] - latest_features['bb_lower_short'].iloc[0]):.1%}",
                f'SMA_{self.short_period}/{self.medium_period}/{self.long_period}': f"{latest_features[f'sma_short'].iloc[0]:.1f}/{latest_features[f'sma_medium'].iloc[0]:.1f}/{latest_features[f'sma_long'].iloc[0]:.1f}",
                f'PSAR_{self.short_period}': 'Bullish' if latest_features['psar_bullish_short'].iloc[0] else 'Bearish',
                f'PSAR_{self.medium_period}': 'Bullish' if latest_features['psar_bullish_medium'].iloc[0] else 'Bearish',
                f'PSAR_{self.long_period}': 'Bullish' if latest_features['psar_bullish_long'].iloc[0] else 'Bearish',
                'Volume': f"{df['Volume'].iloc[-1]/1e6:.1f}M",
                'Volume_Spike': 'Yes' if latest_features['volume_spike'].iloc[0] else 'No',
                'OBV_Trend': '↑' if latest_features['obv'].iloc[0] > X['obv'].iloc[-2] else '↓',
                'VWAP_Relation': 'above' if df['Close'].iloc[-1] > latest_features['vwap_medium'].iloc[0] else 'below',
                'Score': f"{score:.1f}",
                'Predicted_Return': f"{predicted_return:.2%}",
                'Signal': signals['primary'],
                'Signal_Reasons': signals['reasons']
            }
            
            return result
            
        except Exception as e:
            print(f"Error calculating score for {ticker}: {str(e)}")
            return None
    
    def _generate_signals(self, df, features, score):
        """Generate trading signals based on features and ML score"""
        signals = {
            'primary': 'HOLD',
            'reasons': []
        }
        
        # Get PSAR states
        psar_short = features['psar_bullish_short']
        psar_medium = features['psar_bullish_medium']
        psar_long = features['psar_bullish_long']
        
        # Get trend states
        price = df['Close'].iloc[-1]
        sma_short = features['sma_short']
        sma_medium = features['sma_medium']
        sma_long = features['sma_long']
        
        # Volume indicators
        obv_trend = '↑' if features['obv'] > features['obv'] else '↓'  # Simplified
        volume_spike = features['volume_spike']
        
        # Generate reasons
        if score > 70:
            signals['reasons'].append(f"High ML Score ({score:.1f})")
        if score < 30:
            signals['reasons'].append(f"Low ML Score ({score:.1f})")
            
        if psar_short and psar_medium and psar_long:
            signals['reasons'].append("PSAR Bullish All Timeframes")
        elif psar_short or psar_medium:
            signals['reasons'].append("PSAR Bullish Some Timeframes")
            
        if price > sma_short > sma_medium > sma_long:
            signals['reasons'].append("Strong Uptrend (Price > SMAs)")
            
        if obv_trend == '↑' and volume_spike:
            signals['reasons'].append("OBV Up with Volume Spike")
        
        # Generate primary signal
        if (score > 75 and psar_short and psar_medium and psar_long and 
            price > sma_short > sma_medium > sma_long):
            signals['primary'] = "STRONG BUY"
        elif score > 65 and (psar_short or psar_medium) and price > sma_short:
            signals['primary'] = "BUY"
        elif (score < 25 and not psar_short and not psar_medium and not psar_long and 
              price < sma_short < sma_medium):
            signals['primary'] = "STRONG SELL"
        elif score < 35 and not (psar_short and psar_medium) and price < sma_short:
            signals['primary'] = "SELL"
        
        return signals

# Example usage with your original timeframes (14/26/50):
if __name__ == "__main__":
    # Initialize scorer with your preferred timeframes
    scorer = MLTechnicalScorer(short_period=14, medium_period=26, long_period=50)
    
    # Calculate score and signals for a stock
    result = scorer.calculate_score_and_signals('JMFINANCIL.NS', datetime.now())
    
    if result:
        print("\nTechnical Analysis Results:")
        print("-" * 50)
        for key, value in result.items():
            if key == 'features':
                continue
            print(f"{key:>25}: {value}")

Model not loaded or needs retraining - training now...
840 150
841 150
Model trained - Train R²: 0.656, Test R²: 0.413

Technical Analysis Results:
--------------------------------------------------
                   Ticker: JMFINANCIL.NS
                     Date: 2025-06-17
                    Price: 143.28
               Change_14D: 19.3%
               Change_26D: 46.2%
               Change_50D: 51.5%
                   RSI_14: 84.2
                   RSI_26: 76.6
              Stoch_%K_14: 97.6
             MACD_diff_26: 2.220
               BB_%_short: 90.4%
             SMA_14/26/50: 128.2/117.8/107.7
                  PSAR_14: Bullish
                  PSAR_26: Bullish
                  PSAR_50: Bullish
                   Volume: 3.7M
             Volume_Spike: Yes
                OBV_Trend: ↑
            VWAP_Relation: above
                    Score: 53.0
         Predicted_Return: 0.61%
                   Signal: HOLD
           Signal_Reasons: ['PSAR Bullish All Timeframe