In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Step 1: Data Extraction
def extract_data(company, start_date, end_date, interval):
    """
    Downloads price history for one ticker through yfinance.
    Args:
    - company: Ticker symbol handed to yfinance
    - start_date: First date of the requested range (YYYY-MM-DD)
    - end_date: Last date of the requested range (YYYY-MM-DD)
    - interval: Bar size for the request (e.g., '1d' for daily)
    Returns:
    - The object returned by yf.download (expected to be an OHLC DataFrame)
    """
    request = {"start": start_date, "end": end_date, "interval": interval}
    return yf.download(company, **request)

# Step 2: Indicator Calculation
def calculate_SMA(data, window=20):
    """
    Computes the Simple Moving Average (SMA) of the 'Close' column.
    Args:
    - data: DataFrame with a 'Close' column
    - window: Number of consecutive rows averaged per output value
    Returns:
    - Series of rolling means; rows without a full window are NaN
    """
    closing_prices = data['Close']
    rolling_view = closing_prices.rolling(window=window)
    return rolling_view.mean()

# Similarly, functions for EMA, MACD, OBV, Bollinger Bands, ADX, Stochastic Oscillators, ATR, Standard Deviation can be defined.

# Step 3: Feature Engineering
def create_features(data):
    """
    Adds technical-indicator columns to the price data.
    Args:
    - data: DataFrame containing OHLC data (modified in place)
    Returns:
    - The same DataFrame object, now carrying an 'SMA_20' column
    """
    # Currently the only engineered feature is the 20-row simple moving average.
    sma_20 = calculate_SMA(data, window=20)
    data['SMA_20'] = sma_20
    return data

# Step 4: Trading Decision
def trading_decision(data):
    """
    Determines when to buy, sell, or hold based on indicators.

    The rule compares the close with its 20-row SMA:
    'Buy' when Close is above SMA_20, 'Sell' when it is below, and 'Hold'
    when there is no signal (Close equals SMA_20, or SMA_20 is NaN because
    the moving-average window is not yet full).
    Args:
    - data: DataFrame with 'Close' and 'SMA_20' columns; a 'Signal' column is
      written to it in place
    Returns:
    - Series containing trading decisions (Buy, Sell, Hold)
    """
    close = data['Close']
    sma = data['SMA_20']
    # Comparisons against NaN are False on both sides, so warm-up rows fall
    # through to the 'Hold' default instead of being mislabelled as 'Sell'.
    data['Signal'] = np.select([close > sma, close < sma], ['Buy', 'Sell'], default='Hold')
    return data['Signal']

# Step 5: Model Building
def logistic_regression(X_train, y_train, X_test, learning_rate=0.1, n_iterations=1000):
    """
    Builds a (multinomial) logistic regression model from scratch.

    The model is a softmax regression with an intercept, fitted by batch
    gradient descent on the mean cross-entropy loss. Predictions are always
    drawn from the labels present in y_train.
    Args:
    - X_train: Training features, 2-D array-like of shape (n_samples, n_features)
    - y_train: Training target variable (one label per training row)
    - X_test: Testing features, 2-D array-like with the same number of columns
    - learning_rate: Gradient-descent step size
    - n_iterations: Number of gradient-descent steps
    Returns:
    - Array of predicted trading decisions, one per row of X_test
    Raises:
    - ValueError: if X_train contains no rows
    """
    # Missing feature values are replaced by 0 so one NaN cannot poison the
    # weights; 0 is the column mean when the inputs have been standardised.
    X_train = np.nan_to_num(np.asarray(X_train, dtype=float))
    X_test = np.nan_to_num(np.asarray(X_test, dtype=float))
    n_samples = X_train.shape[0]
    if n_samples == 0:
        raise ValueError("logistic_regression requires at least one training sample")

    # Map labels to integer class ids and then to one-hot rows.
    classes, class_index = np.unique(np.asarray(y_train), return_inverse=True)
    one_hot = np.eye(len(classes))[class_index.ravel()]

    # A leading column of ones lets the first weight row act as the intercept.
    train_design = np.hstack([np.ones((n_samples, 1)), X_train])
    weights = np.zeros((train_design.shape[1], len(classes)))

    for _ in range(n_iterations):
        logits = train_design @ weights
        # Subtracting the row maximum keeps exp() from overflowing.
        logits -= logits.max(axis=1, keepdims=True)
        probabilities = np.exp(logits)
        probabilities /= probabilities.sum(axis=1, keepdims=True)
        gradient = train_design.T @ (probabilities - one_hot) / n_samples
        weights -= learning_rate * gradient

    test_design = np.hstack([np.ones((X_test.shape[0], 1)), X_test])
    return classes[np.argmax(test_design @ weights, axis=1)]

# Step 6: Evaluation
def evaluate(y_true, y_pred):
    """
    Evaluates the model using F1-score, accuracy, and AUC-ROC score.

    AUC-ROC is computed one-vs-rest from hard label predictions: both inputs
    are one-hot encoded over the labels that occur in y_true, then the
    per-label AUCs are averaged with support weights.
    Args:
    - y_true: True target variable
    - y_pred: Predicted target variable
    Returns:
    - Dictionary containing evaluation metrics; 'AUC-ROC Score' is NaN when
      y_true holds fewer than two distinct labels (AUC is undefined then)
    """
    f1 = f1_score(y_true, y_pred, average='weighted')
    accuracy = accuracy_score(y_true, y_pred)

    # Strip any index so the two inputs are compared purely by position.
    true_values = pd.Series(np.asarray(y_true))
    pred_values = pd.Series(np.asarray(y_pred))

    # Encode both sides over one shared label list. Encoding them separately
    # yields differently shaped matrices whenever the predictions contain a
    # label that y_true lacks (or vice versa), which makes roc_auc_score fail.
    labels = sorted(true_values.unique())
    if len(labels) < 2:
        auc_roc = float('nan')
    else:
        true_indicator = (
            pd.get_dummies(true_values).reindex(columns=labels, fill_value=False).to_numpy(dtype=int)
        )
        # Predictions of a label absent from y_true become an all-zero row.
        pred_indicator = (
            pd.get_dummies(pred_values).reindex(columns=labels, fill_value=False).to_numpy(dtype=int)
        )
        auc_roc = roc_auc_score(true_indicator, pred_indicator, average='weighted')
    return {'F1 Score': f1, 'Accuracy': accuracy, 'AUC-ROC Score': auc_roc}

# Example usage
company = 'AAPL'  # Choosing Apple Inc.
start_date = '2023-01-01'
end_date = '2024-01-01'
interval = '1d'
data = extract_data(company, start_date, end_date, interval)

features = create_features(data)
# The 'Signal' column only exists once trading_decision() has run, so the
# target has to be computed before that column can be dropped from the
# feature matrix (dropping it first raises KeyError).
y = trading_decision(features)  # Target variable
X = features.drop(columns=['Signal'])  # Features

# Splitting data into train and test sets (80-20 split, chronological order)
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Scaling features (statistics are fitted on the training rows only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Building and evaluating logistic regression model
predictions = logistic_regression(X_train_scaled, y_train, X_test_scaled)
evaluation = evaluate(y_test, predictions)
print("Evaluation Metrics:")
print(evaluation)

In [1]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def download_data(ticker, period="1y"):
  """Fetches price history for `ticker` over `period` and returns its "Adj Close" column."""
  history = yf.download(ticker, period=period)
  return history["Adj Close"]

# Ticker of the Nifty 50 company to analyse (swap in any other symbol)
ticker = "RELIANCE.NS"

# Fetch the adjusted-close history for that ticker
data = download_data(ticker=ticker)

# Preprocess data (handle missing values, outliers, etc.)
# ... (implementation specific to your data)

def calculate_SMA(data, window):
  """Returns the rolling mean of `data` over `window` consecutive observations."""
  windowed = data.rolling(window=window)
  return windowed.mean()
    

def calculate_RSI(data, window):
  """Calculates the Relative Strength Index (RSI).

  Args:
    data: Series of prices.
    window: Smoothing length; averages use alpha = 1 / window and the first
      valid value appears once `window` price changes are available.

  Returns:
    Series on the same index as `data`, with values in [0, 100]. It is NaN
    during warm-up and wherever both average gain and average loss are zero.
  """
  delta = data.diff()
  # Keep both series on the full index, with zeros on non-matching days.
  # Filtering rows out instead gives gains and losses disjoint indexes, so
  # their ratio aligns to NaN everywhere.
  gains = delta.clip(lower=0)
  losses = delta.clip(upper=0).abs()
  avg_gain = gains.ewm(alpha=1 / window, min_periods=window).mean()
  avg_loss = losses.ewm(alpha=1 / window, min_periods=window).mean()
  # A zero average loss gives RS = inf, which maps to RSI = 100 below.
  RS = avg_gain / avg_loss
  RSI = 100 - 100 / (1 + RS)
  return RSI
    

def calculate_MACD(data, slow_ema_window=26, fast_ema_window=12, signal_ema_window=9):
  """Calculates the Moving Average Convergence Divergence (MACD).

  Args:
    data: Series of prices.
    slow_ema_window: Span of the slow EMA.
    fast_ema_window: Span of the fast EMA.
    signal_ema_window: Span of the EMA applied to the MACD line.

  Returns:
    Tuple (macd, signal_line) of Series on the index of `data`; values are
    NaN until each EMA has seen as many observations as its span.
  """
  # The window lengths are EMA spans, i.e. alpha = 2 / (span + 1).
  # alpha = 1 / window would smooth roughly twice as slowly as intended.
  fast_ema = data.ewm(span=fast_ema_window, min_periods=fast_ema_window).mean()
  slow_ema = data.ewm(span=slow_ema_window, min_periods=slow_ema_window).mean()
  macd = fast_ema - slow_ema
  signal_line = macd.ewm(span=signal_ema_window, min_periods=signal_ema_window).mean()
  return macd, signal_line
    

def calculate_bollinger_bands(data, window=20):
  """Returns (upper, lower) Bollinger Bands: rolling mean plus/minus two rolling standard deviations."""
  roller = data.rolling(window=window)
  center = roller.mean()
  band_offset = 2 * roller.std()
  return center + band_offset, center - band_offset
    

# Implement functions for other indicators (e.g., Stochastic Oscillator, ATR, Standard Deviation)
# ...
def create_features(data):
  """Assembles the indicator and derived feature columns for `data` into a new DataFrame."""
  macd_line, macd_signal = calculate_MACD(data)
  upper_band, lower_band = calculate_bollinger_bands(data)
  indicator_columns = {
      "SMA_20": calculate_SMA(data, window=20),
      "RSI_14": calculate_RSI(data, window=14),
      "MACD": macd_line,
      "MACD_signal": macd_signal,
      "BB_upper": upper_band,
      "BB_lower": lower_band,
  }

  # Columns are attached one at a time, in this order, to an empty frame.
  features = pd.DataFrame()
  for column_name, values in indicator_columns.items():
    features[column_name] = values

  # Derived features: row-to-row RSI change and the distance between the bands.
  features["RSI_momentum"] = features["RSI_14"].diff()
  features["BB_width"] = features["BB_upper"] - features["BB_lower"]
  return features


def define_target(data, features):
  """Defines target variable (Buy/Sell/Hold) based on trading rules.

  Args:
    data: Series of prices; its index becomes the index of the result.
    features: DataFrame with "RSI_14" and "SMA_20" columns, row-aligned
      with `data`.

  Returns:
    Series named "Target" with one label per row of `data`. Rows where an
    indicator is NaN fall through to "Hold" because NaN comparisons are False.
  """
  targets = []
  # Rows are paired by position. Looking them up with integer labels via
  # .loc would fail on a date-indexed frame.
  for rsi, price, sma in zip(features["RSI_14"], data, features["SMA_20"]):
    if rsi < 30 and price > sma:
      targets.append("Buy")  # Replace with appropriate buy condition
    elif rsi > 70 and price < sma:
      targets.append("Sell")  # Replace with appropriate sell condition
    else:
      targets.append("Hold")
  # Build the Series only after every row has been labelled.
  return pd.Series(targets, index=data.index, name="Target")


def build_model(features, target):
  """Builds a multivariate logistic regression model.

  Fits on the first 80% of the rows and prints accuracy, weighted F1 and
  AUC-ROC measured on the remaining 20%.

  Args:
    features: DataFrame of model inputs, one row per observation.
    target: Labels row-aligned with `features`.

  Returns:
    The fitted LogisticRegression model.
  """
  # Indicator warm-up rows contain NaN, which LogisticRegression rejects.
  complete_rows = features.notna().all(axis=1).to_numpy()
  features = features[complete_rows]
  target = target[complete_rows]

  # shuffle=False keeps the rows in order so the test set is the most recent
  # 20%; shuffling a time series lets later rows leak into training.
  X_train, X_test, y_train, y_test = train_test_split(
      features, target, test_size=0.2, shuffle=False)
  model = LogisticRegression(multi_class="multinomial", solver="lbfgs")
  model.fit(X_train, y_train)

  # Make predictions on test data
  y_pred = model.predict(X_test)

  # Evaluate model performance
  accuracy = accuracy_score(y_test, y_pred)
  f1 = f1_score(y_test, y_pred, average="weighted")
  probabilities = model.predict_proba(X_test)
  try:
    if probabilities.shape[1] == 2:
      # Binary case: score with the probability of the second class.
      auc_roc = roc_auc_score(y_test, probabilities[:, 1])
    else:
      # Multiclass case: needs the full probability matrix, an explicit
      # one-vs-rest strategy and the column order of the model's classes.
      auc_roc = roc_auc_score(
          y_test, probabilities, multi_class="ovr", average="weighted",
          labels=model.classes_)
  except ValueError:
    # AUC is undefined when a class is missing from the test labels.
    auc_roc = float("nan")

  print("Accuracy:", accuracy)
  print("F1-score:", f1)
  print("AUC-ROC Score:", auc_roc)
  return model


[*********************100%%**********************]  1 of 1 completed


In [4]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def download_data(ticker, period="1d"):
  """Fetches price history for `ticker` over `period` and returns its "Adj Close" column."""
  price_table = yf.download(ticker, period=period)
  adjusted_close = price_table["Adj Close"]
  return adjusted_close

def calculate_SMA(data, window):
  """Returns the rolling mean of `data` over `window` consecutive observations."""
  rolling_window = data.rolling(window=window)
  return rolling_window.mean()

def calculate_EMA(data, window):
  """Returns the exponentially weighted mean of `data` with smoothing factor 2 / (window + 1)."""
  smoothing = 2 / (window + 1)
  # No value is emitted until `window` observations are available.
  weighted = data.ewm(alpha=smoothing, min_periods=window)
  return weighted.mean()

def calculate_MACD(data, slow_ema_window=26, fast_ema_window=12, signal_ema_window=9):
  """Returns (macd, signal_line): fast EMA minus slow EMA, and the EMA of that difference."""
  slow_average = calculate_EMA(data, slow_ema_window)
  fast_average = calculate_EMA(data, fast_ema_window)
  macd_line = fast_average - slow_average
  return macd_line, calculate_EMA(macd_line, signal_ema_window)

def calculate_RSI(data, window):
  """Calculates the Relative Strength Index (RSI).

  Args:
    data: Series of prices.
    window: Smoothing length; averages use alpha = 1 / window and the first
      valid value appears once `window` price changes are available.

  Returns:
    Series on the same index as `data`, with values in [0, 100]. It is NaN
    during warm-up and wherever both average gain and average loss are zero.
  """
  delta = data.diff()
  # Gains and losses must share the full index (zero where the move went the
  # other way). Selecting only the positive or negative rows leaves the two
  # series with no dates in common, and their ratio is then NaN everywhere.
  up = delta.clip(lower=0)
  down = delta.clip(upper=0).abs()
  average_up = up.ewm(alpha=1 / window, min_periods=window).mean()
  average_down = down.ewm(alpha=1 / window, min_periods=window).mean()
  # A zero average loss gives RS = inf, which maps to RSI = 100 below.
  RS = average_up / average_down
  RSI = 100 - 100 / (1 + RS)
  return RSI

def calculate_bollinger_bands(data, window=20):
  """Returns (upper, lower) Bollinger Bands: rolling mean plus/minus two rolling standard deviations."""
  rolling_window = data.rolling(window=window)
  middle = rolling_window.mean()
  spread = 2 * rolling_window.std()
  upper_band, lower_band = middle + spread, middle - spread
  return upper_band, lower_band

def create_features(data):
  """Assembles the indicator and derived feature columns for `data` into a new DataFrame."""
  macd_line, macd_signal = calculate_MACD(data)
  upper_band, lower_band = calculate_bollinger_bands(data)
  indicator_columns = {
      "SMA_20": calculate_SMA(data, window=20),
      "EMA_12": calculate_EMA(data, window=12),
      "MACD": macd_line,
      "MACD_signal": macd_signal,
      "RSI_14": calculate_RSI(data, window=14),
      "BB_upper": upper_band,
      "BB_lower": lower_band,
  }

  # Columns are attached one at a time, in this order, to an empty frame.
  features = pd.DataFrame()
  for column_name, values in indicator_columns.items():
    features[column_name] = values

  # Derived features
  features["EMA_crossover"] = features["EMA_12"] > features["SMA_20"]  # True while EMA_12 is above SMA_20
  features["RSI_momentum"] = features["RSI_14"].diff()  # Row-to-row change in RSI
  features["BB_width"] = features["BB_upper"] - features["BB_lower"]  # Distance between the bands
  return features

def define_target(data, features):
  """Defines target variable (Buy/Sell/Hold) based on trading rules.

  Args:
    data: Price series; only its length and index are used, and the index
      becomes the index of the result.
    features: DataFrame with "EMA_crossover" (boolean) and "RSI_14" columns,
      row-aligned with `data`.

  Returns:
    Series named "Target" with one label per row. Rows whose RSI is NaN fall
    through to "Hold" because NaN comparisons are False.
  """
  targets = []
  # Rows are read by position. Looking them up with integer labels via .loc
  # would fail on a date-indexed frame.
  for ema_above_sma, rsi in zip(features["EMA_crossover"], features["RSI_14"]):
    if ema_above_sma and rsi < 30:
      targets.append("Buy")  # Buy when EMA is above SMA and RSI is low
    elif not ema_above_sma and rsi > 70:
      targets.append("Sell")  # Sell when EMA is not above SMA and RSI is high
    else:
      targets.append("Hold")
  return pd.Series(targets, index=data.index, name="Target")

def build_model(features, target):
  """Builds a multivariate logistic regression model.

  Fits on the first 80% of the rows and prints accuracy, weighted F1 and
  AUC-ROC measured on the remaining 20%.

  Args:
    features: DataFrame of model inputs, one row per observation.
    target: Labels row-aligned with `features`.

  Returns:
    The fitted LogisticRegression model.
  """
  # Indicator warm-up rows contain NaN, which LogisticRegression rejects.
  complete_rows = features.notna().all(axis=1).to_numpy()
  features = features[complete_rows]
  target = target[complete_rows]

  # shuffle=False keeps the rows in order so the test set is the most recent
  # 20%; shuffling a time series lets later rows leak into training.
  X_train, X_test, y_train, y_test = train_test_split(
      features, target, test_size=0.2, shuffle=False)
  model = LogisticRegression(multi_class="multinomial", solver="lbfgs")
  model.fit(X_train, y_train)

  # Make predictions on test data
  y_pred = model.predict(X_test)

  # Evaluate model performance

  # finding model accuracy
  accuracy = accuracy_score(y_test, y_pred)
  # finding model f1 score
  f1 = f1_score(y_test, y_pred, average="weighted")
  # finding model auc-roc score
  probabilities = model.predict_proba(X_test)
  try:
    if probabilities.shape[1] == 2:
      # Binary case: score with the probability of the second class.
      auc_roc = roc_auc_score(y_test, probabilities[:, 1])
    else:
      # Multiclass case: needs the full probability matrix, an explicit
      # one-vs-rest strategy and the column order of the model's classes.
      auc_roc = roc_auc_score(
          y_test, probabilities, multi_class="ovr", average="weighted",
          labels=model.classes_)
  except ValueError:
    # AUC is undefined when a class is missing from the test labels.
    auc_roc = float("nan")

  print("Accuracy: ", accuracy)
  print("F1-score: ", f1)
  print("AUC-ROC Score: ", auc_roc)
  return model

# Call the functions in a main function
def main():
  """Runs the pipeline end to end: download, features, target, model.

  Returns:
    The fitted model returned by build_model.
  """
  # Choose a Nifty 50 company (replace with your desired ticker)
  ticker = "RELIANCE.NS"

  # Download price data. A year of history is requested explicitly because
  # download_data defaults to period="1d", which is far too short for the
  # 20- and 26-row indicator windows used below.
  data = download_data(ticker, period="1y")

  # Create features
  features = create_features(data)

  # Define target variable
  target = define_target(data, features)

  # Build and evaluate model
  model = build_model(features, target)
  return model


# The call must sit outside the function body: indented inside main() it is
# never executed at top level and would recurse without end if main() ran.
if __name__ == "__main__":
  main()

