In [None]:
def generate_benchmark_data(stock_data, window=5):
    """Add a rolling-mean ``benchmark`` column derived from ``close``.

    The first ``window - 1`` rows have no complete window, so they are
    back-filled with the first available rolling mean. If the frame has
    fewer than ``window`` rows there is nothing to back-fill from and the
    column stays NaN.

    Args:
        stock_data: DataFrame with a ``close`` column. It is modified in
            place (a ``benchmark`` column is added or overwritten).
        window: Number of rows in the rolling window. Defaults to 5.

    Returns:
        The same ``stock_data`` object, with the ``benchmark`` column set.
    """
    # Assign the finished Series in one step. Calling
    # ``fillna(..., inplace=True)`` on ``stock_data['benchmark']`` operates on
    # a temporary under pandas Copy-on-Write and leaves the frame untouched,
    # and ``fillna(method=...)`` is deprecated in favour of ``bfill()``.
    stock_data['benchmark'] = (
        stock_data['close'].rolling(window=window).mean().bfill()
    )
    return stock_data


def calculate_excess_return(data):
    """Attach period-over-period returns and their spread to ``data``.

    Three columns are written to ``data`` in place, in this order:
    ``return`` (percentage change of ``close``), ``benchmark_return``
    (percentage change of ``benchmark``) and ``excess_return`` (the first
    minus the second).

    Args:
        data: DataFrame containing ``close`` and ``benchmark`` columns.

    Returns:
        The same ``data`` object with the three columns added.
    """
    close_change = data['close'].pct_change()
    data['return'] = close_change

    benchmark_change = data['benchmark'].pct_change()
    data['benchmark_return'] = benchmark_change

    data['excess_return'] = close_change - benchmark_change
    return data

def _is_numeric_dtype(dtype):
    """Return True for integer/float/complex dtypes, NumPy or pandas."""
    try:
        return bool(np.issubdtype(dtype, np.number))
    except TypeError:
        # NumPy cannot interpret pandas extension dtypes (categorical,
        # nullable, string, ...) and raises; fall back to their ``kind``
        # code so such columns are classified instead of crashing.
        return getattr(dtype, 'kind', 'O') in ('i', 'u', 'f', 'c')


def discretize_features(data, feature_columns, quantile=0.8):
    """Binarise numeric feature columns at a per-column quantile threshold.

    For every numeric column named in ``feature_columns`` the column is
    replaced by 1 where the value is greater than or equal to that column's
    ``quantile`` quantile and 0 elsewhere. Values that fail the comparison,
    NaN included, become 0. Non-numeric columns are left unchanged and a
    warning line is printed for each.

    Args:
        data: DataFrame holding the feature columns. Modified in place.
        feature_columns: Iterable of column names to discretise.
        quantile: Quantile in [0, 1] used as the cut-off. Defaults to 0.8.

    Returns:
        The same ``data`` object with the numeric features binarised.
    """
    for feature in feature_columns:
        column = data[feature]
        if _is_numeric_dtype(column.dtype):
            threshold = column.quantile(quantile)
            data[feature] = np.where(column >= threshold, 1, 0)
        else:
            print(f"Warning: {feature} is not a numeric type and will be skipped.")

    return data

In [None]:
def cfs(data, target_column):
    """Greedily pick numeric features ranked by mutual information.

    Rows containing any missing value are dropped first. The numeric
    feature columns are then scored against the target with
    ``mutual_info_regression`` (integer or floating-point target of any
    width) or ``mutual_info_classif`` (any other target dtype), and visited
    from highest to lowest score. The top-ranked feature is always kept; a
    later feature is kept only if re-scoring the already selected features
    together with it yields scores that are all at least as high as
    re-scoring the selected features alone.

    Args:
        data: DataFrame containing the features and the target column.
        target_column: Name of the target column.

    Returns:
        List of selected feature names in selection order. Empty when no
        rows survive ``dropna`` or there are no numeric feature columns.

    Raises:
        ValueError: If ``target_column`` is not a column of ``data``.
    """
    feature_names = data.columns.tolist()
    feature_names.remove(target_column)

    data = data.dropna()
    numeric_features = (
        data[feature_names].select_dtypes(include=[np.number]).columns.tolist()
    )
    # Nothing to score: return early rather than failing inside the
    # estimator on an empty matrix.
    if data.empty or not numeric_features:
        return []

    target = data[target_column]
    # Treat every integer/float width as continuous. Matching only
    # float64/int64 would send e.g. an int32 or float32 target down the
    # classifier path.
    if target.dtype.kind in ('i', 'u', 'f'):
        estimate_mi = mutual_info_regression
    else:
        estimate_mi = mutual_info_classif

    mi_scores = estimate_mi(data[numeric_features], target)
    ranked_features = sorted(
        zip(numeric_features, mi_scores), key=lambda pair: pair[1], reverse=True
    )

    selected = []
    for feature, _ in ranked_features:
        if not selected:
            selected.append(feature)
            continue

        # NOTE(review): the estimators appear to score each column against
        # the target independently, so both calls estimate the same
        # quantities for the columns in ``selected`` and this test does not
        # seem to measure redundancy with the candidate. With no
        # ``random_state`` the outcome may also vary between runs. Confirm
        # the intended criterion before relying on the result.
        relevance = estimate_mi(data[selected], target)
        relevance_with_new = estimate_mi(data[selected + [feature]], target)

        # zip stops at len(selected), so the candidate's own score (the last
        # entry of relevance_with_new) is not part of the comparison.
        if all(new >= old for new, old in zip(relevance_with_new, relevance)):
            selected.append(feature)

    return selected

In [None]:
def generate_composite_index(data):
    """Return a copy of ``data`` with one composite column per name prefix.

    Columns are grouped by the text before their first underscore. For each
    group holding more than one column, a column named
    ``<prefix>_Composite index`` is added whose value is the row-wise mean
    of the group's columns plus their row-wise standard deviation.

    Args:
        data: DataFrame whose column names are strings. Not modified.

    Returns:
        A new DataFrame: the copied input plus the composite columns.
    """
    result = data.copy()

    # Map each prefix to its member columns, in original column order.
    groups = {}
    for name in result.columns.tolist():
        groups.setdefault(name.split('_')[0], []).append(name)

    for stem, members in groups.items():
        if len(members) <= 1:
            continue
        subset = result[members]
        result[f'{stem}_Composite index'] = subset.mean(axis=1) + subset.std(axis=1)

    return result