In [None]:
# 02_feature_engineering.ipynb

# Install necessary libraries
!pip install ta vaderSentiment pandas

# Import libraries
import os
from datetime import datetime

import numpy as np
import pandas as pd
import ta  # Technical Analysis library
from pytrends.request import TrendReq
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Read the raw CSV inputs: OHLCV data, news data and Google Trends data.
ohlcv_data, news_data, google_trends_data = map(
    pd.read_csv,
    (
        "data/raw/ohlcv_data.csv",
        "data/raw/news_data.csv",
        "data/raw/google_trends.csv",
    ),
)

# 1. Calculate Technical Analysis Indicators using 'ta' library
def calculate_technical_indicators(df, ma_window=14):
    """Add RSI, MACD, MACD-signal, EMA and SMA columns derived from ``df['close']``.

    The new columns are written onto ``df`` itself and the same DataFrame is
    returned. RSI and MACD are built with the ``ta`` library's default
    settings; the two moving averages share ``ma_window``.

    Args:
        df: DataFrame containing a ``close`` column.
        ma_window: Period used for both the EMA and the SMA.

    Returns:
        ``df`` with ``rsi``, ``macd``, ``macd_signal``, ``ema`` and ``sma``
        columns added.
    """
    close = df['close']

    # Build the MACD indicator once; the line and its signal both come from it.
    macd = ta.trend.MACD(close)

    df['rsi'] = ta.momentum.RSIIndicator(close).rsi()
    df['macd'] = macd.macd()
    df['macd_signal'] = macd.macd_signal()
    # The window is passed explicitly to both averages so they are comparable.
    # NOTE(review): SMAIndicator appears to have no default window in ta, so
    # omitting it raises TypeError - confirm against the installed version.
    df['ema'] = ta.trend.EMAIndicator(close, window=ma_window).ema_indicator()
    df['sma'] = ta.trend.SMAIndicator(close, window=ma_window).sma_indicator()

    return df

# Apply the technical indicators on the OHLCV data.
# The returned DataFrame is rebound to `ohlcv_data`, which is merged into the
# combined feature set further down.
ohlcv_data = calculate_technical_indicators(ohlcv_data)

# 2. Sentiment Analysis of Crypto News using VADER
def get_sentiment(news_df):
    """Score each headline with VADER and store the compound score.

    A ``sentiment`` column is written onto ``news_df`` itself and the same
    DataFrame is returned.

    Args:
        news_df: DataFrame containing a ``title`` column.

    Returns:
        ``news_df`` with a ``sentiment`` column holding VADER's ``compound``
        score per row, or NaN where the title is missing.
    """
    analyzer = SentimentIntensityAnalyzer()

    def compound_score(title):
        # Empty CSV cells load as NaN (a float); the analyzer expects text,
        # so a missing title gets a missing score rather than crashing.
        if pd.isna(title):
            return float('nan')
        return analyzer.polarity_scores(str(title))['compound']

    news_df['sentiment'] = [compound_score(title) for title in news_df['title']]
    return news_df

# Apply sentiment analysis on the news data.
# Adds a 'sentiment' column; only 'date' and 'sentiment' are used when the
# features are combined further down.
news_data = get_sentiment(news_data)

# 3. Feature: Google Trends Data (Public Interest Over Time)
def preprocess_google_trends(df):
    """Select the per-coin Google Trends columns and give them feature names.

    Args:
        df: DataFrame with ``Bitcoin``, ``Ethereum`` and ``Binance Coin``
            columns, and optionally a ``date`` column.

    Returns:
        A new DataFrame with ``bitcoin_trend``, ``eth_trend`` and ``bnb_trend``
        columns, preceded by ``date`` when the input has one. Any other input
        columns are dropped; the input itself is not modified.

    Raises:
        KeyError: If any of the three coin columns is missing.
    """
    renames = {
        'Bitcoin': 'bitcoin_trend',
        'Ethereum': 'eth_trend',
        'Binance Coin': 'bnb_trend',
    }
    # Keep 'date' when available: the trends are later joined to the OHLCV
    # data on that column, and dropping it makes the join fail.
    selected = (['date'] if 'date' in df.columns else []) + list(renames)
    return df[selected].rename(columns=renames)

# Preprocess Google Trends Data.
# The result replaces the raw frame and is joined to the OHLCV data on 'date'
# when the features are combined further down.
google_trends_data = preprocess_google_trends(google_trends_data)

# 4. Combine All Features (OHLCV, Sentiment, Google Trends)
# Left joins keep every OHLCV row even when a date has no trends or news data.
combined_features = pd.merge(ohlcv_data, google_trends_data, on='date', how='left')

# Several headlines can share one date; joining them row-by-row would repeat
# each OHLCV row once per headline. Collapse to a single mean score per date.
daily_sentiment = news_data.groupby('date', as_index=False)['sentiment'].mean()
combined_features = pd.merge(combined_features, daily_sentiment, on='date', how='left')

# Save the combined feature dataset
output_path = "data/processed/combined_features.csv"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
combined_features.to_csv(output_path, index=False)

# Output summary
print(f"Feature Engineering Complete at {datetime.now()}")
