# NLP Market Sentiment Analysis - Future Work Demo

This notebook demonstrates the advanced features added to the FinLlama ensemble system:
1. Time Series Analysis & Forecasting
2. Portfolio Optimization
3. Real-time Alert System
4. Interactive Visualizations

In [None]:
# Import required libraries (standard library, then third-party)
import warnings
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

# Import our modules
from future_work_implementation import (
    TimeSeriesAnalyzer,
    PortfolioOptimizer,
    SentimentAlertSystem,
    EnhancedVisualizer
)
from integration import SentimentPipeline

# Set style. Matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8-*'; fall back to the legacy name on older releases so the
# notebook does not fail in its very first cell.
preferred_style = 'seaborn-v0_8-darkgrid'
if preferred_style in plt.style.available:
    plt.style.use(preferred_style)
else:
    plt.style.use('seaborn-darkgrid')
sns.set_palette("husl")

print("✓ All libraries imported successfully")

## 1. Data Preparation

Load sentiment data from your ensemble system output or generate sample data.

In [None]:
# Option 1: Load your actual ensemble output
# sentiment_df = pd.read_csv('path/to/your/ensemble_output.csv')
# sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

# Option 2: Generate sample data for demonstration
def generate_sample_data(companies=None, days=90, end_date=None):
    """Generate synthetic daily sentiment records for demonstration.

    Every company gets a random-walk sentiment series clipped to [-1, 1],
    plus a random confidence and volume value for each day in the window.

    Args:
        companies: Iterable of ticker symbols. When None, the demo set
            ['AAPL', 'GOOGL', 'MSFT', 'TSLA'] is used.
        days: Length of the look-back window in days. The window includes
            both endpoints, so an integer ``days`` yields ``days + 1`` rows
            per company.
        end_date: Last timestamp of the window. Defaults to
            ``datetime.now()``; pass a fixed value to make the dates (and
            not only the random values) reproducible.

    Returns:
        pd.DataFrame with one row per (company, day) and the columns
        ``date``, ``company``, ``sentiment_score``, ``confidence`` and
        ``volume``. The columns are present even when no rows are produced.
    """
    columns = ['date', 'company', 'sentiment_score', 'confidence', 'volume']

    # A None sentinel avoids sharing one mutable default list between calls.
    if companies is None:
        companies = ['AAPL', 'GOOGL', 'MSFT', 'TSLA']

    # NOTE: this seeds NumPy's *global* generator, so any later np.random
    # call in the session continues from the state left here.
    np.random.seed(42)

    if end_date is None:
        end_date = datetime.now()
    start_date = end_date - timedelta(days=days)
    dates = pd.date_range(start=start_date, end=end_date, freq='D')

    all_data = []
    for company in companies:
        # Random walk sentiment, kept inside the valid score range
        sentiment = np.random.randn(len(dates)).cumsum() * 0.05
        sentiment = np.clip(sentiment, -1, 1)

        for i, date in enumerate(dates):
            all_data.append({
                'date': date,
                'company': company,
                'sentiment_score': sentiment[i],
                'confidence': np.random.uniform(0.6, 0.95),
                'volume': np.random.randint(100, 1000)
            })

    # Passing the column list keeps the schema stable for an empty result,
    # so downstream lookups such as df['company'] still work.
    return pd.DataFrame(all_data, columns=columns)

# Build the demo dataset using the generator's defaults
full_df = generate_sample_data()

# Headline facts about the generated frame
record_count = len(full_df)
tickers_present = full_df['company'].unique().tolist()
first_stamp = full_df['date'].min()
last_stamp = full_df['date'].max()

print(f"Generated {record_count} sentiment records")
print(f"Companies: {tickers_present}")
print(f"Date range: {first_stamp} to {last_stamp}")
full_df.head()

In [None]:
# Split the combined frame into one date-ordered frame per company
companies = full_df['company'].unique().tolist()

sentiment_data = {
    ticker: (
        full_df[full_df['company'] == ticker]
        .copy()
        .sort_values('date')
        .reset_index(drop=True)
    )
    for ticker in companies
}

print(f"Prepared data for {len(sentiment_data)} companies")

## 2. Time Series Analysis & Forecasting

Train LSTM models to forecast future sentiment.

In [None]:
# Company used for the detailed walkthrough, and its prepared history
company = 'AAPL'
df = sentiment_data[company]

# Analyzer configured with a sequence length of 10
# (presumably the number of past observations per training window — confirm
# against TimeSeriesAnalyzer)
analyzer = TimeSeriesAnalyzer(sequence_length=10)

n_days_available = len(df)
print(f"Analyzing {company} with {n_days_available} days of data")

In [None]:
# Train LSTM model on the selected company's sentiment history.
# The returned history is read later via its 'train_loss' / 'val_loss' entries.
print("Training LSTM model...")
history = analyzer.train_lstm_model(
    df,
    epochs=30,
    batch_size=16,
    learning_rate=0.001
)

print("\n✓ Training complete!")

In [None]:
# Plot training history: linear-scale losses on the left, log-scale on the right
train_curve = history['train_loss']
val_curve = history['val_loss']

loss_fig, (linear_ax, log_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: raw loss values
linear_ax.plot(train_curve, label='Training Loss')
linear_ax.plot(val_curve, label='Validation Loss')
linear_ax.set_xlabel('Epoch')
linear_ax.set_ylabel('Loss')
linear_ax.set_title('Model Training History')
linear_ax.legend()
linear_ax.grid(True, alpha=0.3)

# Right panel: same curves on a logarithmic y-axis
log_ax.plot(train_curve, label='Train')
log_ax.plot(val_curve, label='Validation')
log_ax.set_xlabel('Epoch')
log_ax.set_ylabel('Loss (log scale)')
log_ax.set_title('Training Progress (Log Scale)')
log_ax.set_yscale('log')
log_ax.legend()
log_ax.grid(True, alpha=0.3)

loss_fig.tight_layout()
plt.show()

# Loss values after the last epoch
print(f"Final training loss: {train_curve[-1]:.6f}")
print(f"Final validation loss: {val_curve[-1]:.6f}")

In [None]:
# Point forecast for the selected company; horizon is expressed in days
forecast_days = 14
predictions = analyzer.predict_future_sentiment(
    df,
    days_ahead=forecast_days,
)

# Header line followed by the forecast itself
print(f"Generated {forecast_days}-day forecast:", predictions, sep="\n")

In [None]:
# Calculate prediction intervals over the same horizon as the point forecast
# (forecast_days), so the shaded band in the forecast plot covers every
# predicted day instead of only the first seven.
intervals = analyzer.calculate_prediction_intervals(
    df, days_ahead=forecast_days, confidence=0.95
)

print("Prediction intervals (95% confidence):")
print(intervals)

In [None]:
# Visualize predictions: history, point forecast and (if present) the interval band
forecast_fig, forecast_ax = plt.subplots(figsize=(14, 6))

# Historical sentiment
forecast_ax.plot(
    df['date'],
    df['sentiment_score'],
    'b-',
    label='Historical',
    linewidth=2,
)

# Predictions
forecast_ax.plot(
    predictions['date'],
    predictions['predicted_sentiment'],
    'r--',
    label='Forecast',
    linewidth=2,
    marker='o',
)

# Confidence intervals — drawn only when the bounds columns are available
has_bounds = 'lower_bound' in intervals.columns
if has_bounds:
    forecast_ax.fill_between(
        intervals['date'],
        intervals['lower_bound'],
        intervals['upper_bound'],
        alpha=0.3,
        color='red',
        label='95% Confidence Interval',
    )

# Zero line separates positive from negative sentiment
forecast_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Sentiment Score')
forecast_ax.set_title(f'{company} Sentiment Forecast')
forecast_ax.legend()
forecast_ax.grid(True, alpha=0.3)
plt.setp(forecast_ax.get_xticklabels(), rotation=45)
forecast_fig.tight_layout()
plt.show()

## 3. Portfolio Optimization

Use sentiment signals to optimize portfolio allocation.

In [None]:
# Initialize optimizer
optimizer = PortfolioOptimizer(risk_free_rate=0.02)

# Get latest sentiment scores: the last row of each per-company frame
sentiment_scores = {
    ticker: history_df['sentiment_score'].iloc[-1]
    for ticker, history_df in sentiment_data.items()
}

print("Current Sentiment Scores:")
# The loop uses its own variable name so the notebook-level `company`
# selected for the detailed analysis is not overwritten.
for ticker, score in sentiment_scores.items():
    # A score of exactly zero is neither positive nor negative.
    if score > 0:
        sentiment_label = "Positive"
    elif score < 0:
        sentiment_label = "Negative"
    else:
        sentiment_label = "Neutral"
    print(f"  {ticker}: {score:+.3f} ({sentiment_label})")

In [None]:
# Generate sample returns data
# Daily calendar spanning the earliest to the latest date across all companies
dates = pd.date_range(
    start=min(frame['date'].min() for frame in sentiment_data.values()),
    end=max(frame['date'].max() for frame in sentiment_data.values()),
    freq='D'
)

# Dedicated seeded generator: the cell no longer depends on whatever state an
# earlier cell left in NumPy's global RNG, so re-running it reproduces the
# same returns.
returns_rng = np.random.default_rng(42)

# One column of synthetic daily returns per company (std 0.02, mean 0.001),
# indexed by the calendar computed above.
returns_data = pd.DataFrame(
    {
        ticker: returns_rng.standard_normal(len(dates)) * 0.02 + 0.001
        for ticker in companies
    },
    index=dates,
)

print(f"Generated returns data: {returns_data.shape}")
print("\nMean daily returns:")
print(returns_data.mean())

In [None]:
# Expected returns adjusted by the latest sentiment of each company
expected_returns = optimizer.calculate_sentiment_weighted_returns(
    sentiment_scores,
    returns_data,
)

print("\nSentiment-Adjusted Expected Returns:")
# Pair each company with its adjusted return
# (assumes the optimizer returns values in `companies` order — TODO confirm)
for ticker, adjusted_return in zip(companies, expected_returns):
    print(f"  {ticker}: {adjusted_return:.4f} (Sentiment: {sentiment_scores[ticker]:+.3f})")

In [None]:
# Covariance of the sample returns as a plain array
cov_matrix = returns_data.cov().to_numpy()

# Portfolio with the highest Sharpe ratio for the adjusted returns
optimal_sharpe = optimizer.optimize_portfolio_sharpe(expected_returns, cov_matrix)

banner = "=" * 60
print("\n" + banner)
print("OPTIMAL PORTFOLIO (Maximum Sharpe Ratio)")
print(banner)
print("\nAllocation:")

# List only positions above 1%
allocation = optimal_sharpe['weights']
for position, ticker in enumerate(companies):
    share = allocation[position]
    if not share > 0.01:
        continue
    print(f"  {ticker:6s}: {share:6.2%}")

print(f"\nExpected Return: {optimal_sharpe['expected_return']:.4f}")
print(f"Volatility:      {optimal_sharpe['volatility']:.4f}")
print(f"Sharpe Ratio:    {optimal_sharpe['sharpe_ratio']:.4f}")

In [None]:
# Trace the efficient frontier with 50 requested points
print("Generating efficient frontier...")
frontier_df = optimizer.generate_efficient_frontier(
    expected_returns,
    cov_matrix,
    n_points=50,
)

frontier_size = len(frontier_df)
print(f"Generated {frontier_size} portfolio points")

In [None]:
# Static efficient-frontier chart: frontier line, Sharpe-coloured points and
# a star on the maximum-Sharpe portfolio
frontier_fig, frontier_ax = plt.subplots(figsize=(12, 8))

risk = frontier_df['volatility']
reward = frontier_df['return']

# Efficient frontier line
frontier_ax.plot(risk, reward, 'b-', linewidth=2, label='Efficient Frontier')

# Color points by Sharpe ratio
scatter = frontier_ax.scatter(
    risk,
    reward,
    c=frontier_df['sharpe'],
    cmap='viridis',
    s=50,
    alpha=0.6,
)

# Mark optimal portfolio
star_label = f"Optimal (Sharpe={optimal_sharpe['sharpe_ratio']:.2f})"
frontier_ax.scatter(
    optimal_sharpe['volatility'],
    optimal_sharpe['expected_return'],
    c='red',
    s=300,
    marker='*',
    edgecolors='black',
    linewidths=2,
    label=star_label,
    zorder=5,
)

frontier_fig.colorbar(scatter, ax=frontier_ax, label='Sharpe Ratio')
frontier_ax.set_xlabel('Portfolio Volatility (Risk)')
frontier_ax.set_ylabel('Expected Return')
frontier_ax.set_title('Efficient Frontier with Sentiment-Adjusted Returns')
frontier_ax.legend(loc='upper left')
frontier_ax.grid(True, alpha=0.3)
frontier_fig.tight_layout()
plt.show()

In [None]:
# Portfolio allocation pie chart; positions of 1% or less are left out
weights = optimal_sharpe['weights']
non_zero = weights > 0.01
filtered_weights = weights[non_zero]
filtered_companies = [
    companies[position]
    for position in range(len(companies))
    if non_zero[position]
]

allocation_fig, allocation_ax = plt.subplots(figsize=(10, 8))
colors = plt.cm.Set3(range(len(filtered_companies)))
allocation_ax.pie(
    filtered_weights,
    labels=filtered_companies,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
    textprops={'fontsize': 12},
)
allocation_ax.set_title('Optimal Portfolio Allocation', fontsize=14, fontweight='bold')
allocation_ax.axis('equal')  # keep the pie circular
allocation_fig.tight_layout()
plt.show()

## 4. Real-time Alert System

Detect sentiment anomalies and generate alerts.

In [None]:
# Alert system configured with a 30-period lookback and a z-threshold of 2.0
alert_system = SentimentAlertSystem(lookback_period=30, z_threshold=2.0)

# Collect alerts for every company's history
alerts_df = alert_system.get_all_alerts(sentiment_data)

alert_total = len(alerts_df)
print(f"Generated {alert_total} alerts")

# Break the alerts down by type and severity when there are any
if alert_total > 0:
    print("\nAlert Summary:")
    summary_counts = alerts_df.groupby(['alert_type', 'severity']).size()
    print(summary_counts)

In [None]:
# Display alerts
def _alert_field(alert, key, default=None):
    """Return ``alert[key]``, or ``default`` when the field is absent or NaN.

    Rows from ``iterrows()`` carry every column of the frame, so a field that
    does not apply to a given alert shows up as NaN rather than being missing;
    a plain ``key in alert`` check cannot tell the two apart.
    """
    value = alert.get(key, default)
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value


if len(alerts_df) > 0:
    print("\n" + "="*80)
    print("DETECTED ALERTS")
    print("="*80)

    for _, alert in alerts_df.iterrows():
        severity_icon = "🔴" if alert.get('severity') == 'high' else "🟡"

        # Prefer alert_type, then reversal_type, then a generic placeholder
        alert_type = _alert_field(alert, 'alert_type')
        if alert_type is None:
            alert_type = _alert_field(alert, 'reversal_type', 'unknown')

        # Separate name so the notebook-level `company` is not overwritten
        alert_company = _alert_field(alert, 'company', 'N/A')

        print(f"\n{severity_icon} {alert_company} - {str(alert_type).upper()}")
        print(f"   Date: {alert.get('date')}")

        # Print metric lines only when this row actually carries a value
        z_score = _alert_field(alert, 'z_score')
        if z_score is not None:
            print(f"   Z-Score: {z_score:.2f}")
        pct_change = _alert_field(alert, 'pct_change')
        if pct_change is not None:
            print(f"   Change: {pct_change:.1f}%")
else:
    print("\n✓ No alerts detected in the analyzed period")

## 5. Interactive Visualizations

Create comprehensive interactive dashboards using Plotly.

In [None]:
# Visualizer shared by the interactive charts below
visualizer = EnhancedVisualizer()

# Interactive sentiment time series for one company, overlaid with the forecast
company = 'AAPL'
company_history = sentiment_data[company]
fig = visualizer.plot_sentiment_timeseries(
    company_history,
    company,
    predictions=predictions,
)

fig.show()

In [None]:
# Sentiment heatmap built from every company's history
fig = visualizer.plot_sentiment_heatmap(
    sentiment_data,
)
fig.show()

In [None]:
# Bundle of the portfolio artefacts computed above
# (not consumed by the plot call in this cell)
portfolio_results = dict(
    companies=companies,
    optimal_sharpe=optimal_sharpe,
    efficient_frontier=frontier_df,
)

# Interactive efficient frontier with the optimal portfolio
fig = visualizer.plot_efficient_frontier(
    frontier_df,
    optimal_sharpe,
)
fig.show()

In [None]:
# Interactive allocation chart for the optimal weights
optimal_weights = optimal_sharpe['weights']
fig = visualizer.plot_portfolio_allocation(optimal_weights, companies)
fig.show()

## 6. Full Pipeline Integration

Run the complete end-to-end pipeline.

In [None]:
# Settings handed to the end-to-end pipeline
pipeline_config = {
    'lookback_days': 60,
    'forecast_days': 7,
    'risk_free_rate': 0.02,
}
pipeline = SentimentPipeline(config=pipeline_config)

# Echo the values the pipeline exposes after construction
print(
    "Pipeline initialized with:",
    f"  Lookback period: {pipeline.lookback_days} days",
    f"  Forecast horizon: {pipeline.forecast_days} days",
    f"  Risk-free rate: {pipeline.risk_free_rate:.2%}",
    sep="\n",
)

In [None]:
# Run full analysis
# Note: This will take several minutes to complete

pipeline_tickers = ['AAPL', 'GOOGL', 'MSFT', 'TSLA']
results = pipeline.run_full_analysis(
    companies=pipeline_tickers,
    train_models=True,
)

rule = "=" * 80
print("\n" + rule)
print("PIPELINE EXECUTION COMPLETE")
print(rule)

In [None]:
# Display summary results: one line (or short group of lines) per section
# that the pipeline actually returned.
print("\nAnalysis Summary:")
print("-" * 60)

if 'predictions' in results:
    # Count only companies that produced a forecast (entries may be None)
    pred_count = sum(1 for p in results['predictions'].values() if p is not None)
    print(f"✓ Predictions generated for {pred_count} companies")

if 'alerts' in results:
    alert_count = len(results['alerts'])
    print(f"✓ {alert_count} alerts detected")

if 'portfolio' in results:
    best_portfolio = results['portfolio']['optimal_sharpe']
    sharpe = best_portfolio['sharpe_ratio']
    ret = best_portfolio['expected_return']
    vol = best_portfolio['volatility']
    print("✓ Portfolio optimized:")
    print(f"    Sharpe Ratio: {sharpe:.4f}")
    print(f"    Expected Return: {ret:.4f}")
    print(f"    Volatility: {vol:.4f}")

if 'visualizations' in results:
    viz_count = len(results['visualizations'])
    print(f"✓ {viz_count} visualizations created")

print("\nAll outputs saved to /home/claude/")

## 7. Export Results

Save all results for further analysis.

In [None]:
# Results are automatically saved by the pipeline
# You can also manually export specific results:

# Single place to change the export location. The directory is created up
# front so the writes below do not fail when it is missing.
OUTPUT_DIR = Path('/home/claude')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Export predictions: one CSV per company that has a forecast
if 'predictions' in results:
    for ticker, pred_df in results['predictions'].items():
        if pred_df is not None:
            pred_df.to_csv(OUTPUT_DIR / f'predictions_{ticker}.csv', index=False)
            print(f"Saved predictions for {ticker}")

# Export portfolio weights as a two-column table (company, weight)
if 'portfolio' in results:
    weights_df = pd.DataFrame({
        'company': results['portfolio']['companies'],
        'weight': results['portfolio']['optimal_sharpe']['weights']
    })
    weights_df.to_csv(OUTPUT_DIR / 'optimal_weights.csv', index=False)
    print("Saved optimal portfolio weights")

# Export alerts, skipping the file entirely when there are none
if 'alerts' in results and len(results['alerts']) > 0:
    results['alerts'].to_csv(OUTPUT_DIR / 'alerts.csv', index=False)
    print("Saved alerts")

print("\n✅ All results exported successfully!")

## Next Steps

1. **Integrate with Real Data**: Replace sample data with actual ensemble output from your FinLlama system
2. **Backtesting**: Test strategies on historical data to validate performance
3. **Real-time Monitoring**: Set up automated alerts for live trading
4. **Custom Models**: Experiment with different architectures (GRU, Transformer, etc.)
5. **Multi-asset**: Extend to other asset classes beyond equities

For more information, see the README_FUTURE_WORK.md file.