In [1]:
import logging
import os
from typing import List, Dict, Any, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm
import seaborn as sns
import yfinance as yf
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, START, END
from typing_extensions import TypedDict



In [2]:
class FinancialAnalysisState(TypedDict):
    """
    State management for the financial analysis workflow

    Each workflow node receives this mapping and returns a partial dict of
    the keys it produced. The caller seeds only ``ticker``, ``start_date``
    and ``end_date``; the remaining keys are filled in as the nodes run.
    """
    # --- inputs supplied by the caller ---
    ticker: str
    start_date: str
    end_date: str

    # --- written by the data ingestion node ---
    raw_data: pd.DataFrame            # frame as returned by yf.download
    preprocessed_data: pd.DataFrame   # raw_data plus Returns / Rolling_Mean / Rolling_Std

    # --- written by the predictive modeling node ---
    prediction_model: Any             # fitted auto-ARIMA model object
    prediction_results: Dict[str, Any]  # {'forecast': [...], 'confidence_interval': [[lo, hi], ...]}

    # --- written by the LLM / plotting nodes ---
    market_insights: str
    visualization_paths: List[str]
    analyst_feedback: str

    # Every node returns {'error': str(e)} when its body raises, so the key is
    # declared here to give that update a slot in the state.
    # NOTE(review): LangGraph appears to discard node updates for keys that are
    # not declared in the schema -- confirm against the installed version.
    error: str

class FinancialAnalysisSystem:
    """
    Five-step LangGraph pipeline for a single ticker.

    Steps, in order: download prices, fit an auto-ARIMA forecast, ask the LLM
    for market insights, save two PNG charts, ask the LLM to critique the
    analysis. Each step is a node method that takes the workflow state and
    returns a partial dict of the keys it produced, or ``{'error': message}``
    when it could not run.
    """

    def __init__(
        self,
        model: str = "llama3-8b-8192",
        temperature: float = 0.3,
        forecast_horizon: int = 7,
        seasonal_period: int = 12,
    ):
        """
        Args:
            model: Groq chat model name passed to ``ChatGroq``.
            temperature: Sampling temperature passed to ``ChatGroq``.
            forecast_horizon: Number of future observations to forecast.
            seasonal_period: ``m`` passed to ``pm.auto_arima``.

        Raises:
            ValueError: If ``forecast_horizon`` is smaller than 1.
        """
        if forecast_horizon < 1:
            raise ValueError("forecast_horizon must be at least 1")

        # Initialize LLM and tools
        self.llm = ChatGroq(
            model=model,
            temperature=temperature,
            max_tokens=None
        )
        self.search_tool = TavilySearchResults(max_results=3)
        # NOTE(review): the checkpointer is created but create_workflow()
        # compiles the graph without it, so it is currently unused.
        self.memory = MemorySaver()
        self.forecast_horizon = forecast_horizon
        self.seasonal_period = seasonal_period

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _missing_keys(state, required) -> List[str]:
        """Return the names in ``required`` that are absent (or None) in ``state``."""
        return [key for key in required if state.get(key) is None]

    @staticmethod
    def _fail(node_name: str, reason: str, exc: Exception = None) -> Dict[str, str]:
        """Log a node failure and build the ``{'error': ...}`` update for it."""
        # Logged as well as returned so the failure is visible to the user even
        # if the returned update is not surfaced by the caller.
        logging.getLogger(__name__).error(
            "%s failed: %s", node_name, reason, exc_info=exc
        )
        return {'error': reason}

    @staticmethod
    def _flatten_price_columns(df: pd.DataFrame) -> pd.DataFrame:
        """
        Collapse two-level (field, ticker) columns to plain field names.

        Frames with ordinary columns are returned unchanged. The input frame
        is never modified.

        Raises:
            ValueError: If flattening would produce duplicate column names,
                i.e. the frame holds data for more than one ticker.
        """
        if not isinstance(df.columns, pd.MultiIndex):
            return df
        for level in range(df.columns.nlevels):
            labels = df.columns.get_level_values(level)
            if 'Close' in labels:
                if labels.has_duplicates:
                    raise ValueError(
                        "expected price data for a single ticker, "
                        "but the columns cover several"
                    )
                flat = df.copy()
                flat.columns = list(labels)
                return flat
        return df

    @staticmethod
    def _forecast_dates(history_index, n_periods: int) -> pd.DatetimeIndex:
        """
        Dates for the ``n_periods`` observations following ``history_index``.

        If the history contains Saturdays or Sundays the series is treated as
        trading every calendar day; otherwise the forecast steps are placed on
        business days so they do not land on weekends. Exchange holidays are
        not taken into account.
        """
        index = pd.DatetimeIndex(history_index)
        last_date = index[-1]
        trades_on_weekends = bool((index.dayofweek >= 5).any())
        if trades_on_weekends:
            return pd.date_range(
                start=last_date + pd.Timedelta(days=1),
                periods=n_periods,
                freq='D'
            )
        return pd.date_range(
            start=last_date + pd.offsets.BDay(1),
            periods=n_periods,
            freq='B'
        )

    # ------------------------------------------------------------------
    # Workflow nodes
    # ------------------------------------------------------------------
    def data_ingestion_node(self, state: FinancialAnalysisState) -> Dict:
        """
        Fetch financial data for the specified ticker

        Reads ``ticker``, ``start_date`` and (optionally) ``end_date`` from the
        state. When ``end_date`` is absent, ``end=None`` is passed to
        ``yf.download``.

        Returns:
            ``{'raw_data', 'preprocessed_data'}`` on success, otherwise
            ``{'error': message}``.
        """
        missing = self._missing_keys(state, ('ticker', 'start_date'))
        if missing:
            return self._fail(
                'data_ingestion', f"missing required state keys {missing}"
            )
        try:
            # Download stock data
            df = yf.download(
                state['ticker'],
                start=state['start_date'],
                end=state.get('end_date')
            )
            if df is None or df.empty:
                raise ValueError(
                    f"no price data returned for {state['ticker']!r}"
                )

            preprocessed = self._preprocess_data(df)
            if preprocessed.empty:
                # The 20-row rolling window consumes the first 19 observations.
                raise ValueError(
                    "not enough price history: at least 20 complete rows are "
                    "needed to compute the rolling features"
                )

            return {
                'raw_data': df,
                'preprocessed_data': preprocessed
            }
        except Exception as e:
            return self._fail('data_ingestion', str(e), e)

    def _preprocess_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Preprocess financial data

        Drops rows with missing values, then adds ``Returns`` (percentage
        change of ``Close``) and the 20-row rolling mean and standard deviation
        of ``Close``. Rows left incomplete by those calculations are dropped
        too. The input frame is not modified.

        Raises:
            KeyError: If there is no ``Close`` column.
            ValueError: If the columns describe more than one ticker.
        """
        # Remove any rows with missing values
        prices = self._flatten_price_columns(df).dropna()
        close = prices['Close']

        # Calculate additional features on a new frame rather than assigning
        # into the result of dropna().
        features = prices.assign(
            Returns=close.pct_change(),
            Rolling_Mean=close.rolling(window=20).mean(),
            Rolling_Std=close.rolling(window=20).std(),
        )

        return features.dropna()

    def predictive_modeling_node(self, state: FinancialAnalysisState) -> Dict:
        """
        Build and run time series prediction using Auto ARIMA

        Returns:
            ``{'prediction_model', 'prediction_results'}`` where
            ``prediction_results`` holds plain-list ``forecast`` and
            ``confidence_interval`` entries, or ``{'error': message}``.
        """
        missing = self._missing_keys(state, ('preprocessed_data',))
        if missing:
            return self._fail(
                'predictive_modeling',
                f"missing required state keys {missing}; "
                "an upstream step may have failed"
            )
        try:
            # Prepare data for prediction
            data = state['preprocessed_data']['Close']

            # Fit Auto ARIMA model
            model = pm.auto_arima(
                data,
                seasonal=True,
                # Seasonal cycle length in observations. NOTE(review): with
                # one row per trading day the default of 12 is 12 rows, not
                # 12 months -- confirm this is the intended period.
                m=self.seasonal_period,
                suppress_warnings=True,
                stepwise=True
            )

            # Forecast the next observations
            forecast, conf_int = model.predict(
                n_periods=self.forecast_horizon,
                return_conf_int=True
            )

            return {
                'prediction_model': model,
                'prediction_results': {
                    # np.asarray accepts both Series and ndarray results
                    'forecast': np.asarray(forecast).tolist(),
                    'confidence_interval': np.asarray(conf_int).tolist()
                }
            }
        except Exception as e:
            return self._fail('predictive_modeling', str(e), e)

    def market_insights_node(self, state: FinancialAnalysisState) -> Dict:
        """
        Generate market insights using LLM

        Sends the last 10 preprocessed rows and the forecast to the LLM.

        Returns:
            ``{'market_insights': text}`` or ``{'error': message}``.
        """
        missing = self._missing_keys(
            state, ('preprocessed_data', 'prediction_results')
        )
        if missing:
            return self._fail(
                'generate_market_insights',
                f"missing required state keys {missing}; "
                "an upstream step may have failed"
            )
        try:
            # Prepare prompt with prediction and historical data context
            recent_data = state['preprocessed_data'].tail(10)
            predictions = state['prediction_results']['forecast']

            prompt = f"""
            Analyze the following financial data:
            Recent Stock Performance:
            {recent_data.to_string()}
            
            Price Predictions for Next Week:
            {predictions}
            
            Provide a comprehensive market insight including:
            1. Current market trend
            2. Potential investment risks
            3. Short-term price movement prediction
            4. Recommendation for investors
            """

            response = self.llm.invoke(prompt)

            return {
                'market_insights': response.content
            }
        except Exception as e:
            return self._fail('generate_market_insights', str(e), e)

    def _plot_price_trend(self, ticker, history, forecast_dates, forecast, path):
        """Save the historical close plus forecast line chart to ``path``."""
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            ax.plot(history, label='Historical Price')
            ax.plot(forecast_dates, forecast, color='red', label='Predicted Price')
            ax.set_title(f"{ticker} Price Trend and Forecast")
            ax.set_xlabel("Date")
            ax.set_ylabel("Price")
            ax.legend()
            fig.tight_layout()
            fig.savefig(path)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)

    def _plot_confidence_interval(self, ticker, forecast_dates, conf_int, path):
        """Save the shaded forecast confidence band chart to ``path``."""
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            ax.fill_between(
                forecast_dates,
                [ci[0] for ci in conf_int],
                [ci[1] for ci in conf_int],
                alpha=0.3,
                label='Confidence Interval'
            )
            ax.set_title(f"{ticker} Forecast Confidence Interval")
            ax.set_xlabel("Date")
            ax.set_ylabel("Price")
            ax.legend()
            fig.tight_layout()
            fig.savefig(path)
        finally:
            plt.close(fig)

    def visualization_node(self, state: FinancialAnalysisState) -> Dict:
        """
        Create visualizations for market analysis

        Writes ``<ticker>_price_trend.png`` and
        ``<ticker>_confidence_interval.png`` to the current directory.

        Returns:
            ``{'visualization_paths': [price_trend_path, confidence_path]}``
            or ``{'error': message}``.
        """
        missing = self._missing_keys(
            state, ('ticker', 'preprocessed_data', 'prediction_results')
        )
        if missing:
            return self._fail(
                'generate_visualization',
                f"missing required state keys {missing}; "
                "an upstream step may have failed"
            )
        try:
            ticker = state['ticker']
            history = state['preprocessed_data']['Close']
            forecast = state['prediction_results']['forecast']
            conf_int = state['prediction_results']['confidence_interval']
            # One shared date axis for both charts, sized by the forecast
            # itself rather than a hard-coded horizon.
            forecast_dates = self._forecast_dates(history.index, len(forecast))

            # Price trend visualization
            price_trend_path = f"{ticker}_price_trend.png"
            self._plot_price_trend(
                ticker, history, forecast_dates, forecast, price_trend_path
            )

            # Confidence interval visualization
            confidence_path = f"{ticker}_confidence_interval.png"
            self._plot_confidence_interval(
                ticker, forecast_dates, conf_int, confidence_path
            )

            return {
                'visualization_paths': [price_trend_path, confidence_path]
            }
        except Exception as e:
            return self._fail('generate_visualization', str(e), e)

    def analyst_feedback_node(self, state: FinancialAnalysisState) -> Dict:
        """
        Generate a feedback mechanism for analysts

        Asks the LLM to critique the generated insights and forecast.

        Returns:
            ``{'analyst_feedback': text}`` or ``{'error': message}``.
        """
        missing = self._missing_keys(
            state, ('market_insights', 'prediction_results')
        )
        if missing:
            return self._fail(
                'generate_analyst_feedback',
                f"missing required state keys {missing}; "
                "an upstream step may have failed"
            )
        try:
            feedback_prompt = f"""
            Based on the following market analysis:
            
            Market Insights: {state['market_insights']}
            Predicted Prices: {state['prediction_results']['forecast']}
            
            Provide a critical review including:
            1. Strengths of the current analysis
            2. Potential blind spots
            3. Additional data points to consider
            4. Confidence level in the predictions
            """

            response = self.llm.invoke(feedback_prompt)

            return {
                'analyst_feedback': response.content
            }
        except Exception as e:
            return self._fail('generate_analyst_feedback', str(e), e)

    def create_workflow(self):
        """Create LangGraph workflow for financial analysis"""
        graph = StateGraph(FinancialAnalysisState)

        # Define nodes (with prefixed names)
        # NOTE(review): the "generate_" prefix presumably keeps node names
        # distinct from the state keys of the same name -- confirm.
        graph.add_node("data_ingestion", self.data_ingestion_node)
        graph.add_node("predictive_modeling", self.predictive_modeling_node)
        graph.add_node("generate_market_insights", self.market_insights_node)
        graph.add_node("generate_visualization", self.visualization_node)
        graph.add_node("generate_analyst_feedback", self.analyst_feedback_node)

        # Define edges: a strictly linear pipeline
        graph.add_edge(START, "data_ingestion")
        graph.add_edge("data_ingestion", "predictive_modeling")
        graph.add_edge("predictive_modeling", "generate_market_insights")
        graph.add_edge("generate_market_insights", "generate_visualization")
        graph.add_edge("generate_visualization", "generate_analyst_feedback")
        graph.add_edge("generate_analyst_feedback", END)

        return graph.compile()

    def run_analysis(
        self,
        ticker: str,
        start_date: str,
        end_date: Optional[str] = None
    ):
        """
        Execute complete financial analysis workflow

        Args:
            ticker: Symbol to analyse.
            start_date: First date to download.
            end_date: Download end date. When omitted, no end date is put in
                the state and the download is requested with ``end=None``.

        Returns:
            The last item yielded by the workflow stream. In the recorded
            notebook run this is a dict keyed by the final node's name
            (``'generate_analyst_feedback'``) holding only that node's update,
            not the full accumulated state.
        """
        initial_state = {
            'ticker': ticker,
            'start_date': start_date
        }
        if end_date is not None:
            initial_state['end_date'] = end_date

        workflow = self.create_workflow()
        result = list(workflow.stream(initial_state))[-1]

        return result



In [3]:
# Example usage: analyse AAPL over a fixed window and show the returned result
if __name__ == "__main__":
    example_request = {
        'ticker': 'AAPL',
        'start_date': '2023-01-01',
        'end_date': '2024-03-25',
    }
    system = FinancialAnalysisSystem()
    analysis_result = system.run_analysis(**example_request)
    print(analysis_result)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
  return get_prediction_index(
  return get_prediction_index(


{'generate_analyst_feedback': {'analyst_feedback': "**Critical Review**\n\n**Strengths of the current analysis:**\n\n1. The analysis provides a comprehensive overview of the current market trend, highlighting both the positive and negative aspects of the stock's performance.\n2. The use of rolling mean and standard deviation of returns provides a quantitative assessment of the stock's stability and volatility.\n3. The analysis acknowledges potential risks, including volatility, overbought conditions, and economic uncertainty, which is essential for investors to consider.\n\n**Potential blind spots:**\n\n1. The analysis focuses primarily on the short-term price movement prediction, which may not capture the stock's long-term potential or potential catalysts that could impact its performance.\n2. The analysis does not provide a detailed examination of the company's financials, management team, or competitive landscape, which could be important factors in determining the stock's future pe

#### Example payload
```
Tech stock:

{

    "ticker": "AAPL",

    "start_date": "2023-01-01",

    "end_date": "2024-01-01"

}

Without end date (will use current date):
{

    "ticker": "MSFT",

    "start_date": "2023-01-01"

}

Financial stock:

{

    "ticker": "GOOGL",

    "start_date": "2022-06-01",

    "end_date": "2024-01-01"

}
```