In [None]:
from langchain import hub
from langchain_google_vertexai import VertexAI

from langgraph.prebuilt import create_react_agent

# Alternative provider (OpenAI), kept for reference:
# from langchain_openai import ChatOpenAI
# llm = ChatOpenAI(model="gpt-4-turbo-preview")

# create_react_agent binds the tools to the model, which requires a chat model
# with tool-calling support. The completion-style VertexAI wrapper does not
# offer that, so the chat wrapper from the same package is used instead.
from langchain_google_vertexai import ChatVertexAI

llm = ChatVertexAI(model_name="gemini-1.5-pro")
prompt = "You are a helpful assistant."
# NOTE(review): `tools` is not defined in this cell; it has to be defined by an
# earlier cell (a list of LangChain tools) before this one is run.
agent_executor = create_react_agent(llm, tools, prompt=prompt)

In [None]:
import os
from typing import Dict, List, TypedDict, Annotated
from datetime import datetime
from langgraph.graph import Graph, StateGraph
from langchain_google_vertexai import VertexAI
from langgraph.prebuilt import ToolExecutor
import pandas as pd
import matplotlib.pyplot as plt
from vertexai.generative_models import GenerationConfig, GenerativeModel
import json
import seaborn as sns

class AnalysisState(TypedDict):
    """State dictionary shared by the steps of the report-generation workflow.

    The two path fields are supplied by the caller; the remaining fields start
    as ``None`` and are filled in by the workflow step that produces them.
    """

    # Path of the CSV file to analyze.
    csv_path: str
    # Path of the UTF-8 transcript text file to analyze.
    transcript_path: str
    # Summary of the CSV: row/column counts, numerical and categorical column
    # names, and the ``DataFrame.describe()`` statistics.
    data_analysis: Dict | None
    # Parsed JSON returned by the model for the transcript.
    transcript_analysis: Dict | None
    # One ``{"type": ..., "path": ...}`` entry per chart written to disk.
    visualizations: List[Dict] | None
    # Path of the generated Markdown report.
    report_path: str | None

class ReportGeneratorAgent:
    """Generate a Markdown report from a CSV file and a transcript.

    The work is organised as a linear LangGraph workflow of four steps that
    share a single ``AnalysisState`` dictionary: CSV statistics, transcript
    analysis, chart creation and final report writing.
    """

    def __init__(self):
        """Create the Gemini model wrapper, the step registry and the workflow."""
        self.model = VertexAI(model_name="gemini-1.5-pro")
        self.tools = self._create_tools()
        self.graph = self._create_graph()

    def _create_tools(self):
        """Return a mapping from step name to the bound method implementing it."""
        return {
            "analyze_data": self._analyze_csv_data,
            "analyze_transcript": self._analyze_transcript,
            "create_visualization": self._create_visualization,
            "generate_report": self._generate_final_report,
        }

    def _analyze_csv_data(self, state: AnalysisState) -> AnalysisState:
        """Compute basic statistics for the CSV at ``state["csv_path"]``.

        The summary is stored under ``state["data_analysis"]`` and the same
        (mutated) state dictionary is returned.
        """
        df = pd.read_csv(state["csv_path"])

        state["data_analysis"] = {
            "row_count": len(df),
            "column_count": len(df.columns),
            # "number" selects every numeric dtype (int32, float32, ...), not
            # only the 64-bit ones.
            "numerical_columns": df.select_dtypes(include="number").columns.tolist(),
            "categorical_columns": df.select_dtypes(include=["object"]).columns.tolist(),
            "basic_stats": df.describe().to_dict(),
        }
        return state

    def _analyze_transcript(self, state: AnalysisState) -> AnalysisState:
        """Ask the model for a structured analysis of the transcript.

        Reads the UTF-8 file at ``state["transcript_path"]``, requests a JSON
        answer constrained by ``response_schema`` and stores the parsed result
        under ``state["transcript_analysis"]``.

        Raises:
            json.JSONDecodeError: if the model response is not valid JSON.
        """
        with open(state["transcript_path"], "r", encoding="utf-8") as f:
            transcript = f.read()

        prompt = f"""
        請分析以下逐字稿的主要內容，提供：
        1. 主要主題
        2. 關鍵觀點
        3. 重要數據或引用
        
        逐字稿內容：
        {transcript}
        
        請以 JSON 格式回傳分析結果。
        """
        # The answer is constrained to a list of {topic, critical_point[]} objects.
        response_schema = {
            "type": "ARRAY",
            "items": {
                "type": "OBJECT",
                "properties": {
                    "topic": {"type": "STRING"},
                    "critical_point": {"type": "ARRAY", "items": {"type": "STRING"}},
                },
                "required": ["topic", "critical_point"],
            },
        }

        # NOTE(review): this relies on the LangChain wrapper exposing the
        # underlying Vertex AI model as `.client`, because the structured-output
        # options are passed through GenerationConfig -- confirm on upgrades.
        response = self.model.client.generate_content(
            prompt,
            generation_config=GenerationConfig(
                response_mime_type="application/json", response_schema=response_schema
            ),
        )
        state["transcript_analysis"] = json.loads(response.text)
        return state

    def _create_visualization(self, state: AnalysisState) -> AnalysisState:
        """Write one PNG chart per numerical column of the CSV.

        A column whose name contains "date" or "time" is drawn as a trend line
        against the first *other* numerical column; every other column (and a
        time-like column with no other numerical column to plot) is drawn as a
        histogram. The chart descriptors are stored under
        ``state["visualizations"]``.

        Raises:
            ValueError: if the CSV analysis step has not populated
                ``state["data_analysis"]`` yet.
        """
        analysis = state["data_analysis"]
        if analysis is None:
            raise ValueError(
                "data_analysis is missing; run the CSV analysis step before "
                "creating visualizations"
            )

        df = pd.read_csv(state["csv_path"])
        numeric_cols = analysis["numerical_columns"]
        visualizations = []

        for col in numeric_cols:
            lowered = col.lower()
            value_col = None
            if "date" in lowered or "time" in lowered:
                # Never index a fixed position: there may be only one numerical
                # column, and a column must not be plotted against itself.
                value_col = next((c for c in numeric_cols if c != col), None)

            fig_path = f"visualization_{len(visualizations)}.png"
            plt.figure(figsize=(10, 6))
            try:
                if value_col is not None:
                    plt.plot(df[col], df[value_col])
                    plt.title(f"{col} Trend Analysis")
                    plt.xticks(rotation=45)
                    chart_type = "trend"
                else:
                    sns.histplot(data=df, x=col)
                    plt.title(f"{col} Distribution")
                    chart_type = "distribution"
                plt.tight_layout()
                plt.savefig(fig_path)
            finally:
                # Always release the figure, even when plotting or saving fails.
                plt.close()
            visualizations.append({"type": chart_type, "path": fig_path})

        state["visualizations"] = visualizations
        return state

    def _generate_final_report(self, state: AnalysisState) -> AnalysisState:
        """Ask the model for the Markdown report and write it to disk.

        The prompt embeds the three intermediate results as JSON. The report
        is written to ``report_<timestamp>.md`` in the working directory and
        its path is stored under ``state["report_path"]``.
        """
        prompt = f"""
        請根據以下資訊生成一份完整的分析報告：
        
        1. 數據分析結果：
        {json.dumps(state["data_analysis"], indent=2, ensure_ascii=False)}
        
        2. 逐字稿分析：
        {json.dumps(state["transcript_analysis"], indent=2, ensure_ascii=False)}
        
        3. 已生成的視覺化圖表：
        {json.dumps(state["visualizations"], indent=2, ensure_ascii=False)}
        
        請生成一份結構完整的報告，包含：
        - 執行摘要
        - 數據分析發現
        - 逐字稿重點
        - 結論與建議
        
        格式要求：Markdown格式
        """

        # `predict` is deprecated in LangChain; `invoke` is its replacement and
        # returns the completion text for an LLM wrapper.
        report = self.model.invoke(prompt)

        report_path = f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
        with open(report_path, "w", encoding="utf-8") as f:
            f.write(report)

        state["report_path"] = report_path
        return state

    def _create_graph(self) -> Graph:
        """Build and compile the four-step linear workflow.

        Returns the object produced by ``StateGraph.compile()``.
        """
        # AnalysisState is the schema of the state shared by all nodes.
        workflow = StateGraph(state_schema=AnalysisState)

        workflow.add_node("analyze_data", self._analyze_csv_data)
        workflow.add_node("analyze_transcript", self._analyze_transcript)
        workflow.add_node("create_visualization", self._create_visualization)
        workflow.add_node("generate_report", self._generate_final_report)

        workflow.set_entry_point("analyze_data")
        workflow.add_edge("analyze_data", "analyze_transcript")
        workflow.add_edge("analyze_transcript", "create_visualization")
        workflow.add_edge("create_visualization", "generate_report")
        # Mark the last step as terminal explicitly instead of leaving it as a
        # node without outgoing edges.
        workflow.set_finish_point("generate_report")

        return workflow.compile()

    def generate_report(self, csv_path: str, transcript_path: str) -> str:
        """Run the whole workflow and return the path of the written report.

        Args:
            csv_path: Path of the CSV file to analyze.
            transcript_path: Path of the UTF-8 transcript text file.
        """
        initial_state: AnalysisState = {
            "csv_path": csv_path,
            "transcript_path": transcript_path,
            "data_analysis": None,
            "transcript_analysis": None,
            "visualizations": None,
            "report_path": None,
        }

        final_state = self.graph.invoke(initial_state)

        return final_state["report_path"]



In [19]:
# Build the pipeline once, then run it on the sample CSV and transcript.
agent = ReportGeneratorAgent()
report_path = agent.generate_report("FIN_data.csv", "test.txt")

# Show where the Markdown report was written.
print(f"Report generated at: {report_path}")

Report generated at: report_20250214_225627.md
