In [1]:
import os
import pandas as pd
from crewai import Crew, Task, Agent, Process
from crewai_tools import FileReadTool
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Single file-reading tool instance, handed to every agent so they all read the same CSV.
csv_tool = FileReadTool(file_path="datos.csv")

In [3]:
# Agent that reads the CSV and works out what the dataset is about.
dataset_inference_agent = Agent(
    role="Dataset Context Specialist",
    backstory=(
        "An expert in understanding datasets and identifying their purpose. "
        "You have a deep understanding of data science, machine learning, and data analysis."
    ),
    goal=(
        "Infer the context and purpose of the dataset by analyzing column names, data types, and a few sample rows. "
        "Extract insights about the domain and the type of data provided."
    ),
    tools=[csv_tool],
    allow_code_execution=True,
    verbose=True,
)

LLM value is None


In [4]:
# Agent in charge of data-quality checks (missing values, wrong dtypes, outliers)
# and descriptive statistics for the dataset.
data_analysis_agent = Agent(
    role="Data Cleaning Specialist",
    goal=(
        "Analyze the dataset to identify missing values, incorrect data types, and potential outliers. "
        "Generate statistical summaries like mean, median, and correlations between variables."
    ),
    backstory=(
        "Specializes in cleaning and preparing data for analysis with expertise in data cleaning and preprocessing."
    ),
    tools=[csv_tool],
    verbose=True,
    # The goal asks for means, medians and correlations. With only the file-read
    # tool the LLM would have to estimate them from raw text, so enable code
    # execution here as well, matching the other agents that need to run code.
    allow_code_execution=True,
)

LLM value is None


In [5]:
# Agent that turns the dataset into charts; it is instructed to save them under 'graphs/'.
visualization_agent = Agent(
    role="Visualization Expert",
    backstory=(
        "Specializes in creating compelling and informative visualizations. "
        "You are an expert in Python, pandas, matplotlib, seaborn, and data visualization, "
        "capable of creating impactful data stories."
    ),
    goal=(
        "Generate meaningful visualizations such as histograms, scatter plots, line plots, bar charts, and heatmaps "
        "to provide insights into the data. "
        "Save all visualizations to a 'graphs/' directory."
    ),
    tools=[csv_tool],
    allow_code_execution=True,
    verbose=True,
)

LLM value is None


In [6]:
# Agent that assembles the findings of the other agents into a markdown report.
markdown_report_agent = Agent(
    role="Report Specialist",
    backstory="An expert in synthesizing data insights into polished reports.",
    goal=(
        "Compile all findings, analysis, and visualizations "
        "into a structured markdown report. "
        "Embed graphs and provide clear sections for analysis and summary."
    ),
    tools=[csv_tool],
    verbose=True,
)

LLM value is None


In [7]:
# Task for the context specialist: describe what the dataset contains and what it is for.
dataset_inference_task = Task(
    agent=dataset_inference_agent,
    description="Analyze the dataset to determine its context, purpose, and structure.",
    expected_output="A descriptive overview of the dataset's structure and purpose.",
)

In [8]:
# Task for the cleaning specialist: data-quality findings plus summary statistics.
data_analysis_task = Task(
    agent=data_analysis_agent,
    description=(
        "Perform a comprehensive analysis of the dataset to identify missing values, "
        "incorrect data types, and potential outliers."
    ),
    expected_output=(
        "A summary of missing values, standardized data types, and statistical metrics."
    ),
)

In [9]:
# Task for the visualization expert: produce charts suited to the data and store them in 'graphs/'.
visualization_task = Task(
    agent=visualization_agent,
    description="Generate visualizations dynamically based on the dataset's content.",
    expected_output="A set of annotated graphs saved in the 'graphs/' directory.",
)

In [10]:
# Final task: write the report. It receives the three earlier tasks as explicit
# context and its output is written to 'report.md'.
markdown_report_task = Task(
    agent=markdown_report_agent,
    description="Create a detailed markdown report summarizing all analysis and visualizations.",
    expected_output="A polished markdown report with embedded graphs and actionable insights.",
    context=[
        dataset_inference_task,
        data_analysis_task,
        visualization_task,
    ],
    output_file="report.md",
)

In [11]:
# Assemble the crew. The sequential process runs the tasks one after another
# in the order they appear in `tasks`.
csv_analysis_crew = Crew(
    process=Process.sequential,
    agents=[dataset_inference_agent, data_analysis_agent, visualization_agent, markdown_report_agent],
    tasks=[
        dataset_inference_task,
        data_analysis_task,
        visualization_task,
        markdown_report_task,
    ],
    verbose=True,
)

In [12]:
# The visualization agent is instructed to save figures into 'graphs/'. Create the
# folder up front so agent-generated plotting code does not fail on a missing
# directory; exist_ok keeps this cell safe to re-run.
os.makedirs("graphs", exist_ok=True)

# Run every task in the crew and keep the crew's output for display.
result = csv_analysis_crew.kickoff()
print("Crew Execution Complete. Final report generated.")
print(result)

[1m[95m# Agent:[00m [1m[92mDataset Context Specialist[00m
[95m## Task:[00m [92mAnalyze the dataset to determine its context, purpose, and structure.[00m


[1m[95m# Agent:[00m [1m[92mDataset Context Specialist[00m
[95m## Thought:[00m [92mI need to understand the content of the dataset to determine its context and purpose. I will start by reading the file's content to analyze the column names, data types, and sample rows.[00m
[95m## Using tool:[00m [92mRead a file's content[00m
[95m## Tool Input:[00m [92m
"{\"file_path\": \"datos.csv\"}"[00m
[95m## Tool Output:[00m [92m
Palabra clave,Costo,Impr.,Clics,Conversiones
[tada],8968,2676,1286,1957
[club tada],354,51,15,124
[ta da app],47,24,9,5
[tada delivery méxico],225,55,13,43
"""tada mexico""",695,138,32,55
[ta da delivery],314,150,65,128
[tada tada],0,2,0,0
[modelorama now],65,22,5,0
[aplicacion tada],17,12,3,24
[tada delivery],2167,589,227,387
[promociones modelorama],0,8,0,0
[tada delivery mexico],29,11,3,0