In [1]:
pip install -qU "langchain[groq]"

Note: you may need to restart the kernel to use updated packages.


In [2]:
import getpass
import os

if not os.environ.get("GROQ_API_KEY"):
  os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

from langchain.chat_models import init_chat_model

llm = init_chat_model("deepseek-r1-distill-llama-70b", model_provider="groq")

Enter API key for Groq:  ········


In [3]:
pip install pandas scikit-learn langchain


Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
import re
import json
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from langchain.tools import Tool

In [5]:
# Extract target and feature from query
def extract_columns_from_query(query, df_columns):
    found_columns = []
    for col in df_columns:
        if re.search(rf"\b{re.escape(col)}\b", query, re.IGNORECASE):
            found_columns.append(col)
    return found_columns[:2] if len(found_columns) >= 2 else (None, None)

# Run regression
def run_linear_regression(df, target, feature):
    df = df[[feature, target]].dropna()
    X = df[[feature]]
    y = df[target]

    model = LinearRegression()
    model.fit(X, y)

    r2 = r2_score(y, model.predict(X))
    intercept = model.intercept_
    coef = model.coef_[0]
    return {
        "equation": f"{target} = {coef:.4f} * {feature} + {intercept:.4f}",
        "r2_score": r2
    }


In [17]:
def regression_tool_logic(json_input):
    inputs = json.loads(json_input)
    file_path = inputs["file_path"]
    query = inputs["query"]

    if file_path.endswith(".csv"):
        df = pd.read_csv(file_path)
    elif file_path.endswith(".json"):
        df = pd.read_json(file_path)
    else:
        return "Unsupported file format. Only .csv and .json are supported."

    target, feature = extract_columns_from_query(query, df.columns)
    if not target or not feature:
        return f"Could not extract two valid columns from query: '{query}'"

    try:
        result = run_linear_regression(df, target, feature)
        return f"✅ Regression Equation: {result['equation']}\n📊 R² Score: {result['r2_score']:.4f}"
    except Exception as e:
        return f"Error while performing regression: {str(e)}"

In [19]:
linear_regression_tool = Tool.from_function(
    name="linear_regression_tool",
    description="Performs linear regression between two columns from a CSV or JSON dataset based on user query.",
    func=regression_tool_logic
)

In [21]:
from langchain.agents import initialize_agent, AgentType
from langchain.agents import Tool
#from regression_tool import linear_regression_tool

In [23]:
tools = [linear_regression_tool]


In [25]:
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)


  agent = initialize_agent(


In [27]:
import json

query = "Predict orders_all_np based on DisplayAll_spend"
file_path = "assignment_data.csv"  # or .json
json_input = json.dumps({"file_path": file_path, "query": query})

response = agent.run(f"Use linear_regression_tool with this input: {json_input}")
print(response)



  response = agent.run(f"Use linear_regression_tool with this input: {json_input}")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m<think>
Okay, I need to use the linear_regression_tool with the given input. The input is a JSON object that specifies the file path as "assignment_data.csv" and the query as "Predict orders_all_np based on DisplayAll_spend". 

First, I should understand what the tool does. It performs linear regression between two columns from a dataset. The file is a CSV, but the tool accepts either CSV or JSON, so that's fine. 

The query asks to predict "orders_all_np" using "DisplayAll_spend". So, the dependent variable is orders_all_np, and the independent variable is DisplayAll_spend.

I should structure the JSON input correctly. The keys are "file_path" and "query". I need to make sure the file path is correct and that the columns exist in the dataset.

I don't have the actual data, but assuming the tool can handle it, I'll proceed. The tool will calculate the regression coefficients and possibly the R-squared value.

Once I run the t