### Simple version without validation

In [None]:
from langchain_core.messages import SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate

# Build the chat prompt: a fixed system instruction plus a human message
# whose text is filled in from the `request` template variable.
human_prompt = HumanMessagePromptTemplate.from_template("{request}")
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a data scientist with 20 years of experience. You know everything about ML code writing. Write the code without comments, just pure code."),
    human_prompt,
])
model = Ollama(model="llama3", temperature=0)

# Prepare the request
request = chat_prompt.format_prompt(
    request="Write me a code for reading csv file and showing the first 5 rows of the data"
).to_messages()

# Get results from the LLM. The code below treats the result as a plain
# string (it calls str methods on it); adjust if the response type differs.
response = model.invoke(request)
generated_code = response

# Clean the generated code.
# Imported here so this cell stays self-contained.
import re

# Prefer the body of the first Markdown code fence. Removing only the
# backticks would leave the fence's language tag (e.g. "python") as the first
# line of the saved file, which is not valid code. An unterminated fence is
# accepted as well (the body then runs to the end of the reply).
fence_match = re.search(r"```[^\n`]*\n(.*?)(?:```|\Z)", generated_code, re.DOTALL)
if fence_match:
    cleaned_code = fence_match.group(1).strip()
else:
    # No usable fence: fall back to dropping stray backticks.
    cleaned_code = generated_code.replace("```", "").strip()

# Write the cleaned code to a .py file. The encoding is explicit so that
# non-ASCII characters in the model output do not depend on the platform's
# default encoding.
code_filename = "generated_code.py"
with open(code_filename, "w", encoding="utf-8") as file:
    file.write(cleaned_code)

print(f"Generated code saved to {code_filename}")

# Since you don't want to execute the code, we skip the execution part

### Advanced version with validation

In [None]:
import sys
import io
import contextlib
from langchain_core.messages import SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate

# Build the chat prompt: a fixed system instruction plus a human message
# whose text is filled in from the `request` template variable.
human_prompt = HumanMessagePromptTemplate.from_template("{request}")
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a highly skilled data scientist with 20 years of experience. You specialize in writing clean, efficient, and error-free ML code. Generate code snippets without comments and ensure they are ready to execute."),
    human_prompt,
])
# NOTE: temperature=0 is the only determinism-related setting passed here;
# no seed is actually set.
model = Ollama(model="llama3", temperature=0)

# Prepare the request
request = chat_prompt.format_prompt(
    request="Write me a code for reading a CSV file and showing the first 5 rows of the data"
).to_messages()

# Get results from the LLM. The code below treats the result as a plain
# string (it calls str methods on it); adjust if the response type differs.
response = model.invoke(request)
generated_code = response

# Clean the generated code.
# Imported here so this cell stays self-contained.
import re

# Prefer the body of the first Markdown code fence. Removing only the
# backticks would leave the fence's language tag (e.g. "python") as the first
# line of the code, which then fails when executed. An unterminated fence is
# accepted as well (the body then runs to the end of the reply).
fence_match = re.search(r"```[^\n`]*\n(.*?)(?:```|\Z)", generated_code, re.DOTALL)
if fence_match:
    cleaned_code = fence_match.group(1).strip()
else:
    # No usable fence: fall back to dropping stray backticks.
    cleaned_code = generated_code.replace("```", "").strip()

# Validate the corrected code
def validate_code(code):
    """Execute *code* and report whether it ran without raising.

    Anything the snippet prints to stdout is captured instead of being shown.

    Args:
        code: Python source text to execute.

    Returns:
        A ``(is_valid, output)`` tuple. On success ``is_valid`` is True and
        ``output`` is the captured stdout. If an ``Exception`` is raised
        (including ``SyntaxError``), ``is_valid`` is False and ``output`` is
        the exception message.
    """
    # SECURITY: exec() runs the snippet with the full privileges of this
    # process. The text comes from an LLM, so treat it as untrusted and only
    # run it in an environment where arbitrary code execution is acceptable.

    # Run in a dedicated namespace used as the snippet's globals. A bare
    # exec(code) inside a function stores the snippet's top-level names in the
    # function's locals, so functions defined by the snippet cannot see the
    # snippet's own imports (NameError). It also lets the snippet read and
    # overwrite the caller's module globals.
    namespace = {"__name__": "__main__"}
    with contextlib.redirect_stdout(io.StringIO()) as f:
        try:
            exec(code, namespace)
        except Exception as e:
            return False, str(e)
        return True, f.getvalue()


# Run the cleaned code once; validation_output holds either the captured
# stdout (success) or the error message (failure).
is_valid, validation_output = validate_code(cleaned_code)

if is_valid:
    # Write the validated code to a .py file. The encoding is explicit so
    # that non-ASCII characters in the model output do not depend on the
    # platform's default encoding.
    code_filename = "generated_code_with_validation.py"
    with open(code_filename, "w", encoding="utf-8") as file:
        file.write(cleaned_code)
    print(f"Generated code validated and saved to {code_filename}")
else:
    print(f"Code validation failed with error:\n{validation_output}")

# Note: validate_code() above has already executed the generated code once via exec(); there is no separate agent-driven execution step after this point.

#### Need to fix this part:
```python
# Correct the path in the code
corrected_code = cleaned_code.replace(
    "data.csv", "/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv")
```

In [None]:
import sys
import io
import contextlib
from langchain_core.messages import SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate

# Build the chat prompt: a fixed system instruction plus a human message
# whose text is filled in from the `request` template variable.
human_prompt = HumanMessagePromptTemplate.from_template("{request}")
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a highly skilled data scientist with 20 years of experience. You specialize in writing clean, efficient, and error-free ML code. Generate code snippets without comments and ensure they are ready to execute."),
    human_prompt,
])
# NOTE: temperature=0 is the only determinism-related setting passed here;
# no seed is actually set.
model = Ollama(model="llama3", temperature=0)

# Prepare the request
request = chat_prompt.format_prompt(
    request="Write me a code for reading a CSV file and showing the first 5 rows of the data"
).to_messages()

# Get results from the LLM. The code below treats the result as a plain
# string (it calls str methods on it); adjust if the response type differs.
response = model.invoke(request)
generated_code = response

# Clean the generated code by removing any surrounding text.
# Imported here so this cell stays self-contained.
import re

# Prefer the body of the first Markdown code fence. Removing only the
# backticks would leave the fence's language tag (e.g. "python") as the first
# line of the code, which then fails when executed. An unterminated fence is
# accepted as well (the body then runs to the end of the reply).
fence_match = re.search(r"```[^\n`]*\n(.*?)(?:```|\Z)", generated_code, re.DOTALL)
if fence_match:
    cleaned_code = fence_match.group(1).strip()
else:
    # No usable fence: fall back to dropping stray backticks.
    cleaned_code = generated_code.replace("```", "").strip()
# Drop a known preamble sentence if it is still present.
if "Here is the code snippet:" in cleaned_code:
    cleaned_code = cleaned_code.split("Here is the code snippet:")[1].strip()

# Correct the path in the code: point the "data.csv" placeholder at the
# local Iris dataset.
corrected_code = cleaned_code.replace(
    "data.csv", "/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv")

# Print cleaned and corrected code for debugging
print("Corrected code:")
print(corrected_code)
print("\nValidating code...")
print("\nValidating code...")

# Validate the corrected code
def validate_code(code):
    """Execute *code* and report whether it ran without raising.

    Anything the snippet prints to stdout is captured instead of being shown.

    Args:
        code: Python source text to execute.

    Returns:
        A ``(is_valid, output)`` tuple. On success ``is_valid`` is True and
        ``output`` is the captured stdout. If an ``Exception`` is raised
        (including ``SyntaxError``), ``is_valid`` is False and ``output`` is
        a report containing the exception message followed by the code.
    """
    # SECURITY: exec() runs the snippet with the full privileges of this
    # process. The text comes from an LLM, so treat it as untrusted and only
    # run it in an environment where arbitrary code execution is acceptable.

    # Run in a dedicated namespace used as the snippet's globals. A bare
    # exec(code) inside a function stores the snippet's top-level names in the
    # function's locals, so functions defined by the snippet cannot see the
    # snippet's own imports (NameError). It also lets the snippet read and
    # overwrite the caller's module globals.
    namespace = {"__name__": "__main__"}
    with contextlib.redirect_stdout(io.StringIO()) as f:
        try:
            exec(code, namespace)
        except Exception as e:
            return False, f"Exception: {str(e)}\nCode:\n{code}"
        output = f.getvalue()
        return True, output


# Run the corrected code once; validation_output holds either the captured
# stdout (success) or the error report (failure).
is_valid, validation_output = validate_code(corrected_code)

if is_valid:
    # Write the validated code to a .py file. The encoding is explicit so
    # that non-ASCII characters in the model output do not depend on the
    # platform's default encoding.
    code_filename = "generated_code_with_validation.py"
    with open(code_filename, "w", encoding="utf-8") as file:
        file.write(corrected_code)
    print(f"Generated code validated and saved to {code_filename}")
    print(f"Validation output:\n{validation_output}")
else:
    print(f"Code validation failed with error:\n{validation_output}")

In [2]:
import sys
import io
import contextlib
from langchain_core.messages import SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate

# Build the chat prompt: a fixed system instruction plus a human message
# whose text is filled in from the `request` template variable.
human_prompt = HumanMessagePromptTemplate.from_template("{request}")
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a highly skilled data scientist with 20 years of experience. You specialize in writing clean, efficient, and error-free ML code. Generate code snippets without comments and ensure they are ready to execute."),
    human_prompt,
])
# NOTE: temperature=0 is the only determinism-related setting passed here;
# no seed is actually set.
model = Ollama(model="llama3", temperature=0)

# Prepare the request
request = chat_prompt.format_prompt(
    request=(
        "Write me a code for reading a CSV file, preprocessing the data (handling missing values, encoding categorical variables), "
        "splitting the data into training and test sets, training a decision tree classifier, and evaluating the model's performance."
    )
).to_messages()

# Get results from the LLM. The code below treats the result as a plain
# string (it calls str methods on it); adjust if the response type differs.
response = model.invoke(request)
generated_code = response

# Clean the generated code by removing any surrounding text.
# Imported here so this cell stays self-contained.
import re

# Prefer the body of the first Markdown code fence; this also discards the
# fence's language tag. Splitting the text on the word "python" (the previous
# approach) kept only the part between the first and second occurrence of
# that word, so any later mention of "python" silently truncated the code.
# An unterminated fence is accepted as well (the body then runs to the end
# of the reply).
fence_match = re.search(r"```[^\n`]*\n(.*?)(?:```|\Z)", generated_code, re.DOTALL)
if fence_match:
    cleaned_code = fence_match.group(1).strip()
else:
    # No usable fence: fall back to dropping stray backticks.
    cleaned_code = generated_code.replace("```", "").strip()
# Drop a known preamble sentence if it is still present.
if "Here is the code snippet:" in cleaned_code:
    cleaned_code = cleaned_code.split("Here is the code snippet:")[1].strip()

# Correct the path in the code: point the "data.csv" placeholder at the
# local Iris dataset.
corrected_code = cleaned_code.replace(
    "data.csv", "/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv")

# Print cleaned and corrected code for debugging
print("Corrected code:")
print(corrected_code)
print("\nValidating code...")

# Validate the corrected code
def validate_code(code):
    """Execute *code* and report whether it ran without raising.

    Anything the snippet prints to stdout is captured instead of being shown.

    Args:
        code: Python source text to execute.

    Returns:
        A ``(is_valid, output)`` tuple. On success ``is_valid`` is True and
        ``output`` is the captured stdout. If an ``Exception`` is raised
        (including ``SyntaxError``), ``is_valid`` is False and ``output`` is
        a report containing the exception message followed by the code.
    """
    # SECURITY: exec() runs the snippet with the full privileges of this
    # process. The text comes from an LLM, so treat it as untrusted and only
    # run it in an environment where arbitrary code execution is acceptable.

    # Run in a dedicated namespace used as the snippet's globals. A bare
    # exec(code) inside a function stores the snippet's top-level names in the
    # function's locals, so functions defined by the snippet cannot see the
    # snippet's own imports (NameError). It also lets the snippet read and
    # overwrite the caller's module globals.
    namespace = {"__name__": "__main__"}
    with contextlib.redirect_stdout(io.StringIO()) as f:
        try:
            exec(code, namespace)
        except Exception as e:
            return False, f"Exception: {str(e)}\nCode:\n{code}"
        output = f.getvalue()
        return True, output


# Run the corrected code once; validation_output holds either the captured
# stdout (success) or the error report (failure).
is_valid, validation_output = validate_code(corrected_code)

if is_valid:
    # Write the validated code to a .py file. The encoding is explicit so
    # that non-ASCII characters in the model output do not depend on the
    # platform's default encoding.
    code_filename = "generated_code_with_validation.py"
    with open(code_filename, "w", encoding="utf-8") as file:
        file.write(corrected_code)
    print(f"Generated code validated and saved to {code_filename}")
    print(f"Validation output:\n{validation_output}")
else:
    print(f"Code validation failed with error:\n{validation_output}")

# Note: validate_code() above has already executed the generated code once via exec(); there is no separate agent-driven execution step after this point.

Corrected code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the CSV file
df = pd.read_csv('/Users/ilya/Desktop/GitHub_Repositories/HW_University/Data_Mining/datasets/Iris.csv')

# Preprocess the data
imputer = Imputer(strategy='mean')
df[['column1', 'column2']] = imputer.fit_transform(df[['column1', 'column2']])

ohe = OneHotEncoder(handle_unknown='ignore')
df[['category1', 'category2']] = ohe.fit_transform(df[['category1', 'category2']]).toarray()

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df.drop('target', axis=1), df['target'], test_size=0.2, random_state=42)

# Train a decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Evaluate the model's performance
y_pred