In [31]:
from langgraph.graph import StateGraph, START, END
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
import os
from dotenv import load_dotenv
import json
import re
from pydantic import BaseModel, Field
from typing import List, Dict, Optional, Union, Literal
from pydantic import BaseModel, Field, validator

load_dotenv()

def sanitize_ascii(s: str) -> str:
    # Remove any non-ASCII characters from the string
    return ''.join(c for c in s if ord(c) < 128)

api_key = os.environ["GROQ_API_KEY"]
# api_key = os.environ["GEMINI_API_KEY"]
# api_key = os.environ["MISTRAL_API_KEY"]
# api_key = os.environ["OPENAI_API_KEY"]

model = "llama3-70b-8192"
# model = "gemini-2.0-flash"
# model = "codestral-latest"
# model = "gpt-4.1-nano"

sanitized_api_key = sanitize_ascii(api_key)
llm = ChatGroq(api_key=sanitized_api_key,model=model)
# llm = ChatGoogleGenerativeAI(api_key=api_key,model=model)
# llm = ChatMistralAI(api_key=api_key,model=model)
# llm = ChatOpenAI(api_key=api_key,model=model)


class TerraformFile(BaseModel):
    path: str
    content: str
    
class TerraformComponent(BaseModel):
    name: str = Field(..., description="The name of the component.")
    main_tf: str = Field(..., description="The main.tf file content.")
    output_tf: str = Field(..., description="The output.tf file content.")
    variables_tf: str = Field(..., description="The variables.tf file content.")
    
class EnvironmentList(BaseModel):
    environments: List[TerraformComponent] = []

class ModuleList(BaseModel):
    modules: List[TerraformComponent] = []


class UserInput(BaseModel):
    """User input for the Terraform code generation agent."""
    
    services: List[str] = Field(
        ..., 
        description="List of AWS services to deploy (e.g., ['ec2', 's3', 'rds', 'lambda'])."
    )
    region: str = Field(
        ..., 
        description="AWS region where services will be deployed (e.g., 'us-west-2')."
    )
    
    vpc_cidr: str = Field(
        ..., 
        description="CIDR block for the VPC (e.g., '10.0.0.0/16')."
    )
    subnet_configuration: Dict[str, List[str]] = Field(
        default_factory=lambda: {"public": [], "private": [], "database": []},
        description="CIDR blocks for subnets by type (public, private, database)."
    )
    availability_zones: List[str] = Field(
        ...,
        description="List of availability zones to use (e.g., ['us-west-2a', 'us-west-2b'])."
    )
    
    compute_type: str = Field(
        ..., 
        description="Type of compute to use (e.g., 'ec2', 'ecs', 'lambda')."
    )
    
    database_type: Optional[str] = Field(
        None, 
        description="Type of database to use if needed (e.g., 'mysql', 'postgres', 'dynamodb')."
    )
    
    is_multi_az: bool = Field(
        ..., 
        description="Whether to deploy across multiple availability zones for high availability."
    )
    is_serverless: bool = Field(
        ..., 
        description="Whether to use serverless architecture where applicable."
    )
    
    enable_logging: bool = Field(
        True, 
        description="Whether to enable CloudWatch logging for services."
    )
    enable_monitoring: bool = Field(
        True, 
        description="Whether to enable CloudWatch monitoring for services."
    )
    load_balancer_type: Optional[Literal["ALB", "NLB", "CLB"]] = Field(
        None,
        description="Type of load balancer to deploy if needed."
    )
    
    enable_waf: bool = Field(
        False, 
        description="Whether to enable AWS WAF for web applications."
    )
    
    tags: Dict[str, str] = Field(
        default_factory=lambda: {
            "Environment": "dev",
            "ManagedBy": "Terraform",
            "Owner": "DevOps"
        },
        description="Resource tags."
    )
    
    # Free-form requirements
    requirements: str = Field(
        ..., 
        description="Additional requirements in natural language."
    )
    
    # Advanced configuration
    custom_parameters: Dict[str, Union[str, int, bool, List, Dict]] = Field(
        default_factory=dict,
        description="Additional custom parameters for advanced configurations."
    )
    
    # Validators
    @validator('vpc_cidr')
    def validate_cidr(cls, v):
        import ipaddress
        try:
            ipaddress.IPv4Network(v)
            return v
        except ValueError:
            raise ValueError(f"Invalid CIDR block format: {v}")
    
    @validator('region')
    def validate_region(cls, v):
        valid_regions = [
            'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
            'eu-west-1', 'eu-west-2', 'eu-west-3', 'eu-central-1',
            'ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2',
            # Add more valid regions as needed
        ]
        if v not in valid_regions:
            raise ValueError(f"Invalid AWS region: {v}. Must be one of {valid_regions}")
        return v
    

class TerraformState(BaseModel):
    """State for our Terraform code generation agent."""
    modules: ModuleList = Field(default_factory=ModuleList)
    environments: EnvironmentList = Field(default_factory=EnvironmentList)
    user_input: Optional[UserInput] = None

# Prompt template
terraform_template = """
You're a senior AWS Solutions Architect creating production-grade Terraform code. Generate infrastructure as code based on the following specifications:

USER REQUIREMENTS:
{requirements}

INFRASTRUCTURE SPECIFICATIONS:
- AWS Services: {services}
- AWS Region: {region}
- VPC CIDR: {vpc_cidr}
- Subnet Configuration: {subnet_configuration}
- Availability Zones: {availability_zones}
- Compute Type: {compute_type}
- Multi-AZ Deployment: {is_multi_az}
- Serverless Architecture: {is_serverless}
- Load Balancer Type: {load_balancer_type}
- Logging Enabled: {enable_logging}
- Monitoring Enabled: {enable_monitoring}
- WAF Enabled: {enable_waf}
- Resource Tags: {tags} (include environment-specific Environment tag as well)
- Custom Parameters: {custom_parameters}

TERRAFORM BEST PRACTICES TO IMPLEMENT:
1. Create a modular design with proper service isolation
2. Implement proper network segregation (public/private/database subnets)
3. Follow least privilege IAM policies and proper encryption
4. Use environment-specific configurations with proper variable typing
5. Set up remote state management with appropriate locking
6. Include comprehensive tagging strategy 
7. Implement proper error handling with lifecycle management
8. Use proper Terraform AWS provider (version 5.0.0+)
9. Use data sources for dynamic lookups and proper resource repetition

ENVIRONMENT CONFIGURATIONS:
- dev: minimal capacity and redundancy
- stage: medium capacity with good redundancy
- prod: high capacity with full redundancy and auto-scaling

YOU MUST RESPOND WITH ONLY A VALID JSON OBJECT IN THE FOLLOWING STRUCTURE:

{{
  "environments": [
    {{
      "name": "dev",
      "main_tf": "# Terraform code here",
      "output_tf": "# Output variables here",
      "variables_tf": "# Input variables here"
    }},
    {{
      "name": "stage",
      "main_tf": "# Terraform code here",
      "output_tf": "# Output variables here",
      "variables_tf": "# Input variables here"
    }},
    {{
      "name": "prod",
      "main_tf": "# Terraform code here",
      "output_tf": "# Output variables here",
      "variables_tf": "# Input variables here"
    }}
  ],
  "modules": [
    # DYNAMIC: Include a module for each required AWS service
    "modules": [
    {{
      "name": "<service>",
      "main_tf": "# Module resources",
      "output_tf": "# Module outputs",
      "variables_tf": "# Module variables"
    }}
  ]
}}

# ADD ALL OTHER REQUIRED SERVICE MODULES BASED ON {services} and {requirements}

IMPORTANT NOTES:
1. ONLY RETURN THE JSON OBJECT - NO INTRODUCTION, EXPLANATION OR CODE BLOCKS
2. CREATE MODULES FOR ALL SERVICES IN THE SERVICE LIST: {services}
3. EACH MODULE SHOULD BE COMPLETE AND DEPLOYABLE
4. ENSURE ALL JSON IS PROPERLY FORMATTED WITH CORRECT QUOTES AND COMMAS
5. DO NOT USE PLACEHOLDERS - PROVIDE ACTUAL WORKING CODE
"""

def process_request(state: TerraformState):
    """Process the user's input and update the state."""
    # state.user_input = user_input
    print("Processing user input...")
    print(state.user_input)
    return state


def extract_json_from_text(text):
    """Extract JSON from text with more aggressive parsing."""
    # First, try to find complete JSON in the text
    text = text.strip()
    
    # Check if the text as a whole is valid JSON
    try:
        json_data = json.loads(text)
        return text
    except json.JSONDecodeError:
        pass
    
    # Try to find JSON in code blocks
    json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
    if json_match:
        try:
            json_str = json_match.group(1).strip()
            json.loads(json_str)  # Validate
            return json_str
        except json.JSONDecodeError:
            pass
    
    # Try to find JSON between curly braces with beginning and end of the content
    json_match = re.search(r'(\{[\s\S]*\})', text)
    if json_match:
        try:
            json_str = json_match.group(1).strip()
            json.loads(json_str)  # Validate
            return json_str
        except json.JSONDecodeError:
            pass
    
    # If we've gotten this far, log the response and return empty JSON
    print(f"Failed to extract valid JSON. First 200 chars of response: {text[:200]}...")
    return '{}'
def generate_terraform_code(state: TerraformState):
    """Generate Terraform code based on the structured user input."""
    if not state.user_input:
        raise ValueError("User input is required to generate Terraform code")
    
    # Create user_input dictionary for prompt template
    user_input = {
        "requirements": state.user_input.requirements,
        "region": state.user_input.region,
        "vpc_cidr": state.user_input.vpc_cidr,
        "subnet_configuration": json.dumps(state.user_input.subnet_configuration),
        "availability_zones": ", ".join(state.user_input.availability_zones),
        "services": ", ".join(state.user_input.services),
        "compute_type": state.user_input.compute_type,
        "is_multi_az": str(state.user_input.is_multi_az),
        "is_serverless": str(state.user_input.is_serverless),
        "load_balancer_type": state.user_input.load_balancer_type or "None",
        "enable_logging": str(state.user_input.enable_logging),
        "enable_monitoring": str(state.user_input.enable_monitoring),
        "enable_waf": str(state.user_input.enable_waf),
        "tags": json.dumps(state.user_input.tags),
        "custom_parameters": json.dumps(state.user_input.custom_parameters)
    }
    
    prompt = PromptTemplate.from_template(terraform_template)
    
    chain = prompt | llm
    response = chain.invoke(user_input)
    
    try:
        # Extract JSON from the response
        json_str = extract_json_from_text(response.content)
        
        # Parse the JSON response
        data = json.loads(json_str)
        
        # Update environments
        for env_data in data.get("environments", []):
            component = TerraformComponent(
                name=env_data.get("name", ""),
                main_tf=env_data.get("main_tf", ""),
                output_tf=env_data.get("output_tf", ""),
                variables_tf=env_data.get("variables_tf", "")
            )
            state.environments.environments.append(component)
        
        # Update modules
        for module_data in data.get("modules", []):
            component = TerraformComponent(
                name=module_data.get("name", ""),
                main_tf=module_data.get("main_tf", ""),
                output_tf=module_data.get("output_tf", ""),
                variables_tf=module_data.get("variables_tf", "")
            )
            state.modules.modules.append(component)
            
    except Exception as e:
        print(f"Error parsing LLM response: {e}")
        print(f"Response content: {response.content}")
    
    return state

# Function to save generated Terraform files
def save_terraform_files(state: TerraformState):
    """Save the generated Terraform files to disk."""
    
    base_dir = "output/src"
     
    os.makedirs(base_dir, exist_ok=True)
    
    for env in state.environments.environments:
        
        env_dir = os.path.join(base_dir, "environments", env.name)
        os.makedirs(env_dir, exist_ok=True)
        
        with open(os.path.join(env_dir, "main.tf"), "w") as f:
            f.write(env.main_tf)
        
        with open(os.path.join(env_dir, "output.tf"), "w") as f:
            f.write(env.output_tf)
        
        with open(os.path.join(env_dir, "variables.tf"), "w") as f:
            f.write(env.variables_tf)
    
    for module in state.modules.modules:
       
        module_dir = os.path.join(base_dir, "modules", module.name)
        os.makedirs(module_dir, exist_ok=True)
        
        with open(os.path.join(module_dir, "main.tf"), "w") as f:
            f.write(module.main_tf)
        
        with open(os.path.join(module_dir, "output.tf"), "w") as f:
            f.write(module.output_tf)
        
        with open(os.path.join(module_dir, "variables.tf"), "w") as f:
            f.write(module.variables_tf)
    
    print(f"Terraform files have been saved to {base_dir}")
    
    return state

# Define the graph
graph = StateGraph(TerraformState)

# Add nodes
graph.add_node("process_request", process_request)
graph.add_node("generate_terraform_code", generate_terraform_code)
graph.add_node("save_terraform_files", save_terraform_files)

# Add edges
graph.add_edge(START, "process_request")
graph.add_edge("process_request", "generate_terraform_code")
graph.add_edge("generate_terraform_code", "save_terraform_files")
graph.add_edge("save_terraform_files", END)

# Compile the graph
terraform_app = graph.compile()


/var/folders/07/8j6bcwpn5_qfb0_tjmt2qg100000gn/T/ipykernel_58504/1346907910.py:139: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator('vpc_cidr')
/var/folders/07/8j6bcwpn5_qfb0_tjmt2qg100000gn/T/ipykernel_58504/1346907910.py:148: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator('region')


In [32]:
def generate_terraform(
    services: List[str],
    region: str,
    vpc_cidr: str,
    availability_zones: List[str],
    compute_type: str,
    database_type: Optional[str] = None,
    subnet_configuration: Optional[Dict[str, List[str]]] = None,
    is_multi_az: bool = True,
    is_serverless: bool = False,
    enable_logging: bool = True,
    enable_monitoring: bool = True,
    enable_waf: bool = False,
    load_balancer_type: Optional[str] = None,
    tags: Optional[Dict[str, str]] = None,
    custom_parameters: Optional[Dict] = None,
    requirements: str = ""
):
    """
    Generate Terraform code based on user input parameters.
    
    Args:
        services: List of AWS services to deploy
        region: AWS region
        vpc_cidr: CIDR block for VPC
        availability_zones: List of availability zones
        compute_type: Type of compute (ec2, ecs, lambda)
        database_type: Type of database if needed
        subnet_configuration: CIDR blocks for different subnet types
        is_multi_az: Whether to use multiple AZs
        is_serverless: Whether to use serverless architecture
        enable_logging: Whether to enable logging
        enable_monitoring: Whether to enable monitoring
        enable_waf: Whether to enable AWS WAF
        load_balancer_type: Type of load balancer
        tags: Resource tags
        custom_parameters: Additional configuration parameters
        requirements: Free-form requirements
        
    Returns:
        Generated Terraform code as a TerraformState object
    """
    if subnet_configuration is None:
        subnet_configuration = {
            "public": [],
            "private": [],
            "database": []
        }
    
    if tags is None:
        tags = {"Owner": "DevOps"}
    
    if custom_parameters is None:
        custom_parameters = {}
    
    user_input = UserInput(
        services=services,
        region=region,
        vpc_cidr=vpc_cidr,
        subnet_configuration=subnet_configuration,
        availability_zones=availability_zones,
        compute_type=compute_type,
        database_type=database_type,
        is_multi_az=is_multi_az,
        is_serverless=is_serverless,
        enable_logging=enable_logging,
        enable_monitoring=enable_monitoring,
        enable_waf=enable_waf,
        load_balancer_type=load_balancer_type,
        tags=tags,
        custom_parameters=custom_parameters,
        requirements=requirements
    )
    
    result = terraform_app.invoke({"user_input": user_input})
    return result

In [33]:
# Example usage with comprehensive structured input
result = generate_terraform(
    services=["ec2", "rds", "alb"],
    region="us-west-2",
    vpc_cidr="10.0.0.0/16",
    subnet_configuration={
        "public": ["10.0.1.0/24", "10.0.2.0/24"],
        "private": ["10.0.3.0/24", "10.0.4.0/24"],
        "database": ["10.0.5.0/24", "10.0.6.0/24"]
    },
    availability_zones=["us-west-2a", "us-west-2b"],
    compute_type="ec2",
    database_type="postgres",
    is_multi_az=True,
    is_serverless=True,
    enable_logging=True,
    enable_monitoring=True,
    enable_waf=True,
    load_balancer_type="ALB",
    tags={
        "Project": "WebApp",
        "Owner": "DevOps",
        "CostCenter": "IT-123"
    },
    custom_parameters={
        "enable_auto_scaling": True,
        "min_capacity": 2,
        "max_capacity": 10,
        "desired_capacity": 2,
        "backup_retention_period": 7
    },
    requirements="Create a highly available web application with a PostgreSQL database. Include proper security groups and implement auto-scaling for the EC2 instances."
)

print(result)


Processing user input...
services=['ec2', 'rds', 'alb'] region='us-west-2' vpc_cidr='10.0.0.0/16' subnet_configuration={'public': ['10.0.1.0/24', '10.0.2.0/24'], 'private': ['10.0.3.0/24', '10.0.4.0/24'], 'database': ['10.0.5.0/24', '10.0.6.0/24']} availability_zones=['us-west-2a', 'us-west-2b'] compute_type='ec2' database_type='postgres' is_multi_az=True is_serverless=True enable_logging=True enable_monitoring=True load_balancer_type='ALB' enable_waf=True tags={'Project': 'WebApp', 'Owner': 'DevOps', 'CostCenter': 'IT-123'} requirements='Create a highly available web application with a PostgreSQL database. Include proper security groups and implement auto-scaling for the EC2 instances.' custom_parameters={'enable_auto_scaling': True, 'min_capacity': 2, 'max_capacity': 10, 'desired_capacity': 2, 'backup_retention_period': 7}
Failed to extract valid JSON. First 200 chars of response: Here is the Terraform infrastructure as code based on the provided specifications:

```
{
  "environment

In [None]:
# Example usage with comprehensive structured input


# result = generate_terraform(
#     services=["ec2", "rds", "alb"],
#     region="us-west-2",
#     vpc_cidr="10.0.0.0/16",
#     subnet_configuration={
#         "public": ["10.0.1.0/24", "10.0.2.0/24"],
#         "private": ["10.0.3.0/24", "10.0.4.0/24"],
#         "database": ["10.0.5.0/24", "10.0.6.0/24"]
#     },
#     availability_zones=["us-west-2a", "us-west-2b"],
#     compute_type="ec2",
#     compute_instance_type="t3.medium",
#     database_type="postgres",
#     database_settings={
#         "version": "13.4",
#         "instance_type": "db.t3.medium",
#         "storage_gb": 20,
#         "multi_az": True
#     },
#     is_multi_az=True,
#     is_serverless=False,
#     enable_logging=True,
#     enable_monitoring=True,
#     enable_waf=True,
#     load_balancer_type="ALB",
#     security_groups={
#         "web_sg": [
#             {
#                 "type": "ingress",
#                 "from_port": 443,
#                 "to_port": 443,
#                 "protocol": "tcp",
#                 "cidr_blocks": ["0.0.0.0/0"]
#             },
#             {
#                 "type": "ingress",
#                 "from_port": 80,
#                 "to_port": 80,
#                 "protocol": "tcp",
#                 "cidr_blocks": ["0.0.0.0/0"]
#             }
#         ],
#         "app_sg": [
#             {
#                 "type": "ingress",
#                 "from_port": 8080,
#                 "to_port": 8080,
#                 "protocol": "tcp",
#                 "source_security_group": "web_sg"
#             }
#         ]
#     },
#     tags={
#         "Environment": "dev",
#         "Project": "WebApp",
#         "Owner": "DevOps",
#         "CostCenter": "IT-123"
#     },
#     custom_parameters={
#         "enable_auto_scaling": True,
#         "min_capacity": 2,
#         "max_capacity": 10,
#         "desired_capacity": 2,
#         "backup_retention_period": 7
#     },
#     requirements="Create a highly available web application with a PostgreSQL database. Include proper security groups and implement auto-scaling for the EC2 instances."
# )

