# Verilog Parsar

## Introduction
- This is a verilog parser written in python.
- It can parse verilog file and generate a report

## Usage
- Check for the following:
    - Unreachable Blocks
    - Uninitialized Register
    - Inferring Latches
    - Unreachable State
    - Non Full Case
    - Non Parallel Case
    - Multiple Drivers
    - Arithmetic Overflow
    - Integer Overflow

# Imports

In [2232]:
import re
import sys
import os

In [2233]:
def file_reader(file_name):
    x = []
    try:
        # Open the file in read mode
        with open(file_name, 'r') as file:
            # Read and print each line
            for line in file:
                x.append(line.strip())  

    except FileNotFoundError:
        print(f"File not found: {file_name}")

    except Exception as e:
        print(f"An error occurred: {e}")
    
    return x


### Testing reading verilog file

In [2234]:
file_path = 'tst.v'

verilog_code = file_reader(file_path)
    
for i in verilog_code:
        print(i)

module UnreachableBlocks(data_out);
output reg data_out;
reg reach;
wire state;

initial
begin
reach = 1'b1;
end

always @(state)
begin
if (reach == 2'b0)
begin
data_out = 1'b1;
end
else
begin
data_out = 1'b0;
end
end
endmodule

module UninitializedRegister(data_out);
reg data;
output reg data_out;
assign data_out = data;
endmodule

module InferringLatches(enable, Data, out);
input wire enable, Data
output reg out;

always @(enable)
begin
if (enable)
begin
out = Data;
end
end
endmodule

module UnreachableState(clk, state_out);
input clk;
output reg [1:0] state_out;
reg [1:0] current_state, next_state;
localparam [1:0] S1 = 2'b00 ;
localparam [1:0] S2 = 2'b01 ;
localparam [1:0] S3 = 2'b10 ;

always @(posedge clk)
begin
current_state <= next_state;
end

always @(*)
begin
case (current_state)
S1:
begin
next_state <= S2;
end
S2:
begin
next_state <= S1;
end
S3:
begin
next_state <= S1;
end
endcase
state_out = current_state;

end
endmodule

module NonFullCase(y_out);
output reg [1:0] y_out;
r

### Dividing Modules

#### Utility Functions

In [2235]:
def counting_modules(verilog_code):
    module_counter = 0
    for i in verilog_code:
        if i == "endmodule":
            module_counter += 1
    return module_counter

In [2236]:
def creating_module_lists(verilog_code, module_lists, module_counter):
    for i in verilog_code:
        module_lists[len(module_lists)-module_counter].append(i)
        if i == "endmodule":
            module_counter -= 1
    return module_lists

In [2237]:
def remove_empty_strings(module_lists):
    for i in module_lists:
     while("" in i) : 
        i.remove("")
    return module_lists

In [2238]:
def remove_comments(module_lists):
    i_counter = 0
    for i in module_lists:
        
        j_counter = 0
        
        for j in i:
            
            if j.startswith('//'):
                i.remove(j)
            
            elif '//' in j:
                index = j.find('//')
                j = j[:index]
                module_lists[i_counter][j_counter] = j
            
            j_counter += 1

        i_counter += 1
    return module_lists

#### Applying utility functions

In [2239]:
module_counter = counting_modules(verilog_code)

In [2240]:
# create lists according to module counter
module_lists = [[] for i in range(module_counter)]
module_lists

[[], [], [], [], [], [], [], [], []]

In [2241]:
# append each module to a module_lists from the y
module_lists = creating_module_lists(verilog_code, module_lists, module_counter)

In [2242]:
# remove all empty strings from the module_lists
module_lists = remove_empty_strings(module_lists)

In [2243]:
module_lists = remove_comments(module_lists)

In [2244]:
module_lists

[['module UnreachableBlocks(data_out);',
  'output reg data_out;',
  'reg reach;',
  'wire state;',
  'initial',
  'begin',
  "reach = 1'b1;",
  'end',
  'always @(state)',
  'begin',
  "if (reach == 2'b0)",
  'begin',
  "data_out = 1'b1;",
  'end',
  'else',
  'begin',
  "data_out = 1'b0;",
  'end',
  'end',
  'endmodule'],
 ['module UninitializedRegister(data_out);',
  'reg data;',
  'output reg data_out;',
  'assign data_out = data;',
  'endmodule'],
 ['module InferringLatches(enable, Data, out);',
  'input wire enable, Data',
  'output reg out;',
  'always @(enable)',
  'begin',
  'if (enable)',
  'begin',
  'out = Data;',
  'end',
  'end',
  'endmodule'],
 ['module UnreachableState(clk, state_out);',
  'input clk;',
  'output reg [1:0] state_out;',
  'reg [1:0] current_state, next_state;',
  "localparam [1:0] S1 = 2'b00 ;",
  "localparam [1:0] S2 = 2'b01 ;",
  "localparam [1:0] S3 = 2'b10 ;",
  'always @(posedge clk)',
  'begin',
  'current_state <= next_state;',
  'end',
  'alway

### Unreachable State

### Non-Full Case

In [2245]:
def calculating_case_index(module_lists):
    cases_index = []
    for i in module_lists:
        for j in i:
            cmp = re.search("^case",j)
            if cmp:
                cases_index.append(module_lists.index(i))
    return cases_index

In [2246]:
def generating_reg_list(module_lists,cases_index):
    reg_list = []
    # in each list first number is case_index and second number is reg_index & size of reg is third number
    for i in cases_index:
        for j in module_lists[i]:
            if j.startswith('reg'):
                # store reg name & size in a list
                if ']' or '[' in j:
                    low_bound = j.find('[')
                    high_bound = j.find(']')
                    size = j[low_bound+1:high_bound]
                    size = size.split(':')
                    size = int(size[0])-int(size[1])+1
                    j = j[high_bound+1:].replace(';', '')
                    j = j.replace(' ', '')
                    variables_names = j.split(',')
                    for var_name in variables_names:
                        reg_list.append([i, var_name, size])
                else:
                    size = 1
                    low_bound = j.find('g')
                    j = j[low_bound+1:].replace(';', '')
                    j = j.replace(' ', '')
                    variables_names = j.split(',')
                    for var_name in variables_names:
                        reg_list.append([i, var_name, size])  
    #print(reg_list)
    return reg_list

In [2247]:
def check_full_case(module_lists):
    cases_index = calculating_case_index(module_lists)
    reg_list = generating_reg_list(module_lists,cases_index)
    
    size_list_for_case = []
    mask = 0
    for i in cases_index:
        for j in module_lists[i]:
            if j.startswith('case'):
                bound = j.find('e')
                reg_name = j[bound+1:]
                reg_name = reg_name.replace(' ', '')
                reg_name = reg_name.replace('(', '')
                reg_name = reg_name.replace(')', '')
                reg_name = reg_name.replace(':', '')
                reg_name = reg_name
                #print(reg_name)
                # take size of reg_name
                for k in reg_list:
                    if k[1] == reg_name and k[0] == i:
                        size = k[2]
                        size_list_for_case.append(size)
                        break
                case_i = module_lists[i].index(j)
                
                rows_count = 0
                for line in module_lists[i][case_i+1:]:
                    if line.startswith('endcase'):
                        break
                    if line.startswith('default'):
                        print("Full Case: ")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print("=====================================")
                        mask = 1
                        break
                    if ":" in line:
                        rows_count += 1
                #print("rows_count", rows_count)
                if mask == 0:
                    if pow(2, size) != rows_count:
                        print("Non-Full Case:")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print("=====================================")
                        break
                    else:
                        print("Full Case: ")
                        print("Module", i + 1, ":", module_lists[i][0])
                        break
                          
#print(size_list_for_case)   
check_full_case(module_lists)

Non-Full Case:
Module 4 : module UnreachableState(clk, state_out);
Non-Full Case:
Module 5 : module NonFullCase(y_out);
Full Case: 
Module 6 : module NonParallelCase(y_out);


### Unintialized registers

### Inferring Latches

In [2248]:
def check_infer_latch(module_lists):
    line_count = 0

    for module_index, module in enumerate(module_lists, start=1):
        always_blocks = []
        used_signals = set()
        for i, line in enumerate(module):
            if re.search(r'always\s*@', line):
                always_blocks.append(i)

            elif re.search(r'\b\w+\s*=\s*\w+\s*\(.*\)\s*;', line):
                # Example: Extract signals from the instantiation line
                instantiation_signals = re.findall(r'\b(\w+)\s*,?', line)
                used_signals.update(instantiation_signals)


        for always_index in always_blocks:
            sensitivity_line = module[always_index]
            sensitivity_line = sensitivity_line.replace("always", "").replace("@", "").strip()

            # Extract the block content including the line with 'always' keyword
            block_content = [sensitivity_line] + module[always_index + 1:]


        

            # Check for latch inference scenarios
            check_sensitivity_list(sensitivity_line, block_content, module_index, line_count + always_index + 1, used_signals)
            check_feedback_loop(block_content, module_index, line_count + always_index + 1)
            check_missing_initial_condition(block_content, module_index, line_count + always_index + 1)
            check_if_without_else(block_content, module_index, line_count + always_index + 1)
            check_case_without_default(block_content, module_index, line_count + always_index + 1)

        # Update line_count for the next module
        line_count += len(module)
# Update the check_sensitivity_list function
def check_sensitivity_list(sensitivity_line, block_content, module_index, line_number, used_signals):
    # Check if sensitivity_line is "@*" or contains clk
    if sensitivity_line == "@*" or "clk" in sensitivity_line:
        return

    # Extract signals from the sensitivity line
    sensitivity_list = re.findall(r'\b\w+\b', sensitivity_line)

    # Remove non-signal elements from the sensitivity list
    sensitivity_list = [signal for signal in sensitivity_list if signal not in ["*", "("]]

    # Check for missing signals
    missing_signals = set()
    for signal in sensitivity_list:
        if signal not in used_signals:
            missing_signals.add(signal)

    # Print results
    if missing_signals:
        print(f"May Infer Latch in module {module_index}, : {module_lists[module_index-1][0]} , line: {line_number}")
        print(f"Reason: Signal(s) missing in the sensitivity list: {', '.join(missing_signals)}")
        print("=====================================")



def check_if_without_else(block_content, module_index, line_number):
    found_if = False
    found_else = False

    for line in block_content:
            if re.search(r'\bif\b', line):
                found_if = True
            
            # Check for 'else' inside 'always' block
            if found_if and re.search(r'\belse\b', line):
                found_else = True

    # If we reach here, it means 'if' was not followed by 'else' inside 'always' block
    if found_if and not found_else:
        print(f"Infer Latch in module {module_index}, : {module_lists[module_index - 1][0]}, line: {line_number}")
        print("Reason: 'if' statement without 'else' detected")
        print("=====================================")

def check_case_without_default(block_content, module_index, line_number):
    inside_always_block = False
    for line in block_content:
        if re.search(r'always\s*@', line):
            inside_always_block = True
        elif re.search(r'end', line):
            inside_always_block = False

        if inside_always_block and re.search(r'^\s*case\b', line) and not re.search(r'\bdefault\b', line):
            print(f"May Infer Latch in module {module_index},: {module_lists[module_index-1][0]}, line: {line_number}")
            print("Reason: 'case' statement without 'default' detected")
            print("=====================================")

def check_missing_initial_condition(block_content, module_index, line_number):
     for line_index, line in enumerate(block_content):
        if re.search(r'^\s*\w+\s*=\s*\w+\s*;', line):
            print(f"May Infer Latch in module {module_index},: {module_lists[module_index-1][0]}, line: {line_number + line_index}")
            print("Reason: Missing initial condition")
            print("Missing Initial Condition Line:", line.strip())
            print("=====================================")
            return

def check_feedback_loop(block_content, module_index, line_number):
    dependencies = {}  # Dictionary to store signal dependencies
    visited = set()  # Set to keep track of visited signals
    stack = set()  # Set to keep track of signals in the current traversal

    def dfs(signal):
        if signal in stack:
            print(f"May Infer Latch in module {module_index}, : {module_lists[module_index-1][0]}, line: {line_number}")
            print("Reason: Feedback loop detected")
            print("=====================================")
            return

        if signal not in visited:
            visited.add(signal)
            stack.add(signal)

            if signal in dependencies:
                for dependent_signal in dependencies[signal]:
                    dfs(dependent_signal)

            stack.remove(signal)

    # Extract signal dependencies
    for line in block_content[1:]:  # Exclude the sensitivity line
        assignments = re.findall(r'\b\w+\s*<=\s*\w+\s*|\b\w+\s*=\s*\w+\s*;', line)
        for assignment in assignments:
            parts = re.split(r'<=|=', assignment)
            left_signal = parts[0].strip()
            right_signal = parts[1].strip()

            if left_signal not in dependencies:
                dependencies[left_signal] = set()
            dependencies[left_signal].add(right_signal)

    # Check for feedback loops
    for signal in dependencies:
        dfs(signal)


# Call the modified checker function
check_infer_latch(module_lists)


May Infer Latch in module 1, : module UnreachableBlocks(data_out); , line: 9
Reason: Signal(s) missing in the sensitivity list: state
May Infer Latch in module 3, : module InferringLatches(enable, Data, out); , line: 29
Reason: Signal(s) missing in the sensitivity list: enable
May Infer Latch in module 3,: module InferringLatches(enable, Data, out);, line: 33
Reason: Missing initial condition
Missing Initial Condition Line: out = Data;
Infer Latch in module 3, : module InferringLatches(enable, Data, out);, line: 29
Reason: 'if' statement without 'else' detected
May Infer Latch in module 4,: module UnreachableState(clk, state_out);, line: 64
Reason: Missing initial condition
Missing Initial Condition Line: state_out = current_state;
May Infer Latch in module 4,: module UnreachableState(clk, state_out);, line: 44
Reason: 'case' statement without 'default' detected
May Infer Latch in module 4,: module UnreachableState(clk, state_out);, line: 64
Reason: Missing initial condition
Missing In

### Unreachable Blocks

### Multiple Drivers

In [2249]:
def check_multidriven_variables_assign_statements(module_lists):
    for module_index, module in enumerate(module_lists, start=1):
        print("Module Number:", module_index)
        # Extracted variable names and sizes
        variables = set()
        # Assign statements and assigned variables
        assign_statements = []

        for line in module:
            # Check if the line contains an assign statement
            if 'assign' in line:
                parts = line.split()
                assign_index = parts.index('assign')

                # Extract the assigned variable name
                if assign_index < len(parts) - 1:
                    variable_name = parts[assign_index + 1].rstrip(';')
                    # Check if the variable is already assigned in another statement
                    if variable_name in variables:
                        assign_statements.append((line, variable_name))
                    else:
                        variables.add(variable_name)

        # Now 'assign_statements' contains the assign statements and the assigned variables
        print(f"Module {module_index}:")
        for statement, variable in assign_statements:
            print(f"Multidriven Variable '{variable}': {statement}")

        # Check for multidriven variables within the same module based on assign statements
        seen_variables = set()
        for statement, variable in assign_statements:
            # Check if the variable is repeated (multidriven) in the same module
            if variable in seen_variables:
                print(f"Module {module_index}: Variable '{variable}' is multidriven.")
            else:
                seen_variables.add(variable)

        # Print the results
        print("Assign Statements:", assign_statements)
        print("Multidriven Variables:", seen_variables)
        print("---------------------")

# Example usage
check_multidriven_variables_assign_statements(module_lists)

def check_multidriven_variables_always_blocks(module_lists):
    for module_index, module in enumerate(module_lists, start=1):
        print("Module Number:", module_index)
        # Always blocks and their contents
        always_blocks = []

        # Extract contents of always blocks
        inside_always = False
        current_always_block = []

        for line in module:
            if 'always' in line:
                inside_always = True
                current_always_block = ['always']
            elif inside_always:
                current_always_block.append(line.strip())
                if 'end' in line:
                    inside_always = False
                    always_blocks.append(current_always_block)

        # Now 'always_blocks' contains the contents of always blocks
        print(f"Module {module_index}:")
        for block in always_blocks:
            print("Always Block:", block)

        # Compare always blocks to identify multidriven variables
        seen_variables = set()
        for i, block1 in enumerate(always_blocks):
            for j, block2 in enumerate(always_blocks):
                if i != j:
                    # Check for 'variable ='
                    for line1 in block1[2:-1]:  # Skip 'always', 'begin', 'end'
                        for line2 in block2[2:-1]:  # Skip 'always', 'begin', 'end'
                            if '=' in line1 and '=' in line2:
                                variable_name1 = line1.split('=')[0].strip()
                                variable_name2 = line2.split('=')[0].strip()
                                if variable_name1 == variable_name2:
                                    seen_variables.add(variable_name1)
                                    print(f"Module {module_index}: Variable '{variable_name1}' is multidriven.")

        # Print the results
        print("Multidriven Variables:", seen_variables)
        print("---------------------")

# Example usage
check_multidriven_variables_always_blocks(module_lists)



Module Number: 1
Module 1:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 2
Module 2:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 3
Module 3:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 4
Module 4:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 5
Module 5:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 6
Module 6:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 7
Module 7:
Multidriven Variable 'out': assign out = 0'b1;
Assign Statements: [("assign out = 0'b1;", 'out')]
Multidriven Variables: {'out'}
---------------------
Module Number: 8
Module 8:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 9
Module 9:
Assign Statements: []
Multidriven Variables: set()
---------------------
Module Number: 1
Modul

### Arithmetic Overflow

In [2250]:
def checkArithmeticOverflow(module_lists):
    # extracting the variables from the module
    variable_list = [[] for _ in range(len(module_lists))]
    
    for module_index, module in enumerate(module_lists, start=1):
        print("Module Number:", module_index)
        for variable_declaration in module:
            # check if the variable is input, output, wire, or reg
            if variable_declaration.startswith(('input ', 'output ', 'wire ', 'reg ')):
                # Extract variable name and size
                parts = variable_declaration.split()
                parts[-1] =  parts[-1].rstrip(';')
                # Variable names are strings after '[number:number]' or after 'reg', 'wire', 'input', 'output'
                variable_names = [part.strip(',') for part in parts[1:] if part not in ('reg', 'wire', 'input', 'output')]
                for i in variable_names:
                    if '[' in i and ']' in i:
                        variable_names.remove(i)
                
                        
                        
                variable_size = 1  # Default size is 1
                
                # Check if [number-1:0] pattern is present
                if '[' in variable_declaration and ']' in variable_declaration:
                    size_part = variable_declaration.split('[')[1].split(']')[0]
                    try:
                        # Extract the size correctly
                        if ':' in size_part:
                            sizes = size_part.split(':')
                            variable_size = abs(int(sizes[0]) - int(sizes[1])) + 1
                        else:
                            variable_size = int(size_part) + 1
                    except ValueError:
                        pass
                
                # Store the variable names and size
                variable_list[module_index - 1].extend([[name, variable_size] for name in variable_names if name])

        print("Variable List:", variable_list[module_index - 1])
        

        # extracting the operations from the module
        operation_list = []
        for operation in module:
            if '=' in operation:
                if '+' in operation or '-' in operation or '*' in operation or '/' in operation:
                    # Extract the operation
                    parts = operation.split('=')
                    parts[-1] = parts[-1].rstrip(';')
                    parts = [part.split(' ') for part in parts]
                    # Remove empty strings
                    for part in parts:
                        while '' in part:
                            part.remove('')

                    left_side_size = 0
                    right_side_size = 0
                    for variable in variable_list[module_index - 1]:
                        if variable[0] in parts[0]:
                            left_side_size = variable[1]
                            break
                    for variable in variable_list[module_index - 1]:
                        if variable[0] in parts[1]:
                            right_side_size = max(variable[1], right_side_size)
                    


                    if left_side_size <= right_side_size:
                        print("\nPossible Arithmetic Overflow in module", module_index, ":", module[0])
                        print("Line: ", operation)
                        print("Left side size:", left_side_size)
                        print("Right side size:", right_side_size)

                        

                        
                    operation_list.append(parts)


        print("Operation List:", operation_list)

        print()
    
checkArithmeticOverflow(module_lists)

Module Number: 1
Variable List: [['data_out', 1], ['reach', 1], ['state', 1]]
Operation List: []

Module Number: 2
Variable List: [['data', 1], ['data_out', 1]]
Operation List: []

Module Number: 3
Variable List: [['enable', 1], ['Data', 1], ['out', 1]]
Operation List: []

Module Number: 4
Variable List: [['clk', 1], ['state_out', 2], ['current_state', 2], ['next_state', 2]]
Operation List: []

Module Number: 5
Variable List: [['y_out', 2], ['x', 2], ['y', 2]]
Operation List: []

Module Number: 6
Variable List: [['y_out', 2], ['x', 2], ['y', 2]]
Operation List: []

Module Number: 7
Variable List: [['myIn', 2], ['y', 1]]

Possible Arithmetic Overflow in module 7 : module MultipleDrivers(input [1:0] x, output out);
Line:  y = y + 1;
Left side size: 1
Right side size: 1
Operation List: [[['y'], ['y', '+', '1']]]

Module Number: 8
Variable List: [['a', 4], ['b', 4], ['result', 4]]

Possible Arithmetic Overflow in module 8 : module ArithmeticOverflow(a,b,result);
Line:  assign result = a + 

### Non-Parallel Case