# Verilog Parsar

## Introduction
- This is a verilog parser written in python.
- It can parse verilog file and generate a report

## Usage
- Check for the following:
    - Unreachable Blocks
    - Uninitialized Register
    - Inferring Latches
    - Unreachable State
    - Non Full Case
    - Non Parallel Case
    - Multiple Drivers
    - Arithmetic Overflow
    - Integer Overflow

# Imports

In [1]:
import re
import sys
import os

In [2]:
def file_reader(file_name):
    x = []
    try:
        # Open the file in read mode
        with open(file_name, 'r') as file:
            # Read and print each line
            for line in file:
                x.append(line.strip())  

    except FileNotFoundError:
        print(f"File not found: {file_name}")

    except Exception as e:
        print(f"An error occurred: {e}")
    
    return x


### Testing reading verilog file

In [3]:
file_path = 'tst.v'

verilog_code = file_reader(file_path)
    
for i in verilog_code:
        print(i)

module n1 (A);
input reg A;
always @(*)
begin
case (A):
1'b0: A = 1'b1;
1'b1: A = 1'b0;
endcase
end

endmodule

module ExciplitDefault (A);
input reg [3:0] A;
reg [1:0] B;
B = 2'b00;
always @(*)
begin
case (A):
4'b0: B = 2'b1;
4'b1: B = 2'b0;
endcase
end

endmodule

module n2 (A);
input reg [1:0] A;
always @(*)
begin
case (A):
1'b0: A = 1'b1;
1'b1: A = 1'b0;
endcase
end

endmodule

module n3 (A);
input reg [3:0] A;
always @(*)
begin
casez (A): // synopsys full_case parallel_case
4'b???1: F = 2'b00;
4'b??1?: F = 2'b01;
4'b?1??: F = 2'b10;
4'bl???: F = 2'b11;
endcase
end

endmodule

module n4 (t);
input reg [3:0] t;
always @(*)
begin
case (t): // synopsys full_case
4'b0000: t = 4'b0001;
4'b0001: t = 4'b0010;
endcase
end
endmodule

module UnreachableBlocks(data_out);
output reg data_out;
reg reach;
wire state;

initial
begin
reach = 1'b1;
end

always @(state)
begin
if (reach == 2'b0)
begin
data_out = 1'b1;
end
else
begin
data_out = 1'b0;
end
end
endmodule

module UninitializedRegister(dat

### Dividing Modules

#### Utility Functions

In [4]:
def counting_modules(verilog_code):
    module_counter = 0
    for i in verilog_code:
        if i == "endmodule":
            module_counter += 1
    return module_counter

In [5]:
def creating_module_lists(verilog_code, module_lists, module_counter):
    for i in verilog_code:
        module_lists[len(module_lists)-module_counter].append(i)
        if i == "endmodule":
            module_counter -= 1
    return module_lists

In [6]:
def remove_empty_strings(module_lists):
    for i in module_lists:
     while("" in i) : 
        i.remove("")
    return module_lists

In [7]:
def remove_comments(module_lists):
    i_counter = 0
    for i in module_lists:
        
        j_counter = 0
        
        for j in i:
            
            if j.startswith('//'):
                i.remove(j)
            
            elif '//' in j:
                index = j.find('//')
                tmp = j[index:].split(" ")
                if "synopsys" in tmp:
                    break
                j = j[:index]
                module_lists[i_counter][j_counter] = j
            
            j_counter += 1

        i_counter += 1
    return module_lists

#### Applying utility functions

In [8]:
module_counter = counting_modules(verilog_code)

In [9]:
# create lists according to module counter
module_lists = [[] for i in range(module_counter)]
module_lists

[[], [], [], [], [], [], [], [], [], [], [], [], [], []]

In [10]:
# append each module to a module_lists from the y
module_lists = creating_module_lists(verilog_code, module_lists, module_counter)

In [11]:
# remove all empty strings from the module_lists
module_lists = remove_empty_strings(module_lists)

In [12]:
module_lists = remove_comments(module_lists)

In [13]:
module_lists

[['module n1 (A);',
  'input reg A;',
  'always @(*)',
  'begin',
  'case (A):',
  "1'b0: A = 1'b1;",
  "1'b1: A = 1'b0;",
  'endcase',
  'end',
  'endmodule'],
 ['module ExciplitDefault (A);',
  'input reg [3:0] A;',
  'reg [1:0] B;',
  "B = 2'b00;",
  'always @(*)',
  'begin',
  'case (A):',
  "4'b0: B = 2'b1;",
  "4'b1: B = 2'b0;",
  'endcase',
  'end',
  'endmodule'],
 ['module n2 (A);',
  'input reg [1:0] A;',
  'always @(*)',
  'begin',
  'case (A):',
  "1'b0: A = 1'b1;",
  "1'b1: A = 1'b0;",
  'endcase',
  'end',
  'endmodule'],
 ['module n3 (A);',
  'input reg [3:0] A;',
  'always @(*)',
  'begin',
  'casez (A): // synopsys full_case parallel_case',
  "4'b???1: F = 2'b00;",
  "4'b??1?: F = 2'b01;",
  "4'b?1??: F = 2'b10;",
  "4'bl???: F = 2'b11;",
  'endcase',
  'end',
  'endmodule'],
 ['module n4 (t);',
  'input reg [3:0] t;',
  'always @(*)',
  'begin',
  'case (t): // synopsys full_case',
  "4'b0000: t = 4'b0001;",
  "4'b0001: t = 4'b0010;",
  'endcase',
  'end',
  'endmodul

### Unreachable State

In [14]:
def calculating_FSM_index(module_lists):
    FSM_index = []
    for i in module_lists:
        always_counter = 0
        for j in i:
            if j.startswith('always'):
                always_counter += 1
                if always_counter > 1:
                    FSM_index.append(module_lists.index(i))
    for i in FSM_index:
        mask_clk = 0
        for j in module_lists[i]:
            if j.startswith('always') and "clk" in j:
                    #print(j)
                    mask_clk = 1
        if mask_clk == 0:
            FSM_index.remove(i)
                
    return FSM_index

In [15]:
FSM_index = calculating_FSM_index(module_lists)
for i in FSM_index:
    for j in module_lists[i]:
        print(j)

module UnreachableState(clk, state_out);
input clk;
output reg [1:0] state_out;
reg [1:0] current_state, next_state;
localparam [1:0] S1 = 2'b00 ;
localparam [1:0] S2 = 2'b01 ;
localparam [1:0] S3 = 2'b10 ;
always @(posedge clk)
begin
current_state <= next_state;
end
always @(*)
begin
case (current_state)
S1:
begin
next_state <= S2;
end
S2:
begin
next_state <= S1;
end
S3:
begin
next_state <= S1;
end
endcase
state_out = current_state;
end
endmodule


### Non-Full Case

In [16]:
def calculating_case_index(module_lists):
    cases_index = []
    for i in module_lists:
        for j in i:
            cmp = re.search("^case",j)
            if cmp or re.search("^casez",j) or re.search("^casex",j):
                cases_index.append(module_lists.index(i))
    return cases_index

In [17]:
def generating_reg_list(module_lists,cases_index):
    reg_list = []
    # in each list first number is case_index and second number is reg_index & size of reg is third number
    for i in cases_index:
        for j in module_lists[i]:
            if j.startswith('input'):
                j = j[len('input '):]
            if j.startswith('output'):
                j = j[len('output '):]
            if j.startswith('reg'):
                # store reg name & size in a list
                if ']' in j or '[' in j:
                    low_bound = j.find('[')
                    high_bound = j.find(']')
                    size = j[low_bound+1:high_bound]
                    size = size.split(':')
                    size = int(size[0])-int(size[1])+1
                    j = j[high_bound+1:].replace(';', '')
                    j = j.replace(' ', '')
                    variables_names = j.split(',')
                    for var_name in variables_names:
                        reg_list.append([i, var_name, size])
                else:
                    size = 1
                    low_bound = j.find('g')
                    j = j[low_bound+1:].replace(';', '')
                    j = j.replace(' ', '')
                    variables_names = j.split(',')
                    for var_name in variables_names:
                        reg_list.append([i, var_name, size])  
    #print(reg_list)
    return reg_list

In [18]:
def check_full_case(module_lists):
    cases_index = calculating_case_index(module_lists)
    reg_list = generating_reg_list(module_lists,cases_index)
    size_list_for_case = []
    mask = 0
    for i in cases_index:
        for j in module_lists[i]:
            for t in reg_list:
                if t[0] == i and t[1] in j:
                    tmp = j.split("=")
                    if len(tmp) > 1:
                        #print(tmp)
                        pre_defined_reg = tmp[0]

                    
            if j.startswith('case') or j.startswith('casez') or j.startswith('casex'):
                #print(module_lists[i][module_lists[i].index(j)+1:])
                line_after_case = module_lists[i][module_lists[i].index(j)+1:module_lists[i].index(j)+2]
                
                # convert from list to string
                line_after_case = ''.join(line_after_case)
                # remove all characters before :
                line_after_case = line_after_case[line_after_case.find(':')+1:]
                #print(line_after_case)
                try:
                    if pre_defined_reg in line_after_case:
                        print("Full Case: ")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print("Exciplicit Default Case")
                        print("=====================================")
                        print()
                        break
                except:
                    pass
                if "synopsys" in j:
                    print("Full Case: ")
                    print("Module", i + 1, ":", module_lists[i][0])
                    print("Found Synopsys Case")
                    print("=====================================")
                    print()
                    break
                bound = j.find('e')
                reg_name = j[bound+1:]
                reg_name = reg_name.replace(' ', '')
                reg_name = reg_name.replace('(', '')
                reg_name = reg_name.replace(')', '')
                reg_name = reg_name.replace(':', '')
                reg_name = reg_name
                #print(reg_name)
                # take size of reg_name
                for k in reg_list:
                    if k[1] == reg_name and k[0] == i:
                        size = k[2]
                        size_list_for_case.append(size)
                        break
                case_i = module_lists[i].index(j)
                
                rows_count = 0
                
                for line in module_lists[i][case_i+1:]:
                    if line.startswith('endcase'):
                        break
                    if line.startswith('default'):
                        print("Full Case: ")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print("Found Default Case")
                        print("=====================================")
                        print()
                        mask = 1
                        break
                    if ":" in line:
                        rows_count += 1
                #print("rows_count", rows_count)
                if mask == 0:
                    if pow(2, size) != rows_count:
                        print("Non-Full Case:")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print(f"Size of reg \"{reg_name}\":", size) 
                        print("Number of variations:", rows_count)
                        print("Expected number of variations:", pow(2, size))
                        print("Number of variations is not equal to expected number of variations")
                        print("=====================================")
                        print()
                        break
                    else:
                        print("Full Case: ")
                        print("Module", i + 1, ":", module_lists[i][0])
                        print(f"Size of reg \"{reg_name}\":", size) 
                        print("Number of variations:", rows_count)
                        print("Expected number of variations:", pow(2, size))
                        print("Number of variations is equal to expected number of variations")
                        print("=====================================")
                        print()
                        break
            
                          
#print(size_list_for_case)   
check_full_case(module_lists)

Full Case: 
Module 1 : module n1 (A);
Size of reg "A": 1
Number of variations: 2
Expected number of variations: 2
Number of variations is equal to expected number of variations

Full Case: 
Module 2 : module ExciplitDefault (A);
Exciplicit Default Case

Non-Full Case:
Module 3 : module n2 (A);
Size of reg "A": 2
Number of variations: 2
Expected number of variations: 4
Number of variations is not equal to expected number of variations

Full Case: 
Module 4 : module n3 (A);
Found Synopsys Case

Full Case: 
Module 5 : module n4 (t);
Found Synopsys Case

Non-Full Case:
Module 9 : module UnreachableState(clk, state_out);
Size of reg "current_state": 2
Number of variations: 3
Expected number of variations: 4
Number of variations is not equal to expected number of variations

Non-Full Case:
Module 10 : module n5(y_out);
Size of reg "x": 2
Number of variations: 2
Expected number of variations: 4
Number of variations is not equal to expected number of variations

Full Case: 
Module 11 : module 

### Unintialized registers

### Inferring Latches

In [19]:
def check_infer_latch(module_lists):
    line_count = 0

    for module_index, module in enumerate(module_lists, start=1):
        always_blocks = []
        used_signals = set()
        for i, line in enumerate(module):
            if re.search(r'always\s*@', line):
                always_blocks.append(i)

            
        for always_index in always_blocks:
            sensitivity_line = module[always_index]
            sensitivity_line = sensitivity_line.replace("always", "").replace("@", "").strip()

            # Extract the block content including the line with 'always' keyword
            block_content = [sensitivity_line] + module[always_index + 1:]

            # Check for latch inference scenarios
            check_sensitivity_list(sensitivity_line, module_index, line_count + always_index + 1, used_signals)
            check_feedback_loop(block_content, module_index, line_count + always_index + 1)
            check_missing_initial_condition(block_content, module_index, line_count + always_index + 1)
            check_if_without_else(block_content, module_index, line_count + always_index + 1)
            check_case_without_default(block_content, module_index, line_count + always_index + 1)

        # Update line_count for the next module
        line_count += len(module)
# Update the check_sensitivity_list function
def check_sensitivity_list(sensitivity_line, module_index, line_number, used_signals):
    # Check if sensitivity_line is "@*" or contains clk
    if "@(*)" in sensitivity_line or "clk" in sensitivity_line or "@*" in sensitivity_line:
        print("full list")
        return

    # Extract signals from the sensitivity line
    sensitivity_list = re.findall(r'\b([a-zA-Z_]\w*)\b', sensitivity_line)
    # Remove non-signal elements from the sensitivity list
    sensitivity_list = [signal for signal in sensitivity_list if signal not in ["*", "("]]

    # Check for missing signals
    missing_signals = set()
    for signal in sensitivity_list:
        if signal not in used_signals:
            missing_signals.add(signal)

    # Print results
    if missing_signals:
        print(f"May Infer Latch in module {module_index}, : {module_lists[module_index-1][0]} , line: {line_number}")
        print(f"Reason: Signal(s) missing in the sensitivity list: {', '.join(missing_signals)}")
        print("=====================================")



def check_if_without_else(block_content, module_index, line_number):
    found_if = False
    found_else = False

    for line in block_content:
            if re.search(r'\bif\b', line):
                found_if = True
            
            # Check for 'else' inside 'always' block
            if found_if and re.search(r'\belse\b', line):
                found_else = True

    # If we reach here, it means 'if' was not followed by 'else' inside 'always' block
    if found_if and not found_else:
        print(f"Infer Latch in module {module_index}, : {module_lists[module_index - 1][0]}, line: {line_number}")
        print("Reason: 'if' statement without 'else' detected")
        print("=====================================")

def check_case_without_default(block_content, module_index, line_number):
    inside_always_block = False
    for line in block_content:
        if re.search(r'always\s*@', line):
            inside_always_block = True
        elif re.search(r'end', line):
            inside_always_block = False

        if inside_always_block and re.search(r'^\s*case\b', line) and not re.search(r'\bdefault\b', line):
            print(f"May Infer Latch in module {module_index},: {module_lists[module_index-1][0]}, line: {line_number}")
            print("Reason: 'case' statement without 'default' detected")
            print("=====================================")

def check_missing_initial_condition(block_content, module_index, line_number):
     for line_index, line in enumerate(block_content):
        if re.search(r'^\s*\w+\s*=\s*\w+\s*;', line):
            print(f"May Infer Latch in module {module_index},: {module_lists[module_index-1][0]}, line: {line_number + line_index}")
            print("Reason: Missing initial condition")
            print("Missing Initial Condition Line:", line.strip())
            print("=====================================")
            return

def check_feedback_loop(block_content, module_index, line_number):
    dependencies = {}  # Dictionary to store signal dependencies

    # Extract signal dependencies from the block content
    for line in block_content[1:]:  # Exclude the sensitivity line
       match = re.search(r'\b(\w+)\s*(<=|=)\s*(.+?)\s*;', line)
       if match:
            left_signal, operator, right_expr = match.groups()
            dependencies[left_signal] = re.findall(r'\b\w+\b', right_expr)

    # Check for feedback loops
    for signal in dependencies:
        if signal in dependencies[signal]:
            print(f"May Infer Latch in module {module_index}, : {module_lists[module_index-1][0]}, line: {line_number}")
            print("Reason: Combinational Feedback loop detected")
            print("=====================================")
            return


# Call the modified checker function
check_infer_latch(module_lists)


May Infer Latch in module 6, : module UnreachableBlocks(data_out); , line: 63
Reason: Signal(s) missing in the sensitivity list: state
May Infer Latch in module 8, : module InferringLatches(enable, Data, out); , line: 83
Reason: Signal(s) missing in the sensitivity list: enable
May Infer Latch in module 8,: module InferringLatches(enable, Data, out);, line: 87
Reason: Missing initial condition
Missing Initial Condition Line: out = Data;
Infer Latch in module 8, : module InferringLatches(enable, Data, out);, line: 83
Reason: 'if' statement without 'else' detected
full list
May Infer Latch in module 9,: module UnreachableState(clk, state_out);, line: 118
Reason: Missing initial condition
Missing Initial Condition Line: state_out = current_state;
May Infer Latch in module 9,: module UnreachableState(clk, state_out);, line: 98
Reason: 'case' statement without 'default' detected
May Infer Latch in module 9,: module UnreachableState(clk, state_out);, line: 118
Reason: Missing initial conditi

### Unreachable Blocks

### Multiple Drivers

In [20]:
def check_multidriven_variables_always_blocks(module_lists):
    modules_with_multidriven_variables = set()

    for module_index, module in enumerate(module_lists, start=1):
        # Always blocks and their contents
        always_blocks = []

        # Extract contents of always blocks
        inside_always = False
        current_always_block = []

        for line in module:
            if 'always' in line:
                inside_always = True
                current_always_block = ['always']
            elif inside_always:
                current_always_block.append(line.strip())
                if 'end' in line:
                    inside_always = False
                    always_blocks.append(current_always_block)

        # Compare always blocks to identify multidriven variables
        seen_variables = set()
        for i, block1 in enumerate(always_blocks):
            for j, block2 in enumerate(always_blocks):
                if i != j:
                    # Check for 'variable ='
                    for line1 in block1[2:-1]:  # Skip 'always', 'begin', 'end'
                        for line2 in block2[2:-1]:  # Skip 'always', 'begin', 'end'
                            if '=' in line1 and '=' in line2:
                                variable_name1 = line1.split('=')[0].strip()
                                variable_name2 = line2.split('=')[0].strip()
                                if variable_name1 == variable_name2:
                                    seen_variables.add(variable_name1)

        # Print the results only if there are multidriven variables in the module
        if seen_variables and module_index not in modules_with_multidriven_variables:
            modules_with_multidriven_variables.add(module_index)
            print(f"Module {module_index}:")
            for block in always_blocks:
                print("Always Block:", block)
            print("Multidriven Variables:", seen_variables)
            print("---------------------")

# Example usage
check_multidriven_variables_always_blocks(module_lists)


def check_multidriven_variables_assign_statements(module_lists):
    for module_index, module in enumerate(module_lists, start=1):
        # Extracted variable names and sizes
        variables = set()
        # Assign statements and assigned variables
        assign_statements = []

        for line in module:
            # Check if the line contains an assign statement
            if 'assign' in line:
                parts = line.split()
                assign_index = parts.index('assign')

                # Extract the assigned variable name
                if assign_index < len(parts) - 1:
                    variable_name = parts[assign_index + 1].rstrip(';')
                    # Check if the variable is already assigned in another statement
                    if variable_name in variables:
                        assign_statements.append((line, variable_name))
                    else:
                        variables.add(variable_name)

        # Check for multidriven variables within the same module based on assign statements
        seen_variables = set()
        for statement, variable in assign_statements:
            # Check if the variable is repeated (multidriven) in the same module
            if variable in seen_variables:
                print(f"Module {module_index}: Variable '{variable}' is multidriven.")
            else:
                seen_variables.add(variable)

        # Print the results only if there are multidriven variables in the module
        if seen_variables:
            print(f"Module {module_index}:")
            print("Assign Statements:", assign_statements)
            print("Multidriven Variables:", seen_variables)
            print("---------------------")

# Example usage
check_multidriven_variables_assign_statements(module_lists)

Module 12:
Always Block: ['always', 'begin', 'y = y + 1;', 'end']
Always Block: ['always', 'begin', "y = 1'b0;", 'end']
Multidriven Variables: {'y'}
---------------------
Module 12:
Assign Statements: [("assign out = 0'b1;", 'out')]
Multidriven Variables: {'out'}
---------------------


### Arithmetic Overflow

In [21]:
def checkArithmeticOverflow(module_lists):
    # extracting the variables from the module
    variable_list = [[] for _ in range(len(module_lists))]
    
    for module_index, module in enumerate(module_lists, start=1):
        print("Module Number:", module_index)
        for variable_declaration in module:
            # check if the variable is input, output, wire, or reg
            if variable_declaration.startswith(('input ', 'output ', 'wire ', 'reg ')):
                # Extract variable name and size
                parts = variable_declaration.split()
                parts[-1] =  parts[-1].rstrip(';')
                # Variable names are strings after '[number:number]' or after 'reg', 'wire', 'input', 'output'
                variable_names = [part.strip(',') for part in parts[1:] if part not in ('reg', 'wire', 'input', 'output')]
                for i in variable_names:
                    if '[' in i and ']' in i:
                        variable_names.remove(i)
                
                        
                        
                variable_size = 1  # Default size is 1
                
                # Check if [number-1:0] pattern is present
                if '[' in variable_declaration and ']' in variable_declaration:
                    size_part = variable_declaration.split('[')[1].split(']')[0]
                    try:
                        # Extract the size correctly
                        if ':' in size_part:
                            sizes = size_part.split(':')
                            variable_size = abs(int(sizes[0]) - int(sizes[1])) + 1
                        else:
                            variable_size = int(size_part) + 1
                    except ValueError:
                        pass
                
                # Store the variable names and size
                variable_list[module_index - 1].extend([[name, variable_size] for name in variable_names if name])

        print("Variable List:", variable_list[module_index - 1])
        

        # extracting the operations from the module
        operation_list = []
        for operation in module:
            if '=' in operation:
                if '+' in operation or '-' in operation or '*' in operation or '/' in operation:
                    # Extract the operation
                    parts = operation.split('=')
                    parts[-1] = parts[-1].rstrip(';')
                    parts = [part.split(' ') for part in parts]
                    # Remove empty strings
                    for part in parts:
                        while '' in part:
                            part.remove('')

                    left_side_size = 0
                    right_side_size = 0
                    for variable in variable_list[module_index - 1]:
                        if variable[0] in parts[0]:
                            left_side_size = variable[1]
                            break
                    for variable in variable_list[module_index - 1]:
                        if variable[0] in parts[1]:
                            right_side_size = max(variable[1], right_side_size)
                    


                    if left_side_size <= right_side_size:
                        print("\nPossible Arithmetic Overflow in module", module_index, ":", module[0])
                        print("Line: ", operation)
                        print("Left side size:", left_side_size)
                        print("Right side size:", right_side_size)

                        

                        
                    operation_list.append(parts)


        print("Operation List:", operation_list)

        print()
    
checkArithmeticOverflow(module_lists)

Module Number: 1
Variable List: [['A', 1]]
Operation List: []

Module Number: 2
Variable List: [['A', 4], ['B', 2]]
Operation List: []

Module Number: 3
Variable List: [['A', 2]]
Operation List: []

Module Number: 4
Variable List: [['A', 4]]
Operation List: []

Module Number: 5
Variable List: [['t', 4]]
Operation List: []

Module Number: 6
Variable List: [['data_out', 1], ['reach', 1], ['state', 1]]
Operation List: []

Module Number: 7
Variable List: [['data', 1], ['data_out', 1]]
Operation List: []

Module Number: 8
Variable List: [['enable', 1], ['Data', 1], ['out', 1]]
Operation List: []

Module Number: 9
Variable List: [['clk', 1], ['state_out', 2], ['current_state', 2], ['next_state', 2]]
Operation List: []

Module Number: 10
Variable List: [['y_out', 2], ['x', 2], ['y', 2]]
Operation List: []

Module Number: 11
Variable List: [['y_out', 2], ['x', 2], ['y', 2]]
Operation List: []

Module Number: 12
Variable List: [['myIn', 2], ['y', 1]]

Possible Arithmetic Overflow in module 12 :

### Non-Parallel Case