In [18]:
'''

    [ Sourcecode Handwriting Characteristics Extractor ]
        From Source Code(C Based) -> CSV
        Made by Chu7zpah
      
      <Features>
        Feature 1. Brace('{') Habits
            case 1: 'FUNCTION(){'       (inline brace with NO Space)
            case 2: 'FUNCTION() {'      (inline brace with Space)
            case 3: 'FUNCTION()         (next line brace)
                     {'          

        Feature 2. Comment('//' or '/* */') Habits
            case 1: '//COMMENT'         ('//' with NO Space)
            case 2: '// COMMENT'        ('//' with Space)
            case 3: '/* COMMENT */'     (using '/* */')

        Feature 3. Parenthesis('(') Habits
            case 1: '{if/switch/while/for}('    (Reserved words with NO Space)
            case 2: '{if/switch/while/for} ('   (Reserved words with Space)

            
'''

import re

# <Input / Output Files>
# Read the whole C source into memory, then release the handle right away so
# the file descriptor is not leaked for the rest of the session.
with open('./test.c', 'r') as raw_file:
    line_list = raw_file.readlines()

# NOTE(review): the CSV is opened (and truncated) here but nothing in this
# section writes to it or closes it; presumably a later step does -- confirm.
csv_file = open('./optest.csv', 'w')

# Names of the counted style features. Each name doubles as the name of the
# compiled pattern that detects it and as the prefix of its counter key.
# (The comment features are named "backslash" although they detect '/'.)
features_list = [
    # Feature 1. Brace Habits
    'inline_NO_brace', 'inline_YES_brace', 'nextline_brace',
    # Feature 2. Comment Habits
    'double_backslash_NO_Space', 'double_backslash_YES_Space', 'single_backslash_asterisk',
]
print("[Feature Columns]")
print(features_list)


# <Extracted Feature Data Dictionary>
# One '<feature>_count' entry per feature, every counter starting at zero.
features_data_dictionary = {f'{feature}_count': 0 for feature in features_list}
print("\n[initialized Feature data dictionary]")
print(features_data_dictionary)


# <Feature RE Expressions>
# All patterns are raw strings so the regex escapes (\S, \s, \{, \*) reach the
# regex engine untouched instead of being parsed as (invalid) string escapes.
#
# '\S' already covers ')' and every other non-space character, so it replaces
# the former '[\)|\S]' class without changing what is matched.
inline_NO_brace = re.compile(r'\S\{')       # 'Function(){' OR 'struct{'    ('{' right after a non-space character)
inline_YES_brace = re.compile(r'\S\s+\{')   # 'Function() {' OR 'struct {'  (whitespace between the token and '{')
nextline_brace = re.compile(r'\s*\{')       # 'ENTER' + '{'  (matches any '{'; meant to be tried after the two inline patterns)

double_backslash_NO_Space = re.compile(r'//\S')      # '//Comment'
double_backslash_YES_Space = re.compile(r'//\s+\S')  # '// Comment'
single_backslash_asterisk = re.compile(r'/\*')       # '/* Comment */'


# <String Checker RE Expression>
# Matches one double-quoted string literal; group 1 is the text between the
# quotes. A backslash-escaped character (such as \") is consumed as part of the
# literal, so the match does not end early at an escaped quote.
string_exist = re.compile(r'"((?:\\.|[^"\\])*)"')


# <Analyzing Source Code>
# Each line is classified independently: at most one brace feature and at most
# one comment feature is counted per line.
for line in line_list:
    # Preprocessing (Exclude Strings)
    # Blank out every string literal in one pass so braces or comment markers
    # inside strings are not counted. Substituting match-by-match only touches
    # the matched literals themselves, never other look-alike text on the line.
    line = string_exist.sub('""', line)

    # Feature Extraction
    #   (Feature 1 - Brace)
    # Order matters: the next-line pattern matches any '{', so it is only
    # consulted when neither inline pattern matched, i.e. when nothing but
    # whitespace precedes the brace on this line.
    if inline_NO_brace.search(line) is not None:
        features_data_dictionary['inline_NO_brace_count'] += 1
    elif inline_YES_brace.search(line) is not None:
        features_data_dictionary['inline_YES_brace_count'] += 1
    elif nextline_brace.search(line) is not None:
        features_data_dictionary['nextline_brace_count'] += 1

    #   (Feature 2 - Comment)
    # '//' styles take precedence over '/*' when both appear on one line.
    if double_backslash_NO_Space.search(line) is not None:
        features_data_dictionary['double_backslash_NO_Space_count'] += 1
    elif double_backslash_YES_Space.search(line) is not None:
        features_data_dictionary['double_backslash_YES_Space_count'] += 1
    elif single_backslash_asterisk.search(line) is not None:
        features_data_dictionary['single_backslash_asterisk_count'] += 1

print("\n[Calculated Feature data dictionary]")
print(features_data_dictionary)

[Feature Columns]
['inline_NO_brace', 'inline_YES_brace', 'nextline_brace', 'double_backslash_NO_Space', 'double_backslash_YES_Space', 'single_backslash_asterisk']

[initialized Feature data dictionary]
{'inline_NO_brace_count': 0, 'inline_YES_brace_count': 0, 'nextline_brace_count': 0, 'double_backslash_NO_Space_count': 0, 'double_backslash_YES_Space_count': 0, 'single_backslash_asterisk_count': 0}

[Calculated Feature data dictionary]
{'inline_NO_brace_count': 3, 'inline_YES_brace_count': 1, 'nextline_brace_count': 3, 'double_backslash_NO_Space_count': 5, 'double_backslash_YES_Space_count': 2, 'single_backslash_asterisk_count': 2}
