<a href="https://colab.research.google.com/github/Rekoroka/scanner/blob/main/scanner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
%%writefile scanner.cpp
#include <iostream>
#include <fstream>
#include <string>
#include <unordered_set>
#include <cctype>
#include <iomanip>
using namespace std;

// Reserved words the scanner reports as "Keyword"; any other word-shaped
// lexeme is reported as "Identifier". The table is read-only, so it is
// declared const to rule out accidental modification at run time.
const std::unordered_set<std::string> keywords = {
    "int",  "float", "double", "char",   "void",
    "if",   "else",  "for",    "while",  "return"
};

// Characters that start an operator token. Two-character operators are
// recognised by the scanner loop itself, keyed on their first character.
// Read-only lookup table, hence const.
const std::unordered_set<char> operatorsList = {'+', '-', '*', '/', '=', '<', '>', '!'};

// Punctuation reported one character at a time as "Special Character".
// Note that '"' is listed here: the scanner has no string-literal token.
// Read-only lookup table, hence const.
const std::unordered_set<char> specialChars = {'(', ')', '{', '}', ';', ',', '[', ']', '"'};

// Returns true when `s` appears in the global `keywords` table,
// false otherwise. The comparison is exact (case-sensitive).
bool isKeyword(const string& s) {
    const auto match = keywords.find(s);
    return match != keywords.end();
}

// Returns true when `s` is shaped like an identifier: non-empty, first
// character a letter or '_', every character a letter, digit or '_'.
// Keywords are not excluded here; use isKeyword for that distinction.
//
// Each character is converted to unsigned char before it reaches the
// <cctype> classifiers: passing a negative char (any byte >= 0x80 where
// char is signed) to isalpha/isalnum is undefined behaviour.
bool isIdentifier(const std::string& s) {
    if (s.empty()) return false;

    const unsigned char first = static_cast<unsigned char>(s.front());
    if (first != '_' && !std::isalpha(first)) return false;

    for (const char c : s) {
        const unsigned char uc = static_cast<unsigned char>(c);
        if (uc != '_' && !std::isalnum(uc)) return false;
    }
    return true;
}

// Width of each column in the printed token table.
static const int kColumnWidth = 25;

// Prints one table row: `lexeme` left-aligned and padded to kColumnWidth,
// then `type` padded the same way. When the lexeme already fills the column
// a single space is emitted so the two fields never run together.
static void printRow(const std::string& lexeme, const char* type) {
    std::cout << std::left << std::setw(kColumnWidth) << lexeme;
    if (lexeme.size() >= static_cast<std::size_t>(kColumnWidth)) std::cout << ' ';
    std::cout << std::setw(kColumnWidth) << type << "\n";
}

// True when `pos` is inside `code` and the character there is a decimal digit.
static bool digitAt(const std::string& code, std::size_t pos) {
    return pos < code.size() && std::isdigit(static_cast<unsigned char>(code[pos])) != 0;
}

// Reads a "//" comment starting at `pos`. Returns its text without the
// terminating newline and leaves `pos` on that newline (or at end of input).
static std::string readLineComment(const std::string& code, std::size_t& pos) {
    const std::size_t start = pos;
    pos += 2;
    while (pos < code.size() && code[pos] != '\n') ++pos;
    return code.substr(start, pos - start);
}

// Reads a "/* ... */" comment starting at `pos` and leaves `pos` just past it.
// If the closing "*/" is missing, the rest of the input is returned verbatim:
// no character is dropped and no terminator is invented.
static std::string readBlockComment(const std::string& code, std::size_t& pos) {
    const std::size_t start = pos;
    const std::size_t close = code.find("*/", start + 2);
    pos = (close == std::string::npos) ? code.size() : close + 2;
    return code.substr(start, pos - start);
}

// Reads a run of letters, digits and '_' starting at `pos`; leaves `pos`
// on the first character that is not part of the word.
static std::string readWord(const std::string& code, std::size_t& pos) {
    const std::size_t start = pos;
    while (pos < code.size()) {
        const unsigned char uc = static_cast<unsigned char>(code[pos]);
        if (uc != '_' && !std::isalnum(uc)) break;
        ++pos;
    }
    return code.substr(start, pos - start);
}

// Reads a numeric constant starting at the digit at `pos`:
//   digits [ '.' digits ] [ ('e'|'E') ['+'|'-'] digits ]
// The exponent is consumed only when at least one digit follows it, so
// input such as "12else" yields "12" and leaves "else" for the word scanner.
static std::string readNumber(const std::string& code, std::size_t& pos) {
    const std::size_t start = pos;
    while (digitAt(code, pos)) ++pos;

    if (pos < code.size() && code[pos] == '.') {
        ++pos;
        while (digitAt(code, pos)) ++pos;
    }

    if (pos < code.size() && (code[pos] == 'e' || code[pos] == 'E')) {
        std::size_t exponent = pos + 1;
        if (exponent < code.size() && (code[exponent] == '+' || code[exponent] == '-')) ++exponent;
        if (digitAt(code, exponent)) {
            pos = exponent;
            while (digitAt(code, pos)) ++pos;
        }
    }
    return code.substr(start, pos - start);
}

// Reads the operator starting at `pos`, preferring one of the two-character
// forms (== <= >= != ++ --) over the single character. Advances `pos` past it.
static std::string readOperator(const std::string& code, std::size_t& pos) {
    static const char* const kTwoCharOps[] = {"==", "<=", ">=", "!=", "++", "--"};
    if (pos + 1 < code.size()) {
        const std::string two = code.substr(pos, 2);
        for (const char* candidate : kTwoCharOps) {
            if (two == candidate) {
                pos += 2;
                return two;
            }
        }
    }
    return std::string(1, code[pos++]);
}

// Reads input.txt from the working directory, splits it into tokens and
// prints a two-column table (lexeme, token type) to standard output.
// Returns 1 if the file cannot be opened, 0 otherwise.
//
// Characters that are neither whitespace nor the start of a recognised
// token are skipped without being reported.
int main() {
    std::ifstream file("input.txt");
    if (!file.is_open()) {
        std::cout << "Error:Cannot open input.txt\n";
        return 1;
    }

    const std::string code((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

    printRow("Token", "Type");
    std::cout << "--------------------------------------\n";

    // `pos` always indexes the next unread character; every reader helper
    // leaves it there, so no post-hoc index correction is needed.
    std::size_t pos = 0;
    while (pos < code.size()) {
        const char c = code[pos];
        // <cctype> classifiers require a value representable as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(c);

        if (std::isspace(uc)) {
            ++pos;
            continue;
        }

        const bool hasNext = pos + 1 < code.size();

        // Comments are tested before operators because both start with '/'.
        if (c == '/' && hasNext && code[pos + 1] == '/') {
            printRow(readLineComment(code, pos), "Comment");
            continue;
        }
        if (c == '/' && hasNext && code[pos + 1] == '*') {
            printRow(readBlockComment(code, pos), "Comment");
            continue;
        }

        if (std::isalpha(uc) || c == '_') {
            const std::string word = readWord(code, pos);
            printRow(word, isKeyword(word) ? "Keyword" : "Identifier");
            continue;
        }

        if (std::isdigit(uc)) {
            printRow(readNumber(code, pos), "Numeric Constant");
            continue;
        }

        if (operatorsList.count(c)) {
            printRow(readOperator(code, pos), "Operator");
            continue;
        }

        if (specialChars.count(c)) {
            printRow(std::string(1, c), "Special Character");
        }
        // Special character already reported, or unrecognised character: step over it.
        ++pos;
    }

    return 0;
}


Overwriting scanner.cpp


In [51]:
%%writefile input.txt
int main() {
    int x,y;
    // This is a single-line comment
    if (x == 42) {
        /* This is
           a block
           comment */
        x = x-3;
    } else {
        y = 3.1; // Another comment
    }
    return 0;
}


Overwriting input.txt


In [52]:
!g++ scanner.cpp -o scanner
!./scanner


Token                    Type                     
--------------------------------------
int                      Keyword                  
main                     Identifier               
(                        Special Character        
)                        Special Character        
{                        Special Character        
int                      Keyword                  
x                        Identifier               
,                        Special Character        
y                        Identifier               
;                        Special Character        
// This is a single-line commentComment                  
if                       Keyword                  
(                        Special Character        
x                        Identifier               
==                       Operator                 
42                       Numeric Constant         
)                        Special Character        
{                        Special Cha