<a href="https://colab.research.google.com/github/Rekoroka/parser/blob/main/parser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%writefile scanner.cpp
#include <iostream>
#include <fstream>
#include <string>
#include <unordered_set>
#include <cctype>
#include <iomanip>
using namespace std;

// Reserved words: an identifier-shaped lexeme found in this set is reported
// as a "Keyword" instead of an "Identifier".
unordered_set<string> keywords = {
    // type names
    "int", "float", "double", "char", "void",
    // control flow
    "if", "else", "for", "while", "return"
};

// Characters that begin an operator token.
unordered_set<char> operatorsList = {
    '+', '-', '*', '/',
    '=', '<', '>', '!',
    '&', '|'
};

// Punctuation reported one character at a time as "Special Character".
unordered_set<char> specialChars = {
    '(', ')',
    '{', '}',
    '[', ']',
    ';', ',', '"'
};

// Returns true when `s` is one of the reserved words stored in `keywords`.
bool isKeyword(const string& s) {
    const bool found = keywords.find(s) != keywords.end();
    return found;
}

// Appends one "<type> <token>" line to `outFile`.
// Tokens whose type is "Comment" are not written at all.
void outputToken(ofstream& outFile, const string& token, const string& type) {
    if (type == "Comment") {
        return;
    }
    outFile << type << " " << token << "\n";
}

// Scanner entry point.
//
// Reads all of input.txt, splits it into tokens, prints a two-column table of
// every lexeme (comments included) to stdout, and writes one "<type> <value>"
// line per non-comment token to tokens_output.txt, followed by "EOF EOF".
//
// Token categories: Keyword, Identifier, Numeric Constant, Operator,
// Special Character, and Unknown (any character no other rule claims, so
// nothing from the input is dropped silently).
//
// Returns 0 on success, 1 if a file cannot be opened or the output stream
// reports a write failure.
int main() {
    ifstream file("input.txt");
    if (!file.is_open()) {
        cerr << "Error: Cannot open input.txt\n";
        return 1;
    }

    ofstream outFile("tokens_output.txt");
    if (!outFile.is_open()) {
        cerr << "Error: Cannot open tokens_output.txt for writing\n";
        return 1;
    }

    const string code((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
    const size_t n = code.size();

    // The <cctype> classifiers have undefined behaviour for negative values,
    // so every char is converted to unsigned char before being classified.
    const auto uc = [](char ch) { return static_cast<unsigned char>(ch); };

    // Prints one table row. A value that fills the whole 25-column field gets
    // an explicit separator so it does not run into the type column.
    const auto printRow = [](const string& value, const string& type) {
        cout << left << setw(25) << value;
        if (value.size() >= 25) cout << ' ';
        cout << setw(25) << type << "\n";
    };

    // Records a real token: one line in the token file plus one table row.
    const auto emit = [&](const string& value, const string& type) {
        outputToken(outFile, value, type);
        printRow(value, type);
    };

    cout << "\n\n";
    cout << "\n--- Scanner Output ---\n";
    cout << left << setw(25) << "Token Value" << setw(25) << "Token Type" << "\n";
    cout << "--------------------------------------\n";

    size_t i = 0;
    while (i < n) {
        const char c = code[i];

        if (isspace(uc(c))) {
            ++i;
            continue;
        }

        // Single-line comment: runs up to, but not including, the newline.
        if (c == '/' && i + 1 < n && code[i + 1] == '/') {
            size_t end = code.find('\n', i);
            if (end == string::npos) end = n;
            printRow(code.substr(i, end - i), "Comment");
            i = end;
            continue;
        }

        // Block comment: runs through the closing "*/". The search starts
        // after the opening "/*" so that "/*/" is not treated as closed.
        if (c == '/' && i + 1 < n && code[i + 1] == '*') {
            const size_t close = code.find("*/", i + 2);
            if (close == string::npos) {
                // Unterminated: keep the text exactly as written and warn,
                // rather than inventing a closing delimiter.
                cerr << "Warning: unterminated block comment starting at offset " << i << "\n";
                printRow(code.substr(i), "Comment");
                i = n;
            } else {
                printRow(code.substr(i, close + 2 - i), "Comment");
                i = close + 2;
            }
            continue;
        }

        // Identifiers and keywords: [A-Za-z_][A-Za-z0-9_]*
        if (isalpha(uc(c)) || c == '_') {
            const size_t start = i;
            while (i < n && (isalnum(uc(code[i])) || code[i] == '_')) ++i;
            const string word = code.substr(start, i - start);
            emit(word, isKeyword(word) ? "Keyword" : "Identifier");
            continue;
        }

        // Numbers: digits with at most one '.'
        if (isdigit(uc(c))) {
            const size_t start = i;
            bool hasDot = false;
            while (i < n && (isdigit(uc(code[i])) || (code[i] == '.' && !hasDot))) {
                if (code[i] == '.') hasDot = true;
                ++i;
            }
            emit(code.substr(start, i - start), "Numeric Constant");
            continue;
        }

        // Operators: prefer a recognised two-character operator over a
        // single-character one.
        if (operatorsList.count(c)) {
            string op(1, c);
            if (i + 1 < n) {
                const string two = op + code[i + 1];
                if (two == "==" || two == "<=" || two == ">=" || two == "!=" ||
                    two == "++" || two == "--" || two == "&&" || two == "||") {
                    op = two;
                }
            }
            emit(op, "Operator");
            i += op.size();
            continue;
        }

        // Special characters
        if (specialChars.count(c)) {
            emit(string(1, c), "Special Character");
            ++i;
            continue;
        }

        // Anything else is passed on as an "Unknown" token so the consumer of
        // the token file can reject it instead of never seeing it.
        cerr << "Warning: unrecognized character '" << c << "' (code "
             << static_cast<int>(uc(c)) << ") at offset " << i << "\n";
        emit(string(1, c), "Unknown");
        ++i;
    }

    outputToken(outFile, "EOF", "EOF");
    outFile.close();
    if (outFile.fail()) {
        cerr << "Error: Failed while writing tokens_output.txt\n";
        return 1;
    }

    cout << "--------------------------------------\n";
    cout << "Scanner: Tokens successfully written to tokens_output.txt\n";
    return 0;
}

Writing scanner.cpp


In [2]:
%%writefile input.txt
int main() {
    int x,y;
    // This is a single-line comment
    if (x == 42) {
        /* This is
           a block
           comment */
        x = x-3;
    } else {
        y = 3.1; // Another comment
    }
    return 0;
}


Writing input.txt


In [3]:
!g++ scanner.cpp -o scanner
!./scanner




--- Scanner Output ---
Token Value              Token Type               
--------------------------------------
int                      Keyword                  
main                     Identifier               
(                        Special Character        
)                        Special Character        
{                        Special Character        
int                      Keyword                  
x                        Identifier               
,                        Special Character        
y                        Identifier               
;                        Special Character        
// This is a single-line commentComment                  
if                       Keyword                  
(                        Special Character        
x                        Identifier               
==                       Operator                 
42                       Numeric Constant         
)                        Special Character        
{         

In [4]:
%%writefile parser.cpp
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <stdexcept>
#include <sstream>

using namespace std;

// One token as loaded from the token file.
// Member order matters: the rest of this file builds tokens positionally,
// e.g. {value, type}.
struct Token {
    std::string value;  // token text, e.g. "x", "42", "==" ("EOF" for the end marker)
    std::string type;   // category label, e.g. "Keyword", "Identifier", "Special Character", "EOF"
};

// Global parser state: the full token list filled in by main(), and the index
// of the token currently under inspection. peek() reads it, advance() moves it.
std::vector<Token> tokens;
size_t currentTokenIndex = 0;

// Returns a copy of the current token without consuming it.
// Once the index has run past the end of `tokens`, a synthetic token with an
// empty value and type "EOF" is returned instead.
Token peek() {
    if (currentTokenIndex >= tokens.size()) {
        return Token{"", "EOF"};
    }
    return tokens[currentTokenIndex];
}

// Moves to the next token; does nothing once the index is already past the
// last token.
void advance() {
    if (currentTokenIndex >= tokens.size()) {
        return;
    }
    ++currentTokenIndex;
}

void match(const std::string& expectedValue, const std::string& expectedType) {
    Token current = peek();

    // Check if the current token matches the expected value and type
    if ((expectedValue.empty() || current.value == expectedValue) && current.type == expectedType) {
        advance();
    }
    else {
        string errorMsg = "Syntax Error: Expected ";
        if (!expectedValue.empty()) {
            errorMsg += "'" + expectedValue + "' (" + expectedType + ")";
        } else {
            errorMsg += "Type '" + expectedType + "'";
        }
        errorMsg += ", but found '" + current.value + "' (Type: " + current.type + ")";
        errorMsg += " at token index " + std::to_string(currentTokenIndex);
        throw runtime_error(errorMsg);
    }
}

// Consumes the current token if its type is `expectedType`, whatever its
// value; otherwise match() throws. An empty expected value means "any value".
void matchType(const std::string& expectedType) {
    const std::string anyValue;
    match(anyValue, expectedType);
}

// Grammar rules.
// Each parseX() is declared up front because the rules call one another
// recursively (e.g. parseFactor -> parseExpression, parseBlock -> parseStatement).
void parseExpression();         // additive expression
void parseTerm();               // multiplicative expression
void parseFactor();             // identifier, number, or parenthesised expression
void parseLogicalExpression();  // expression with an optional relational comparison
void parseIfStatement();        // if (...) body [else body]
void parseBlock();              // '{' statements '}'
void parseStatement();          // one statement
void parseFunction();           // one function definition
void parseProgram();            // top-level rule


// Factor -> Identifier | NumericConstant | '(' Expression ')'
void parseFactor() {
    Token current = peek();
    if (current.type == "Identifier" || current.type == "Numeric Constant") {
        advance();
    } else if (current.value == "(") {
        match("(", "Special Character");
        parseExpression();
        match(")", "Special Character");
    } else {
        throw runtime_error("Factor: Expected Identifier, Numeric Constant, or '('");
    }
}

// Term -> Factor { ('*' | '/') Factor }
void parseTerm() {
    parseFactor();
    for (;;) {
        const std::string next = peek().value;
        if (next != "*" && next != "/") {
            break;
        }
        matchType("Operator");  // consume the '*' or '/'
        parseFactor();
    }
}

// Expression -> Term { ('+' | '-') Term }
void parseExpression() {
    parseTerm();
    for (;;) {
        const std::string next = peek().value;
        if (next != "+" && next != "-") {
            break;
        }
        matchType("Operator");  // consume the '+' or '-'
        parseTerm();
    }
}

// LogicalExpression -> Expression RelationalOperator Expression
void parseLogicalExpression() {
    parseExpression();
    Token op = peek();
    if (op.value == "==" || op.value == "!=" || op.value == "<" || op.value == ">" || op.value == "<=" || op.value == ">=") {
        matchType("Operator"); // RelationalOperator
        parseExpression();
    }
}

// Block -> '{' Statements '}'
void parseBlock() {
    match("{", "Special Character");
    while (peek().value != "}") {
        parseStatement();
    }
    match("}", "Special Character");
}

void parseIfStatement() {
    match("if", "Keyword");
    match("(", "Special Character");
    parseLogicalExpression();
    match(")", "Special Character");

    // Statement following 'if'
    if (peek().value == "{") {
        parseBlock();
    } else {
        parseStatement();
    }

    if (peek().value == "else") {
        match("else", "Keyword");
        // Statement following 'else'
        if (peek().value == "{") {
            parseBlock();
        } else {
            parseStatement();
        }
    }
}

// Statement -> Declaration | Assignment ';' | IfStatement | ReturnStatement | Block
void parseStatement() {
    Token current = peek();

    if (current.type == "Keyword") {
        if (current.value == "return") {
            // ReturnStatement -> 'return' Expression ';'
            advance();
            parseExpression();
            match(";", "Special Character");
        } else if (current.value == "if") {
            parseIfStatement();
        } else if (current.value == "int" || current.value == "float" || current.value == "double") {
            // Declaration -> Type Identifier [',' Identifier]* ['=' Expression] ';'
            advance();
            do {
                matchType("Identifier");
                if (peek().value == "=") {
                    match("=", "Operator");
                    parseExpression();
                }
            } while (peek().value == "," && (match(",", "Special Character"), true));
            match(";", "Special Character");
        }
    }
    else if (current.type == "Identifier") {
        // Assignment -> Identifier '=' Expression ';'
        matchType("Identifier");
        match("=", "Operator");
        parseExpression();
        match(";", "Special Character");
    }
    else if (current.value == "{") {
        // Block
        parseBlock();
    }
    else {
        throw runtime_error("Statement: Expected valid statement start (Keyword, Identifier, or '{'). Found: " + current.value);
    }
}

// Function   -> Type Identifier '(' [ Parameter { ',' Parameter } ] ')' Block
// Parameter  -> Type Identifier
// Any token of type "Keyword" is accepted where a Type is expected.
void parseFunction() {
    const auto parseParameter = [] {
        matchType("Keyword");
        matchType("Identifier");
    };

    // Return type and function name.
    matchType("Keyword");
    matchType("Identifier");
    match("(", "Special Character");

    // Optional comma-separated parameter list.
    const bool hasParameters = peek().value != ")";
    if (hasParameters) {
        parseParameter();
        while (peek().value == ",") {
            match(",", "Special Character");
            parseParameter();
        }
    }

    match(")", "Special Character");
    parseBlock();
}

// Program -> Functions EOF
void parseProgram() {
    parseFunction();
    match("EOF", "EOF");
}


// Parser entry point.
//
// Loads tokens from tokens_output.txt, where each line is "<type> <value>" and
// the type is one word or one of the two-word types starting with "Special" or
// "Numeric". It then runs parseProgram() over them.
//
// Returns 0 when the token stream is syntactically valid. Returns 1 when the
// file cannot be opened, a line is malformed, the list is empty or does not
// end with an EOF token, or a syntax error is detected.
int main() {
    ifstream tokenFile("tokens_output.txt");
    if (!tokenFile.is_open()) {
        cerr << "Error: Cannot open tokens_output.txt. Please ensure scanner.cpp runs first to create this file.\n";
        return 1;
    }

    string line;
    size_t lineNumber = 0;

    while (getline(tokenFile, line)) {
        ++lineNumber;

        // Tolerate CRLF line endings and skip blank lines.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.find_first_not_of(" \t") == string::npos) {
            continue;
        }

        // The type is declared per line so that a failed extraction can never
        // reuse the previous line's type.
        stringstream ss(line);
        string type;
        ss >> type;
        if (type == "Special" || type == "Numeric") {
            // Two-word type such as "Special Character".
            string secondWord;
            if (ss >> secondWord) {
                type += " " + secondWord;
            }
        }

        // The value is everything after "<type> ". Check the layout before
        // slicing so a short or oddly spaced line is reported instead of
        // making substr() throw.
        const size_t valueStart = type.length() + 1;
        const bool wellFormed = line.size() > valueStart
                             && line.compare(0, type.length(), type) == 0
                             && line[type.length()] == ' ';
        if (!wellFormed) {
            cerr << "Parser Error: Malformed token on line " << lineNumber
                 << " of tokens_output.txt: '" << line << "'\n";
            return 1;
        }

        tokens.push_back({line.substr(valueStart), type});
    }
    tokenFile.close();

    cout << "Parser: Starting syntax analysis from tokens_output.txt\n";

    if (tokens.empty() || tokens.back().type != "EOF") {
        cerr << "Parser Error: Token list is empty or incorrectly terminated (Did the scanner fail?).\n";
        return 1;
    }

    try {
        parseProgram();
        cout << "\n Parser: SUCCESS! The code is syntactically correct.\n";

    } catch (const std::runtime_error& e) {
        cerr << "\n Parser: FAILED! Syntax Error Detected.\n";
        cerr << "Error message: " << e.what() << "\n";

        // The index still points at the token that could not be consumed.
        if (currentTokenIndex < tokens.size()) {
            cerr << "The offending token is: '" << tokens[currentTokenIndex].value
                 << "' (Type: " << tokens[currentTokenIndex].type << ")\n";
        }

        return 1;
    } catch (...) {
        cerr << "An unexpected error occurred during parsing.\n";
        return 1;
    }

    return 0;
}

Writing parser.cpp


In [5]:
!g++ parser.cpp -o parser
!./parser

Parser: Starting syntax analysis from tokens_output.txt

 Parser: SUCCESS! The code is syntactically correct.


In [6]:
%%writefile input.txt
int main() {
    int x,y;
    // This is a single-line comment
    if (x == 42) {
        /* This is
           a block
           comment */
        x = x-3
    } else {
        y = 3.1; // Another comment
    }
    return 0;
}

Overwriting input.txt


In [7]:
!g++ scanner.cpp -o scanner
!./scanner
!g++ parser.cpp -o parser
!./parser




--- Scanner Output ---
Token Value              Token Type               
--------------------------------------
int                      Keyword                  
main                     Identifier               
(                        Special Character        
)                        Special Character        
{                        Special Character        
int                      Keyword                  
x                        Identifier               
,                        Special Character        
y                        Identifier               
;                        Special Character        
// This is a single-line commentComment                  
if                       Keyword                  
(                        Special Character        
x                        Identifier               
==                       Operator                 
42                       Numeric Constant         
)                        Special Character        
{         