# Projeto de Análise de Dados
### Desenvolvido por [Ian Rodrigues dos Reis Paixão]

Este notebook foi criado por **[Ian Rodrigues dos Reis Paixão]**, com o objetivo de criar um modelo LSTM para análise e correção de códigos em JavaScript.  
**Data de Criação:** [07/10/2024]  
**Contato:** [iang.master100@gmail.com]  

In [None]:
import numpy as np
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense, Bidirectional, BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from typing import List, Dict, Tuple, Any

In [None]:
# Training corpus: JavaScript snippets of growing complexity, one snippet per string.
js_code_samples = [
    'console.log("Hello, World!");',
    'let x = 10; console.log(x);',
    'const add = (a, b) => { return a + b; };',
    'if (x === 10) { console.log("x is 10"); }',
    'for (let i = 0; i < 5; i++) { console.log(i); }',
    'var result = []; for (var i = 0; i < 5; i++) { result.push(i * 2); }',
    'let obj = { name: "Alice", age: 25 }; console.log(obj.name);',
    'const person = (name) => { if(name) return `Hello, ${name}!`; };',
    'try { throw new Error("Oops!"); } catch (e) { console.error(e); }',
    'function multiply(a, b) { return a * b; }',
    'const complexFunction = (arr) => { return arr.map(num => num * 2).filter(num => num > 5); };',
    'let nestedIf = (a) => { if (a > 0) { if (a < 10) { return "a is between 0 and 10"; } } return "Out of range"; };',
    'const obj2 = { key1: "value1", key2: { nestedKey: "nestedValue" } }; console.log(obj2.key2.nestedKey);',
    'const asyncFunction = async () => { let result = await fetch("https://api.example.com"); return result.json(); };',
    'for (const [key, value] of Object.entries(obj)) { console.log(`${key}: ${value}`); }',
    'const errorHandling = () => { try { throw new Error("Sample Error"); } catch (error) { console.error(error.message); } };',
    'const promiseFunction = () => { return new Promise((resolve, reject) => { resolve("Success!"); }); };',
    'const destructuring = ({ name, age }) => { console.log(`Name: ${name}, Age: ${age}`); }; destructuring({ name: "Bob", age: 30 });',
    'const filterEvenNumbers = (arr) => arr.filter(num => num % 2 === 0);',
    'const fetchData = async (url) => { try { const response = await fetch(url); return await response.json(); } catch (error) { console.error("Fetch error: ", error); } };',
    'const createUser = (name, age) => { return { name, age, greet() { return `Hello, my name is ${this.name}`; } }; };',
    'const promises = [Promise.resolve(1), Promise.resolve(2), Promise.resolve(3)]; Promise.all(promises).then(console.log);',
    'let counter = 0; const intervalId = setInterval(() => { counter++; if (counter === 5) clearInterval(intervalId); }, 1000);',
    'const recursiveFactorial = (n) => (n <= 1 ? 1 : n * recursiveFactorial(n - 1));',
    'const debounce = (func, delay) => { let timeout; return function(...args) { clearTimeout(timeout); timeout = setTimeout(() => func.apply(this, args), delay); }; };',
]

# One label per snippet, aligned by position: 1 = valid, 0 = invalid.
# Only the snippets at indices 12 (obj2) and 22 (setInterval counter) are
# marked invalid; every other snippet is marked valid.
# NOTE(review): both 0-labelled snippets look like well-formed JavaScript
# (index 22 merely uses a brace-less `if`) — confirm these labels are intended.
labels = [0 if idx in (12, 22) else 1 for idx in range(len(js_code_samples))]

# Tokenization and Padding
def preprocess_data(js_code_samples: List[str], labels: List[int], max_len: int = None) -> Tuple[np.ndarray, np.ndarray, Tokenizer, int]:
    """
    Tokenizes and pads the JavaScript code samples.

    Tokenization is done per character and keeps the original case. The
    default word-level Keras tokenizer lowercases the text and filters out
    punctuation such as ``( ) { } [ ] ;``, so a snippet and a copy of it with
    a missing brace would produce the same sequence. Characters never seen
    while fitting map to the ``<OOV>`` index instead of being dropped.

    Args:
        js_code_samples (List[str]): The JavaScript code snippets.
        labels (List[int]): Corresponding labels for the samples.
        max_len (int): Maximum length of sequences. If None, the length of the
            longest tokenized sample is used.

    Returns:
        Tuple[np.ndarray, np.ndarray, Tokenizer, int]: Padded sequences, label
        array, the fitted tokenizer, and the sequence length that was used.

    Raises:
        ValueError: If no samples are given, or if the number of labels does
            not match the number of samples.
    """
    if len(js_code_samples) == 0:
        raise ValueError("js_code_samples must contain at least one snippet.")
    if len(js_code_samples) != len(labels):
        raise ValueError(
            f"Got {len(js_code_samples)} samples but {len(labels)} labels."
        )

    # char_level keeps syntax characters; lower=False because JavaScript
    # identifiers are case-sensitive.
    tokenizer = Tokenizer(char_level=True, lower=False, oov_token='<OOV>')
    tokenizer.fit_on_texts(js_code_samples)

    sequences = tokenizer.texts_to_sequences(js_code_samples)

    if max_len is None:
        max_len = max(len(seq) for seq in sequences)

    # Zero-pad at the end so every row has exactly max_len entries.
    X = pad_sequences(sequences, maxlen=max_len, padding='post')
    y = np.array(labels)

    return X, y, tokenizer, max_len

# Optimized Model Building
def build_optimized_model(vocab_size: int, max_length: int, embedding_dim: int = 64,
                          lstm_units: int = 128, dropout_rate: float = 0.3) -> Sequential:
    """
    Builds the optimized LSTM model for classification.

    Layer stack: Embedding -> Bidirectional LSTM (returns sequences) ->
    BatchNormalization -> Dropout -> LSTM -> Dropout -> Dense(32, relu) ->
    Dense(1, sigmoid). The model is compiled with Adam (learning rate 0.001),
    binary cross-entropy loss and the accuracy metric.

    Args:
        vocab_size (int): Size of the vocabulary.
        max_length (int): Maximum length of the input sequences.
        embedding_dim (int): Dimensionality of the embedding layer.
        lstm_units (int): Number of LSTM units in the bidirectional layer; the
            second LSTM layer uses half as many (at least one).
        dropout_rate (float): Dropout rate.

    Returns:
        Sequential: Compiled optimized LSTM model.
    """
    model = Sequential([
        # Declare the input shape explicitly instead of passing the deprecated
        # `input_length` argument to Embedding; this also builds the model up
        # front so model.summary() can report shapes before training.
        tf.keras.Input(shape=(max_length,)),
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        Bidirectional(LSTM(lstm_units, return_sequences=True, recurrent_dropout=0.2)),
        BatchNormalization(),
        Dropout(dropout_rate),
        # Halve the width of the second recurrent layer, never dropping to 0 units.
        LSTM(max(1, lstm_units // 2), return_sequences=False),
        Dropout(dropout_rate),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
# Updated Train Model Function with Early Stopping
def train_optimized_model(model: Sequential, X_train: np.ndarray, y_train: np.ndarray,
                          X_test: np.ndarray, y_test: np.ndarray, epochs: int = 100,
                          batch_size: int = 32) -> Any:
    """
    Fits the model with early stopping on the validation loss.

    Training stops once `val_loss` has not improved for 5 consecutive epochs,
    and the weights from the best epoch are restored. The (X_test, y_test)
    pair is passed to Keras as the validation data.

    Args:
        model (Sequential): The LSTM model to train.
        X_train (np.ndarray): Training data.
        y_train (np.ndarray): Training labels.
        X_test (np.ndarray): Data used for validation during training.
        y_test (np.ndarray): Labels used for validation during training.
        epochs (int): Upper bound on the number of training epochs.
        batch_size (int): Size of the training batches.

    Returns:
        Any: The history object returned by `model.fit`.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=[stopper],
        verbose=2,
    )

# JavaScript Validation (Security, Redundancy, and Best Practices)
def validate_js_code(js_code: str, tokenizer: Tokenizer, model: Sequential,
                     max_length: int) -> Tuple[float, List[str]]:
    """
    Validates a JavaScript code snippet using the trained model and performs additional checks.

    The snippet is tokenized and padded the same way as the training data and
    scored by the model. Independently of the score, a few regular-expression
    checks look for risky or questionable constructs.

    Args:
        js_code (str): The JavaScript code snippet to validate.
        tokenizer (Tokenizer): The tokenizer for text preprocessing.
        model (Sequential): The trained LSTM model.
        max_length (int): Maximum length of the input sequences.

    Returns:
        Tuple[float, List[str]]: The validation score of the code (0 for
        invalid, 1 for valid) as a plain Python float, and a list of
        security/warning messages (empty when nothing was flagged).
    """
    # Predict the validity of the JavaScript code
    seq = tokenizer.texts_to_sequences([js_code])
    padded_seq = pad_sequences(seq, maxlen=max_length, padding='post')
    # verbose=0 avoids one progress bar per snippet; float() turns the numpy
    # scalar into the builtin float promised by the return annotation.
    prediction = float(model.predict(padded_seq, verbose=0)[0][0])

    issues = []

    # Check for security vulnerabilities. Word boundaries avoid false hits on
    # names such as `retrieval(`, and `\s*` also catches `eval (`.
    if re.search(r'\beval\s*\(', js_code):
        issues.append("Potential security risk: 'eval' function used.")
    if re.search(r'\bdocument\s*\.\s*write\s*\(', js_code):
        issues.append("Potential security risk: 'document.write' function used.")
    # Matches anonymous `function (...) {` expressions.
    # NOTE(review): the "redundant" wording is a heuristic — confirm intent.
    if re.search(r'\b(function\s*\([^)]*\)\s*{)', js_code):
        issues.append("Redundant function declaration.")

    return prediction, issues

# Code Formatting Check
def check_formatting(js_code: str) -> Tuple[str, int]:
    """
    Checks the formatting of the JavaScript code snippet.

    Checks performed:
      * Missing terminator: the snippet (ignoring surrounding whitespace) is
        non-empty and ends with neither ``;`` nor ``}``. A closing brace is
        accepted because block statements and function declarations do not
        take a trailing semicolon.
      * Irregular spacing: two or more spaces/tabs directly before ``(``,
        ``{`` or ``}`` in the middle of a line. A single space (the usual
        style) and line-leading indentation are not reported.

    Args:
        js_code (str): The JavaScript code snippet to check.

    Returns:
        Tuple[str, int]: The formatting issues found, joined by newlines
        (empty string when there are none), and the count of such issues.
    """
    issues = []

    # Check for missing semicolons
    stripped = js_code.strip()
    if stripped and not stripped.endswith((';', '}')):
        issues.append("Missing semicolon at the end of statement.")

    # Check for inconsistent spacing. The lookbehind requires a non-space
    # character before the whitespace run, so indentation is never matched.
    spacing_checks = (
        (r'(?<=\S)[ \t]{2,}\(', "Inconsistent spacing before parentheses."),
        (r'(?<=\S)[ \t]{2,}\{', "Inconsistent spacing before opening brace."),
        (r'(?<=\S)[ \t]{2,}\}', "Inconsistent spacing before closing brace."),
    )
    for pattern, message in spacing_checks:
        if re.search(pattern, js_code):
            issues.append(message)

    # Additional checks can be added here

    return '\n'.join(issues), len(issues)


In [None]:
# Preprocess the data: tokenize the snippets, pad them to a common length and
# keep the fitted tokenizer and sequence length for use at prediction time.
X, y, tokenizer, max_len = preprocess_data(js_code_samples, labels)

# Split the data into training and testing sets (80/20, fixed seed).
# NOTE(review): the split is not stratified; with so few 0-labelled samples
# the held-out set may contain only valid snippets — confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the optimized model. The vocabulary size is the number of indexed
# tokens plus one, leaving index 0 free for the padding value.
model = build_optimized_model(vocab_size=len(tokenizer.word_index) + 1, max_length=max_len)
model.summary()

# Train the model; the held-out split doubles as the validation data that
# drives early stopping. The returned history is not stored.
train_optimized_model(model, X_train, y_train, X_test, y_test)

Epoch 1/100
1/1 - 8s - 8s/step - accuracy: 0.1000 - loss: 0.7701 - val_accuracy: 1.0000 - val_loss: 0.6788
Epoch 2/100
1/1 - 1s - 619ms/step - accuracy: 0.7500 - loss: 0.6278 - val_accuracy: 1.0000 - val_loss: 0.6694
Epoch 3/100
1/1 - 0s - 119ms/step - accuracy: 0.9000 - loss: 0.5422 - val_accuracy: 1.0000 - val_loss: 0.6560
Epoch 4/100
1/1 - 0s - 115ms/step - accuracy: 0.8500 - loss: 0.4801 - val_accuracy: 1.0000 - val_loss: 0.6420
Epoch 5/100
1/1 - 0s - 131ms/step - accuracy: 0.9000 - loss: 0.4355 - val_accuracy: 1.0000 - val_loss: 0.6272
Epoch 6/100
1/1 - 0s - 119ms/step - accuracy: 0.9000 - loss: 0.3865 - val_accuracy: 1.0000 - val_loss: 0.6119
Epoch 7/100
1/1 - 0s - 119ms/step - accuracy: 0.9000 - loss: 0.3760 - val_accuracy: 1.0000 - val_loss: 0.5967
Epoch 8/100
1/1 - 0s - 118ms/step - accuracy: 0.9000 - loss: 0.3611 - val_accuracy: 1.0000 - val_loss: 0.5833
Epoch 9/100
1/1 - 0s - 127ms/step - accuracy: 0.9000 - loss: 0.3194 - val_accuracy: 1.0000 - val_loss: 0.5715
Epoch 10/100


<keras.src.callbacks.history.History at 0x7a0369857640>

In [None]:
# Sample new JavaScript code for validation and formatting.
# The trailing comment on each snippet records the verdict the author expects;
# these comments are documentation only and are not read by the code.
new_js_code = [
    # Valid Cases
    'var x = 10;',  # Valid
    'function greet() { console.log("Hi"); }',  # Valid
    'let y = 20;',  # Valid
    'for(let i = 0; i < 5; i++) { console.log(i); }',  # Valid
    'if (a > b) { console.log(a); }',  # Valid
    'const sayHello = () => console.log("Hello");',  # Valid
    'const data = fetch("/api/data").then(response => response.json());',  # Valid
    'const { name, age } = person;',  # Valid (assuming person is an object)

    # Invalid Cases (syntax errors, plus snippets rejected on style grounds)
    'function greet() { console.log("Hi");',  # Invalid (missing closing brace)
    'let z = ;',  # Invalid (incomplete assignment)
    'for(let i = 0; i < 5; i++) console.log(i);',  # Invalid by style (braces omitted; still parses)
    'if (a > b) console.log(a);',  # Invalid by style (braces omitted; still parses)
    'const sayHello = () => console.log("Hello"',  # Invalid (missing closing parenthesis)
    'async function fetchData() { try { await fetch("/api/data"); }',  # Invalid (missing closing brace)
    'async function fetchData() { const data = await fetch("/api/data"); return data.json(); }',  # Invalid by style (data.json() not awaited; still parses)
    'const { a, b } = 10;',  # Invalid by style (destructuring from a non-object; still parses)
    'var aux = 10',  # Invalid by style (missing semicolon; still parses)
    'let arr = [1, 2, 3',  # Invalid (missing closing bracket)
    'const obj = { name: "John", age: 30,',  # Invalid (missing closing brace)
    'if (num > 10) { console.log(x);',  # Invalid (missing closing brace)
    'function factorial(n) { if (n === 0) return 1; else return n * factorial(n - 1);',  # Invalid (missing closing brace)

    # Mixed follow-up cases
    'try { throw new Error("Oops"); } catch(e) { console.error(e); }',  # Valid
    'console.log("Hello World!");',  # Valid
    'const multiply = (a, b) => a * b;',  # Valid
    'const obj = { name: "John", age: 30 };',  # Valid
    'let x = 1, y = 2, z = 3;',  # Valid
    'const add = (a, b) => { return a + b; };',  # Valid
    'const greeting = (name) => { console.log(`Hello, ${name}!`); }',  # Valid
    'setTimeout(() => { console.log("Done!"); }, 1000);',  # Valid
    'function test() { return { key: "value" };',  # Invalid (missing the function's closing brace)
    'const myFunc = () => { console.log("Hello"); };',  # Valid
    'const fetchData = async () => { const data = await fetch(url); };',  # Valid
    'let objects = [1, 2, 3, 4, 5];',  # Valid
    'const { x, y } = { x: 1, y: 2, z: 3 };',  # Valid
    'let i = 0; while(i < 10) { console.log(i); i++; }',  # Valid
]

# Initialize storage for formatting issues
all_formatting_issues = []
total_issues_count = 0

# Loop through each JavaScript code snippet for validation and formatting
for js_code in new_js_code:
    validation_score, issues = validate_js_code(js_code, tokenizer, model, max_len)
    formatting_issues, issue_count = check_formatting(js_code)

    # Collect results
    all_formatting_issues.append(formatting_issues)
    total_issues_count += issue_count
    print(f"Validation Score for JS code: {validation_score:.4f}")
    # Surface the security/redundancy warnings, which were previously computed
    # and then discarded; nothing extra is printed when there are none.
    if issues:
        print("Static Analysis Warnings:\n" + "\n".join(issues))
    print(f"Formatting Issues:\n{formatting_issues}\nTotal Issues: {issue_count}")

# Print overall results
print(f"Total Formatting Issues across all samples: {total_issues_count}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 560ms/step
Validation Score for JS code: 0.9959
Formatting Issues:

Total Issues: 0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Validation Score for JS code: 0.9961
Formatting Issues:
Missing semicolon at the end of statement.
Inconsistent spacing before opening brace.
Inconsistent spacing before closing brace.
Total Issues: 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Validation Score for JS code: 0.9961
Formatting Issues:

Total Issues: 0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Validation Score for JS code: 0.9958
Formatting Issues:
Missing semicolon at the end of statement.
Inconsistent spacing before opening brace.
Inconsistent spacing before closing brace.
Total Issues: 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Validation Score for JS code: 0.9893
Formatting Issues:
Missing semicolon at the end of sta

In [None]:
import numpy as np
import re
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense, Bidirectional, BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from typing import List, Dict, Tuple, Any

# Training corpus: JavaScript snippets of growing complexity, one snippet per string.
js_code_samples = [
    'console.log("Hello, World!");',
    'let x = 10; console.log(x);',
    'const add = (a, b) => { return a + b; };',
    'if (x === 10) { console.log("x is 10"); }',
    'for (let i = 0; i < 5; i++) { console.log(i); }',
    'var result = []; for (var i = 0; i < 5; i++) { result.push(i * 2); }',
    'let obj = { name: "Alice", age: 25 }; console.log(obj.name);',
    'const person = (name) => { if(name) return `Hello, ${name}!`; };',
    'try { throw new Error("Oops!"); } catch (e) { console.error(e); }',
    'function multiply(a, b) { return a * b; }',
    'const complexFunction = (arr) => { return arr.map(num => num * 2).filter(num => num > 5); };',
    'let nestedIf = (a) => { if (a > 0) { if (a < 10) { return "a is between 0 and 10"; } } return "Out of range"; };',
    'const obj2 = { key1: "value1", key2: { nestedKey: "nestedValue" } }; console.log(obj2.key2.nestedKey);',
    'const asyncFunction = async () => { let result = await fetch("https://api.example.com"); return result.json(); };',
    'for (const [key, value] of Object.entries(obj)) { console.log(`${key}: ${value}`); }',
    'const errorHandling = () => { try { throw new Error("Sample Error"); } catch (error) { console.error(error.message); } };',
    'const promiseFunction = () => { return new Promise((resolve, reject) => { resolve("Success!"); }); };',
    'const destructuring = ({ name, age }) => { console.log(`Name: ${name}, Age: ${age}`); }; destructuring({ name: "Bob", age: 30 });',
    'const filterEvenNumbers = (arr) => arr.filter(num => num % 2 === 0);',
    'const fetchData = async (url) => { try { const response = await fetch(url); return await response.json(); } catch (error) { console.error("Fetch error: ", error); } };',
    'const createUser = (name, age) => { return { name, age, greet() { return `Hello, my name is ${this.name}`; } }; };',
    'const promises = [Promise.resolve(1), Promise.resolve(2), Promise.resolve(3)]; Promise.all(promises).then(console.log);',
    'let counter = 0; const intervalId = setInterval(() => { counter++; if (counter === 5) clearInterval(intervalId); }, 1000);',
    'const recursiveFactorial = (n) => (n <= 1 ? 1 : n * recursiveFactorial(n - 1));',
    'const debounce = (func, delay) => { let timeout; return function(...args) { clearTimeout(timeout); timeout = setTimeout(() => func.apply(this, args), delay); }; };',
]

# One label per snippet, aligned by position: 1 = valid, 0 = invalid.
# Only the snippets at indices 12 (obj2) and 22 (setInterval counter) are
# marked invalid; every other snippet is marked valid.
# NOTE(review): both 0-labelled snippets look like well-formed JavaScript
# (index 22 merely uses a brace-less `if`) — confirm these labels are intended.
labels = [0 if idx in (12, 22) else 1 for idx in range(len(js_code_samples))]

# Tokenization and Padding
def preprocess_data(js_code_samples: List[str], labels: List[int], max_len: int = None) -> Tuple[np.ndarray, np.ndarray, Tokenizer, int]:
    """
    Tokenizes and pads the JavaScript code samples.

    Tokenization is done per character and keeps the original case. The
    default word-level Keras tokenizer lowercases the text and filters out
    punctuation such as ``( ) { } [ ] ;``, so a snippet and a copy of it with
    a missing brace would produce the same sequence. Characters never seen
    while fitting map to the ``<OOV>`` index instead of being dropped.

    Args:
        js_code_samples (List[str]): The JavaScript code snippets.
        labels (List[int]): Corresponding labels for the samples.
        max_len (int): Maximum length of sequences. If None, the length of the
            longest tokenized sample is used.

    Returns:
        Tuple[np.ndarray, np.ndarray, Tokenizer, int]: Padded sequences, label
        array, the fitted tokenizer, and the sequence length that was used.

    Raises:
        ValueError: If no samples are given, or if the number of labels does
            not match the number of samples.
    """
    if len(js_code_samples) == 0:
        raise ValueError("js_code_samples must contain at least one snippet.")
    if len(js_code_samples) != len(labels):
        raise ValueError(
            f"Got {len(js_code_samples)} samples but {len(labels)} labels."
        )

    # char_level keeps syntax characters; lower=False because JavaScript
    # identifiers are case-sensitive.
    tokenizer = Tokenizer(char_level=True, lower=False, oov_token='<OOV>')
    tokenizer.fit_on_texts(js_code_samples)

    sequences = tokenizer.texts_to_sequences(js_code_samples)

    if max_len is None:
        max_len = max(len(seq) for seq in sequences)

    # Zero-pad at the end so every row has exactly max_len entries.
    X = pad_sequences(sequences, maxlen=max_len, padding='post')
    y = np.array(labels)

    return X, y, tokenizer, max_len

# Optimized Model Building
def build_optimized_model(vocab_size: int, max_length: int, embedding_dim: int = 64,
                          lstm_units: int = 128, dropout_rate: float = 0.3) -> Sequential:
    """
    Builds the optimized LSTM model for classification.

    Layer stack: Embedding -> Bidirectional LSTM (returns sequences) ->
    BatchNormalization -> Dropout -> LSTM -> Dropout -> Dense(32, relu) ->
    Dense(1, sigmoid). The model is compiled with Adam (learning rate 0.001),
    binary cross-entropy loss and the accuracy metric.

    Args:
        vocab_size (int): Size of the vocabulary.
        max_length (int): Maximum length of the input sequences.
        embedding_dim (int): Dimensionality of the embedding layer.
        lstm_units (int): Number of LSTM units in the bidirectional layer; the
            second LSTM layer uses half as many (at least one).
        dropout_rate (float): Dropout rate.

    Returns:
        Sequential: Compiled optimized LSTM model.
    """
    model = Sequential([
        # Declare the input shape explicitly instead of passing the deprecated
        # `input_length` argument to Embedding; this also builds the model up
        # front so model.summary() can report shapes before training.
        tf.keras.Input(shape=(max_length,)),
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        Bidirectional(LSTM(lstm_units, return_sequences=True, recurrent_dropout=0.2)),
        BatchNormalization(),
        Dropout(dropout_rate),
        # Halve the width of the second recurrent layer, never dropping to 0 units.
        LSTM(max(1, lstm_units // 2), return_sequences=False),
        Dropout(dropout_rate),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Updated Train Model Function with Early Stopping
def train_optimized_model(model: Sequential, X_train: np.ndarray, y_train: np.ndarray,
                          X_test: np.ndarray, y_test: np.ndarray, epochs: int = 100,
                          batch_size: int = 32) -> Any:
    """
    Fits the model with early stopping on the validation loss.

    Training stops once `val_loss` has not improved for 5 consecutive epochs,
    and the weights from the best epoch are restored. The (X_test, y_test)
    pair is passed to Keras as the validation data.

    Args:
        model (Sequential): The LSTM model to train.
        X_train (np.ndarray): Training data.
        y_train (np.ndarray): Training labels.
        X_test (np.ndarray): Data used for validation during training.
        y_test (np.ndarray): Labels used for validation during training.
        epochs (int): Upper bound on the number of training epochs.
        batch_size (int): Size of the training batches.

    Returns:
        Any: The history object returned by `model.fit`.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        callbacks=[stopper],
        verbose=2,
    )

# JavaScript Validation (Security, Redundancy, and Best Practices)
def validate_js_code(js_code: str, tokenizer: Tokenizer, model: Sequential,
                     max_length: int) -> Tuple[float, List[str]]:
    """
    Validates a JavaScript code snippet using the trained model and performs additional checks.

    The snippet is tokenized and padded the same way as the training data and
    scored by the model. Independently of the score, a few regular-expression
    checks look for risky or questionable constructs.

    Args:
        js_code (str): The JavaScript code snippet to validate.
        tokenizer (Tokenizer): The tokenizer for text preprocessing.
        model (Sequential): The trained LSTM model.
        max_length (int): Maximum length of the input sequences.

    Returns:
        Tuple[float, List[str]]: The validation score of the code (0 for
        invalid, 1 for valid) as a plain Python float, and a list of
        security/warning messages (empty when nothing was flagged).
    """
    # Predict the validity of the JavaScript code
    seq = tokenizer.texts_to_sequences([js_code])
    padded_seq = pad_sequences(seq, maxlen=max_length, padding='post')
    # verbose=0 avoids one progress bar per snippet; float() turns the numpy
    # scalar into the builtin float promised by the return annotation.
    prediction = float(model.predict(padded_seq, verbose=0)[0][0])

    issues = []

    # Check for security vulnerabilities. Word boundaries avoid false hits on
    # names such as `retrieval(`, and `\s*` also catches `eval (`.
    if re.search(r'\beval\s*\(', js_code):
        issues.append("Potential security risk: 'eval' function used.")
    if re.search(r'\bdocument\s*\.\s*write\s*\(', js_code):
        issues.append("Potential security risk: 'document.write' function used.")
    # Matches anonymous `function (...) {` expressions.
    # NOTE(review): the "redundant" wording is a heuristic — confirm intent.
    if re.search(r'\b(function\s*\([^)]*\)\s*{)', js_code):
        issues.append("Redundant function declaration.")

    return prediction, issues

# Code Formatting Check
def check_formatting(js_code: str) -> Tuple[str, int]:
    """
    Checks the formatting of the JavaScript code snippet.

    Checks performed:
      * Missing terminator: the snippet (ignoring surrounding whitespace) is
        non-empty and ends with neither ``;`` nor ``}``. A closing brace is
        accepted because block statements and function declarations do not
        take a trailing semicolon.
      * Irregular spacing: two or more spaces/tabs directly before ``(``,
        ``{`` or ``}`` in the middle of a line. A single space (the usual
        style) and line-leading indentation are not reported.

    Args:
        js_code (str): The JavaScript code snippet to check.

    Returns:
        Tuple[str, int]: The formatting issues found, joined by newlines
        (empty string when there are none), and the count of such issues.
    """
    issues = []

    # Check for missing semicolons
    stripped = js_code.strip()
    if stripped and not stripped.endswith((';', '}')):
        issues.append("Missing semicolon at the end of statement.")

    # Check for inconsistent spacing. The lookbehind requires a non-space
    # character before the whitespace run, so indentation is never matched.
    spacing_checks = (
        (r'(?<=\S)[ \t]{2,}\(', "Inconsistent spacing before parentheses."),
        (r'(?<=\S)[ \t]{2,}\{', "Inconsistent spacing before opening brace."),
        (r'(?<=\S)[ \t]{2,}\}', "Inconsistent spacing before closing brace."),
    )
    for pattern, message in spacing_checks:
        if re.search(pattern, js_code):
            issues.append(message)

    # Additional checks can be added here

    return '\n'.join(issues), len(issues)

# Preprocess the data: tokenize the snippets, pad them to a common length and
# keep the fitted tokenizer and sequence length for use at prediction time.
X, y, tokenizer, max_len = preprocess_data(js_code_samples, labels)

# Split the data into training and testing sets (80/20, fixed seed).
# NOTE(review): the split is not stratified; with so few 0-labelled samples
# the held-out set may contain only valid snippets — confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the optimized model. The vocabulary size is the number of indexed
# tokens plus one, leaving index 0 free for the padding value.
model = build_optimized_model(vocab_size=len(tokenizer.word_index) + 1, max_length=max_len)

# Train the model; the held-out split doubles as the validation data that
# drives early stopping. The returned history is not stored.
train_optimized_model(model, X_train, y_train, X_test, y_test)

# Sample new JavaScript code for validation and formatting.
# The trailing comment on each snippet records the verdict the author expects;
# these comments are documentation only and are not read by the code.
new_js_code = [
    # Valid Cases
    'var x = 10;',  # Valid
    'function greet() { console.log("Hi"); }',  # Valid
    'let y = 20;',  # Valid
    'for(let i = 0; i < 5; i++) { console.log(i); }',  # Valid
    'if (a > b) { console.log(a); }',  # Valid
    'const sayHello = () => console.log("Hello");',  # Valid
    'const data = fetch("/api/data").then(response => response.json());',  # Valid
    'const { name, age } = person;',  # Valid (assuming person is an object)

    # Invalid Cases (syntax errors, plus snippets rejected on style grounds)
    'function greet() { console.log("Hi");',  # Invalid (missing closing brace)
    'let z = ;',  # Invalid (incomplete assignment)
    'for(let i = 0; i < 5; i++) console.log(i);',  # Invalid by style (braces omitted; still parses)
    'if (a > b) console.log(a);',  # Invalid by style (braces omitted; still parses)
    'const sayHello = () => console.log("Hello"',  # Invalid (missing closing parenthesis)
    'async function fetchData() { try { await fetch("/api/data"); }',  # Invalid (missing closing brace)
    'async function fetchData() { const data = await fetch("/api/data"); return data.json(); }',  # Invalid by style (data.json() not awaited; still parses)
    'const { a, b } = 10;',  # Invalid by style (destructuring from a non-object; still parses)
    'var aux = 10',  # Invalid by style (missing semicolon; still parses)
    'let arr = [1, 2, 3',  # Invalid (missing closing bracket)
    'const obj = { name: "John", age: 30,',  # Invalid (missing closing brace)
    'if (num > 10) { console.log(x);',  # Invalid (missing closing brace)
    'function factorial(n) { if (n === 0) return 1; else return n * factorial(n - 1);',  # Invalid (missing closing brace)

    # Mixed follow-up cases
    'try { throw new Error("Oops"); } catch(e) { console.error(e); }',  # Valid
    'console.log("Hello World!");',  # Valid
    'const multiply = (a, b) => a * b;',  # Valid
    'const obj = { name: "John", age: 30 };',  # Valid
    'let x = 1, y = 2, z = 3;',  # Valid
    'const add = (a, b) => { return a + b; };',  # Valid
    'const greeting = (name) => { console.log(`Hello, ${name}!`); }',  # Valid
    'setTimeout(() => { console.log("Done!"); }, 1000);',  # Valid
    'function test() { return { key: "value" };',  # Invalid (missing the function's closing brace)
    'const myFunc = () => { console.log("Hello"); };',  # Valid
    'const fetchData = async () => { const data = await fetch(url); };',  # Valid
    'let objects = [1, 2, 3, 4, 5];',  # Valid
    'const { x, y } = { x: 1, y: 2, z: 3 };',  # Valid
    'let i = 0; while(i < 10) { console.log(i); i++; }',  # Valid
]

# Initialize storage for formatting issues
all_formatting_issues = []
total_issues_count = 0

# Loop through each JavaScript code snippet for validation and formatting
for js_code in new_js_code:
    validation_score, issues = validate_js_code(js_code, tokenizer, model, max_len)
    formatting_issues, issue_count = check_formatting(js_code)

    # Collect results
    all_formatting_issues.append(formatting_issues)
    total_issues_count += issue_count
    print(f"Validation Score for JS code: {validation_score:.4f}")
    # Surface the security/redundancy warnings, which were previously computed
    # and then discarded; nothing extra is printed when there are none.
    if issues:
        print("Static Analysis Warnings:\n" + "\n".join(issues))
    print(f"Formatting Issues:\n{formatting_issues}\nTotal Issues: {issue_count}")

# Print overall results
print(f"Total Formatting Issues across all samples: {total_issues_count}")


Epoch 1/100
1/1 - 9s - 9s/step - accuracy: 0.2500 - loss: 0.7613 - val_accuracy: 1.0000 - val_loss: 0.6754
Epoch 2/100
1/1 - 0s - 118ms/step - accuracy: 0.7500 - loss: 0.5762 - val_accuracy: 1.0000 - val_loss: 0.6619
Epoch 3/100
1/1 - 0s - 139ms/step - accuracy: 0.8000 - loss: 0.5245 - val_accuracy: 1.0000 - val_loss: 0.6465
Epoch 4/100
1/1 - 0s - 116ms/step - accuracy: 0.9000 - loss: 0.4761 - val_accuracy: 1.0000 - val_loss: 0.6305
Epoch 5/100
1/1 - 0s - 145ms/step - accuracy: 0.9000 - loss: 0.4173 - val_accuracy: 1.0000 - val_loss: 0.6139
Epoch 6/100
1/1 - 0s - 143ms/step - accuracy: 0.9000 - loss: 0.3585 - val_accuracy: 1.0000 - val_loss: 0.5973
Epoch 7/100
1/1 - 0s - 129ms/step - accuracy: 0.9000 - loss: 0.3626 - val_accuracy: 1.0000 - val_loss: 0.5814
Epoch 8/100
1/1 - 0s - 136ms/step - accuracy: 0.9000 - loss: 0.3603 - val_accuracy: 1.0000 - val_loss: 0.5672
Epoch 9/100
1/1 - 0s - 149ms/step - accuracy: 0.9000 - loss: 0.2974 - val_accuracy: 1.0000 - val_loss: 0.5542
Epoch 10/100
