<a href="https://colab.research.google.com/github/ShahanMalik/html_code_generator/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow




In [None]:
!pip install numpy




In [None]:
!pip install tensorflow numpy scikit-learn



In [4]:
import tensorflow as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


class HTMLGenerationModel:
    """Intent classifier that maps a text request to a stored HTML snippet.

    The intents file is JSON with a top-level ``"intents"`` list; each entry
    provides a ``"tag"``, a list of ``"patterns"`` (example phrasings) and a
    ``"response"`` list whose first element is the snippet returned for
    that tag.
    """

    def __init__(self, intents_file):
        """Load the intents and derive the tokenizer, label maps and sizes.

        Args:
            intents_file: Path to the intents JSON file.

        Raises:
            ValueError: If the file contains no training patterns.
        """
        # Explicit encoding: snippets may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to be UTF-8.
        with open(intents_file, 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)['intents']

        # Parallel lists: patterns[i] is an example phrasing for tags[i].
        self.patterns = []
        self.tags = []
        self.tag_to_response = {}

        for intent in self.intents_data:
            tag = intent['tag']
            for pattern in intent['patterns']:
                self.patterns.append(pattern.lower())
                self.tags.append(tag)
            self.tag_to_response[tag] = intent['response'][0]

        if not self.patterns:
            raise ValueError("Intents file contains no patterns to train on.")

        # Tokenization
        self.pattern_tokenizer = Tokenizer(oov_token='<OOV>', lower=True)
        self.pattern_tokenizer.fit_on_texts(self.patterns)

        # Index tags in first-seen order. Iterating a set() here would make
        # the class indices depend on string hashing, which varies between
        # interpreter runs and makes training irreproducible.
        self.tag_to_index = {tag: i for i, tag in enumerate(dict.fromkeys(self.tags))}
        self.index_to_tag = {i: tag for tag, i in self.tag_to_index.items()}

        # +1 because index 0 is reserved for padding.
        self.pattern_vocab_size = len(self.pattern_tokenizer.word_index) + 1

        # Measure length in tokenizer tokens, not whitespace words: the
        # tokenizer also splits on punctuation, so a whitespace count can be
        # too small and pad_sequences would then drop tokens.
        token_sequences = self.pattern_tokenizer.texts_to_sequences(self.patterns)
        self.max_pattern_length = max(1, max(len(seq) for seq in token_sequences))
        self.num_classes = len(self.tag_to_index)

        # Set by train_model(); None until then.
        self.model = None

    def prepare_data(self):
        """Return ``(X, y)``: post-padded token ids and one-hot tag labels."""
        pattern_sequences = self.pattern_tokenizer.texts_to_sequences(self.patterns)
        padded_patterns = pad_sequences(
            pattern_sequences, maxlen=self.max_pattern_length, padding='post'
        )

        tag_indices = [self.tag_to_index[tag] for tag in self.tags]
        one_hot_tags = to_categorical(tag_indices, num_classes=self.num_classes)

        return padded_patterns, one_hot_tags

    def build_model(self):
        """Build and compile the embedding -> flatten -> dense classifier."""
        model = tf.keras.Sequential([
            # An explicit Input layer fixes the sequence length so Flatten
            # knows its output size; it replaces the deprecated
            # Embedding(input_length=...) argument.
            tf.keras.Input(shape=(self.max_pattern_length,)),
            tf.keras.layers.Embedding(self.pattern_vocab_size, 32),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(self.num_classes, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train_model(self):
        """Train on an 80/20 split, print test accuracy, and store the model.

        Returns:
            The trained Keras model (also kept as ``self.model``).
        """
        X, y = self.prepare_data()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        model = self.build_model()

        # Train without early stopping to ensure some learning
        model.fit(
            X_train, y_train,
            epochs=50,
            batch_size=16,
            validation_split=0.2,
            verbose=1
        )

        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
        print(f"Test Accuracy: {test_accuracy:.4f}")

        self.model = model  # Save the trained model
        return model

    def predict_response(self, input_texts, confidence_threshold=0.3):
        """Predict an HTML snippet for each input text.

        Args:
            input_texts: A single string or an iterable of strings.
            confidence_threshold: Minimum top-class probability required to
                return a snippet for a text.

        Returns:
            ``(responses, confidences)``, two lists aligned with the inputs.
            ``responses[i]`` is the snippet for the predicted tag, or
            ``None`` when the top probability is below the threshold;
            ``confidences[i]`` is that top probability.

        Raises:
            ValueError: If no model has been trained yet.
        """
        if self.model is None:
            raise ValueError("Model has not been trained. Call train_model() first.")

        if isinstance(input_texts, str):
            input_texts = [input_texts]
        else:
            input_texts = list(input_texts)

        if not input_texts:
            return [], []

        # Encode and classify the whole batch with a single predict() call
        # instead of one call per statement.
        sequences = self.pattern_tokenizer.texts_to_sequences(
            [text.lower() for text in input_texts]
        )
        padded_inputs = pad_sequences(
            sequences, maxlen=self.max_pattern_length, padding='post'
        )
        predictions = self.model.predict(padded_inputs, verbose=0)

        responses = []
        confidences = []
        for class_probabilities in predictions:
            max_confidence = np.max(class_probabilities)
            if max_confidence < confidence_threshold:
                responses.append(None)
            else:
                predicted_tag = self.index_to_tag[int(np.argmax(class_probabilities))]
                responses.append(self.tag_to_response.get(predicted_tag, None))
            confidences.append(max_confidence)

        return responses, confidences


class FallbackResponseGenerator:
    """Substring-based fallback used when the classifier gives no snippet.

    Reads the same intents JSON layout as the model: a top-level
    ``"intents"`` list whose entries have ``"patterns"`` and ``"response"``.
    """

    def __init__(self, intents_file):
        """Load the intents and set the generic default responses.

        Args:
            intents_file: Path to the intents JSON file.
        """
        # Explicit encoding so the file is read the same way on every platform.
        with open(intents_file, 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)['intents']

        # Returned (one at random) when no pattern matches the query.
        self.default_responses = [
            "<div>HTML generation not supported</div>",
            "<p>Unable to generate specific HTML</p>"
        ]

    def get_fallback_response(self, query):
        """Return a snippet for ``query`` using case-insensitive substring match.

        The first intent (in file order) with a pattern contained in the
        query wins. If none matches, a random default response is returned.

        Args:
            query: The user's request text.

        Returns:
            An HTML snippet as a plain ``str``.
        """
        query_lower = query.lower()

        for intent in self.intents_data:
            for pattern in intent['patterns']:
                # Skip blank patterns: "" is a substring of every query and
                # would otherwise make this intent match everything.
                if pattern.strip() and pattern.lower() in query_lower:
                    return intent['response'][0]

        # np.random.choice yields numpy.str_; convert so callers always get str.
        return str(np.random.choice(self.default_responses))


def check_language_restriction(user_input):
    """Return True if the request names a non-HTML language or framework.

    Matching is case-insensitive and on whole tokens only, so ordinary words
    that merely contain a language name ("rectangular", "trust") are not
    rejected.

    Args:
        user_input: The raw request text.

    Returns:
        bool: True when a restricted name appears as a standalone token.
    """
    # List of common programming languages and technologies to restrict
    restricted_languages = [
        'python', 'java', 'javascript', 'js', 'css', 'php',
        'ruby', 'c++', 'c#', 'swift', 'kotlin', 'sql',
        'typescript', 'rust', 'golang', 'perl', 'react',
        'vue', 'angular', 'node', 'django', 'flask',
        # Fused spellings, listed explicitly because matching is per token.
        'nodejs', 'reactjs', 'vuejs', 'angularjs'
    ]

    # Lookarounds instead of \b: names such as "c++" and "c#" end in
    # non-word characters, where \b would not mark a boundary.
    alternatives = '|'.join(re.escape(lang) for lang in restricted_languages)
    pattern = rf'(?<!\w)(?:{alternatives})(?!\w)'

    return re.search(pattern, user_input.lower()) is not None

import re

def main():
    """Train the intent model, then run an interactive HTML-generation prompt.

    Each request is split on "and" / "," into statements. Every statement is
    classified; statements with no confident prediction fall back to keyword
    matching. The snippets are printed wrapped in a minimal HTML document.
    The loop ends when the user types 'exit' or stdin is closed.
    """
    # Initialize the HTML generation model
    intents_file = 'intents.json'
    html_model = HTMLGenerationModel(intents_file)

    # Train the model
    html_model.train_model()

    # Initialize fallback generator
    fallback_generator = FallbackResponseGenerator(intents_file)

    # Interactive loop
    while True:
        try:
            user_input = input("Enter your HTML code generation request (or 'exit' to quit): ")
        except EOFError:
            # stdin closed: the generic handler below would otherwise catch
            # this on every iteration and loop forever.
            break

        if user_input.strip().lower() == 'exit':
            break

        try:
            # Reject non-HTML queries explicitly
            if check_language_restriction(user_input):
                print("This model is designed to generate only HTML code. Please ask for HTML-specific content.")
                continue

            # Split on "and" or ",", dropping empty fragments such as the
            # one between ", and" so they are never sent to the model.
            fragments = re.split(r'\band\b|,', user_input, flags=re.IGNORECASE)
            statements = [stmt.strip() for stmt in fragments if stmt.strip()]
            if not statements:
                continue

            # Predict the response for each statement
            responses, _ = html_model.predict_response(statements)

            # Use the fallback generator wherever the model gave no snippet.
            html_snippets = [
                response if response else fallback_generator.get_fallback_response(statement)
                for statement, response in zip(statements, responses)
            ]

            # Combine all snippets into a complete HTML document
            complete_html = """
<!DOCTYPE html>
<html>
<head>
<title>Html generator</title>
</head>
<body>
{}
</body>
</html>
            """.format("\n".join(html_snippets))

            print(complete_html)

        except Exception as e:
            # Top-level boundary: report the problem and keep the prompt alive.
            print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

Epoch 1/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 140ms/step - accuracy: 0.0274 - loss: 4.4901 - val_accuracy: 0.0227 - val_loss: 4.4883
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0726 - loss: 4.4722 - val_accuracy: 0.0000e+00 - val_loss: 4.4947
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1126 - loss: 4.4537 - val_accuracy: 0.0000e+00 - val_loss: 4.5008
Epoch 4/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1789 - loss: 4.4253 - val_accuracy: 0.0000e+00 - val_loss: 4.5142
Epoch 5/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1635 - loss: 4.3981 - val_accuracy: 0.0000e+00 - val_loss: 4.5342
Epoch 6/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1371 - loss: 4.3562 - val_accuracy: 0.0000e+00 - val_loss: 4.5652
Epoch 7/50
[1m11/