In [7]:
import pandas as pd
import json
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from scipy.sparse import vstack

# Function to load JSON file into a dictionary
def load_json_as_dict(json_file_path):
    """Parse the UTF-8 encoded JSON file at ``json_file_path``.

    Returns whatever ``json.load`` produces for the document (a dict when
    the top-level JSON value is an object).
    """
    with open(json_file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

# Load the dataset: the JSON file is read into a dictionary whose keys are
# used as commands and whose values are used as their descriptions.
json_file_path = "nmap_commands_1.json"
nmap_dataset = load_json_as_dict(json_file_path)

# Convert the dictionary to a two-column DataFrame, one row per (key, value) pair
df = pd.DataFrame(list(nmap_dataset.items()), columns=['Command', 'Description'])

# Preprocessing resources required by word_tokenize, stopwords and the lemmatizer
nltk.download('punkt')
# NLTK 3.8.2 and later load the Punkt sentence model from the 'punkt_tab'
# resource instead of the pickled 'punkt' one; without it word_tokenize
# raises LookupError on current NLTK releases.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Set for O(1) membership tests while filtering tokens
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize ``text`` into a space-separated string of lemmatized tokens.

    A dict argument is first serialized to a JSON string. The text is then
    tokenized with ``word_tokenize``; only purely alphabetic tokens that are
    not in the module-level ``stop_words`` set (compared in lower case) are
    kept, and each kept token is lowercased and lemmatized.
    """
    if isinstance(text, dict):
        # Dict-valued descriptions are flattened to their JSON representation
        text = json.dumps(text)

    kept = []
    for raw_token in word_tokenize(text):
        if not raw_token.isalpha():
            continue
        lowered = raw_token.lower()
        if lowered in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(lowered))
    return ' '.join(kept)

# Store the cleaned form of every description alongside the raw one
df['Processed_Description'] = df['Description'].apply(preprocess_text)

# TF-IDF Vectorization: learn the vocabulary from the processed descriptions
# and turn them into the feature matrix X (one row per DataFrame row)
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Processed_Description'])

# Initialize Logistic Regression model (fixed random_state for reproducibility)
model = LogisticRegression(max_iter=1000, random_state=42)

# Train the initial model: features are the TF-IDF rows, labels are the
# values of the 'Command' column
model.fit(X, df['Command'])

# Function to predict and get feedback
def predict_nmap_command(query):
    """Return the model's predicted command label for a free-text ``query``.

    The query goes through ``preprocess_text`` and the already-fitted
    module-level ``vectorizer`` before being passed to ``model.predict``;
    the single resulting label is returned.
    """
    features = vectorizer.transform([preprocess_text(query)])
    return model.predict(features)[0]

def log_feedback(query, predicted_output, feedback):
    """Append one feedback record to ``feedback_log.txt`` in the working directory.

    Args:
        query: The user's original query text.
        predicted_output: The command the model predicted for the query.
        feedback: The user's verdict on the prediction, logged verbatim.

    The record consists of three labelled lines followed by a separator
    line of 30 ``=`` characters.
    """
    record = (
        f"Query: {query}\n"
        f"Predicted Output: {predicted_output}\n"
        f"Feedback: {feedback}\n"
        + "=" * 30 + "\n"
    )
    # Explicit UTF-8 so queries with non-ASCII characters do not raise
    # UnicodeEncodeError on platforms whose default encoding is a legacy
    # code page.
    with open("feedback_log.txt", "a", encoding="utf-8") as f:
        f.write(record)

# Interactive feedback loop: predict a command for each query, log the user's
# verdict, and retrain on the user-supplied correction when the prediction
# was wrong.
while True:
    user_query = input("Please enter your query (type 'exit' to terminate): ")

    # Ignore surrounding whitespace and case so ' Exit ' also terminates
    if user_query.strip().lower() == 'exit':
        print("Terminating...")
        break

    # A blank query carries no information to predict from
    if not user_query.strip():
        continue

    predicted_output = predict_nmap_command(user_query)
    print(f"Predicted Nmap command: nmap {predicted_output}")

    feedback = input("Was this output correct? Type 'yes', 'no', or 'close': ")

    # Log feedback exactly as the user typed it
    log_feedback(user_query, predicted_output, feedback)

    # Update model only when the prediction was rejected
    if feedback.strip().lower() == 'no':
        # The rejected prediction must not be used as the training label:
        # that would reinforce the mistake. Ask for the right answer instead.
        correct_command = input(
            "Please enter the correct Nmap options (without the leading 'nmap'): "
        ).strip()
        if not correct_command:
            print("No correction provided; model left unchanged.")
            continue

        # Add the current query, labelled with the corrected command, to the
        # training set. The vectorizer is not refitted, so words outside its
        # original vocabulary are ignored by transform().
        processed_query = preprocess_text(user_query)
        current_query_vector = vectorizer.transform([processed_query])
        X = vstack([X, current_query_vector])
        new_row = pd.DataFrame({
            'Command': [correct_command],
            'Description': [user_query],
            'Processed_Description': [processed_query],
        })
        df = pd.concat([df, new_row], ignore_index=True)
        model.fit(X, df['Command'])
        print("Model updated based on user feedback.")

print("Session ended.")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\OMEN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\OMEN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\OMEN\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Predicted Nmap command: nmap -sU
Terminating...
Session ended.
