In [1]:
# Standalone JSON Parser for SQLi Request Data

import pandas as pd
import json
import os # For file path checks

def parse_sqli_json_data(json_file_path: str) -> pd.DataFrame:
    """
    Parses a JSON file containing network requests into a Pandas DataFrame
    with 'Method', 'URL', 'length', and 'content' columns.

    Assumes the JSON file contains a list of dictionaries, where each dictionary
    represents a request and has keys like 'method', 'url', and 'data'.
    The 'length' column is calculated based on the character length of the 'data' field.

    Args:
        json_file_path (str): Path to the input JSON file.

    Returns:
        pd.DataFrame: DataFrame in the desired input format:
                      Columns: ['Method', 'URL', 'length', 'content'].
                      Returns an empty DataFrame if the file cannot be found or parsed.
    """
    parsed_records = []
    try:
        # Check if the file exists before trying to open
        if not os.path.exists(json_file_path):
            print(f"Error: JSON file not found at {json_file_path}")
            return pd.DataFrame(columns=['Method', 'URL', 'length', 'content'])

        with open(json_file_path, 'r', encoding='utf-8') as f:
            requests_data = json.load(f)

        # Ensure the loaded data is a list. If it's a single dictionary, wrap it.
        if not isinstance(requests_data, list):
            if isinstance(requests_data, dict):
                requests_data = [requests_data]
            else:
                print(f"Warning: JSON data in '{json_file_path}' is neither a list nor a dictionary. Type: {type(requests_data)}")
                return pd.DataFrame(columns=['Method', 'URL', 'length', 'content'])

        print(f"Parsing {len(requests_data)} records from '{json_file_path}'...")
        for req in requests_data:
            method = req.get('method', 'UNKNOWN').upper() # Default to UNKNOWN, ensure uppercase
            url = req.get('url', '') # Default to empty string

            # The 'data' field in your JSON image corresponds to 'content'
            content = req.get('data', '')

            # Ensure content is a string for length calculation and consistent handling
            if not isinstance(content, str):
                # If 'data' field contains non-string, e.g., an empty list or object,
                # convert it to its JSON string representation. If it's None, treat as empty.
                content_str = json.dumps(content) if content is not None else ''
            else:
                content_str = content

            length = len(content_str) # Calculate length based on the (stringified) content

            parsed_records.append({
                'Method': method,
                'length': length,
                'content': content_str,
                'URL': url
                
            })

        print("JSON parsing complete.")
        return pd.DataFrame(parsed_records)
    

    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {json_file_path}: {e}")
        return pd.DataFrame(columns=['Method', 'URL', 'length', 'content'])
    except Exception as e:
        print(f"An unexpected error occurred during JSON parsing: {e}")
        return pd.DataFrame(columns=['Method', 'URL', 'length', 'content'])

# --- Example Usage (for testing the standalone function) ---
if __name__ == '__main__':
    # Create a dummy JSON file for demonstration
    # This structure is based on the image you provided (keys: method, url, data, headers)
    your_json_file_path = r"C:\Users\shiva\OneDrive\Desktop\browsing_2024_facebook.json"# <--- CHANGE THIS LINE!

    # Call the parser function with your file's path
    parsed_df = parse_sqli_json_data(your_json_file_path)

    if not parsed_df.empty:
        print("\nSuccessfully parsed data from your file:")
        print(parsed_df.head(10))
        print(parsed_df.tail(10))
        print(f"\nDataFrame shape: {parsed_df.shape}")
        print("\nDataFrame dtypes:")
        print(parsed_df.dtypes)
        parsed_df.to_csv('abc_f.csv', index=False)
    else:
        print("\nNo data was parsed from your file or an error occurred. Check the file path and content.")

    # You won't need to remove your actual file, so remove or comment out this line:
    # if os.path.exists(dummy_json_file_path):
    #     os.remove(dummy_json_file_path)
    #     print(f"\nCleaned up dummy JSON file: {dummy_json_file_path}")

Parsing 1005 records from 'C:\Users\shiva\OneDrive\Desktop\browsing_2024_facebook.json'...
JSON parsing complete.

Successfully parsed data from your file:
  Method  length                                            content  \
0   POST       1                                                      
1    GET       0                                                      
2    GET       0                                                      
3   POST    1114  {"request":{"@os":"win","@updater":"chromecrx"...   
4    GET       0                                                      
5    GET       0                                                      
6    GET       0                                                      
7    GET       0                                                      
8    GET       0                                                      
9    GET       0                                                      

                                                 URL  
0  /Lis