In [None]:
import copy
import unittest

import numpy as np
import pandas as pd

import json_to_df  # Assuming json_to_df.py is in the same directory

class TestJsonToDf(unittest.TestCase):
    """Unit tests for the DataFrame-building functions in ``json_to_df``.

    Every test passes a small JSON-like dict and the identifier
    ``"sample1"`` to one ``prep_*_df`` function and checks the type,
    column layout and identifier column of the returned DataFrame.
    """

    # Sample JSON data for testing.  It stays a class attribute so it is
    # still reachable as ``TestJsonToDf.sample_json``; ``setUp`` gives each
    # test its own deep copy so the tests cannot influence one another.
    sample_json = {
        "genome": {"genome_id": "123", "genome_name": "TestGenome"},
        "stats": {"total_genes": 1000, "coding_genes": 900},
        "run": {"start_time": "2024-01-01", "end_time": "2024-01-02"},
        "version": {"bakta": "1.5.0"},
        "features": [
            {"feature_id": "f1", "type": "gene", "location": "1..100"},
            {"feature_id": "f2", "type": "CDS", "location": "200..300"},
        ],
        "sequences": [
            {
                "sequence_id": "s1",
                "length": 10000,
                "description": "Escherichia coli K12 MG1655 complete genome",
                "orig_description": "len=10000 cov=20x corr=yes origname=NC_000913 sw=bwa date=2024-01-01",
            },
            {
                "sequence_id": "s2",
                "length": 5000,
                "description": "Salmonella enterica Typhimurium SL1344 complete genome",
                "orig_description": "len=5000 cov=30x corr=yes origname=NC_003197 sw=bwa date=2024-01-01",
            },
        ],
    }

    def setUp(self):
        """Shadow the shared fixture with a per-test deep copy.

        The functions under test receive the dict itself; if one of them
        mutates its argument, the change must not leak into the next test.
        """
        self.sample_json = copy.deepcopy(type(self).sample_json)

    def _assert_identifier_column(self, df, identifier):
        """Assert that *df* has an 'identifier' column holding only *identifier*."""
        self.assertIn('identifier', df.columns)
        self.assertTrue(all(df['identifier'] == identifier))

    def test_prep_info_df(self):
        """Tests the prep_info_df function."""
        info_df = json_to_df.prep_info_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(info_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = [
            'identifier',
            'genome_genome_id',
            'genome_genome_name',
            'stats_total_genes',
            'stats_coding_genes',
            'run_start_time',
            'run_end_time',
            'version_bakta',
        ]
        self.assertListEqual(list(info_df.columns), expected_columns)

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(info_df, 'sample1')

        # Check for correct data types (important for potential database
        # interactions).
        self.assertTrue(pd.api.types.is_string_dtype(info_df['identifier']))
        self.assertTrue(pd.api.types.is_string_dtype(info_df['genome_genome_name']))
        self.assertTrue(pd.api.types.is_integer_dtype(info_df['stats_total_genes']))

    def test_prep_features_df(self):
        """Tests the prep_features_df function."""
        features_df = json_to_df.prep_features_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(features_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = ['identifier', 'feature_id', 'type', 'location']
        self.assertListEqual(list(features_df.columns), expected_columns)

        # Every single value must be a string.  Checking value by value lets
        # a failure name the offending column and value.
        for column_name, column in features_df.items():
            for value in column:
                self.assertIsInstance(
                    value,
                    str,
                    msg=f"non-string value {value!r} in column {column_name!r}",
                )

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(features_df, 'sample1')

    def test_prep_sequences_df(self):
        """Tests the prep_sequences_df function."""
        sequences_df = json_to_df.prep_sequences_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(sequences_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = [
            'identifier',
            'sequence_id',
            'length',
            'description',
            'len',
            'cov',
            'corr',
            'origname',
            'sw',
            'date',
            'genus',
            'species',
            'gcode',
            'topology',
        ]
        self.assertListEqual(list(sequences_df.columns), expected_columns)

        # The 'len' column must exist after splitting.
        self.assertIn('len', sequences_df.columns)

        # Check for object type after string operations.
        self.assertTrue(pd.api.types.is_object_dtype(sequences_df['len']))

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(sequences_df, 'sample1')


if __name__ == '__main__':
    # Run the tests only when this file/cell is executed directly.
    # An explicit argv keeps unittest from parsing the host process's own
    # command line, and exit=False makes unittest.main return instead of
    # calling sys.exit() when the run is over.
    unittest.main(exit=False, argv=['first-arg-is-ignored'])

**Explanation:**

1.  **Import Necessary Libraries:**
    * `unittest`:  The standard Python testing framework.
    * `pandas`:  For DataFrame manipulation (as used in the code).
    * `numpy`: For numerical operations (as used in the code).
    * `json_to_df`:  Imports the module you want to test.  **Important:** This assumes `json_to_df.py` is in the same directory as your test script.

2.  **Define a Test Class:**
    * `TestJsonToDf(unittest.TestCase)`:  Creates a class that inherits from `unittest.TestCase`.  This is essential for using the `unittest` framework.  Each test method will be part of this class.

3.  **Sample Data:**
    * `sample_json`:  This is a dictionary that mimics the structure of the JSON data your functions are designed to process.  It's crucial to create sample data that covers various scenarios your functions might encounter (e.g., missing data, different data types).

4.  **Test Methods:**
    * Each method in the `TestJsonToDf` class that *starts with `test_`* is automatically recognized by `unittest` as a test case.
    * **`test_prep_info_df(self)`:**
        * Calls `json_to_df.prep_info_df()` with the sample data.
        * `self.assertIsInstance(info_df, pd.DataFrame)`:  Asserts that the result is a pandas DataFrame.
        * `self.assertListEqual(list(info_df.columns), expected_columns)`: Asserts that the DataFrame has the columns you expect, and in the correct order.
        * `self.assertTrue(all(info_df['identifier'] == 'sample1'))`:  Verifies that the 'identifier' column was added correctly to all rows.
        * `self.assertTrue(pd.api.types.is_string_dtype(info_df['identifier']))`: Checks the datatype of the column.
    * **`test_prep_features_df(self)`:**
        * Similar structure to `test_prep_info_df`, but tests the `prep_features_df` function.
        * `self.assertTrue(all(features_df.apply(lambda col: col.apply(lambda x: isinstance(x, str))).all()))`:  This is a more complex assertion that checks if *every* value in the DataFrame is a string, which is what your function is supposed to do.
    * **`test_prep_sequences_df(self)`:**
        * Tests the `prep_sequences_df` function.
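        * Checks the resulting column list: the fields taken from `orig_description` (`len`, `cov`, `corr`, `origname`, `sw`, `date`) and the additional `genus`, `species`, `gcode` and `topology` columns must be present, `orig_description` itself must no longer appear, and `description` is still expected in the output.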
        * `self.assertTrue(pd.api.types.is_object_dtype(sequences_df['len']))`:  Verifies the column type after the split and string cleaning operations.

5.  **Running the Tests:**
    * `if __name__ == '__main__':`:  This ensures that `unittest.main()` is only called when you run the script directly (not when you import it as a module).
    * `unittest.main(argv=['first-arg-is-ignored'], exit=False)`:  This line runs the tests.  
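        * `argv=['first-arg-is-ignored']` is needed when running tests within a Jupyter Notebook.  By default `unittest.main` parses `sys.argv`, which in a notebook holds the kernel's own launch arguments rather than test options; passing an explicit `argv` overrides that.  The first element stands in for the program name and is ignored, hence the placeholder string.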
        * `exit=False` is *crucial* for Jupyter Notebooks.  The default behavior of `unittest.main` is to call `sys.exit()` once the tests have run, which raises `SystemExit`; inside a notebook that surfaces as an error in the cell instead of a clean end to the test run.  `exit=False` prevents this.

**How to Use in a Jupyter Notebook:**

1.  **Save the Code:** Save the Python code above as a `.py` file (e.g., `test_json_to_df.py`) in the same directory as your `json_to_df.py` file.
2.  **Create a Jupyter Notebook:** Open a new Jupyter Notebook.
3.  **Import and Run:** In a cell in your notebook, paste the following code and run the cell:

In [None]:
import copy
import unittest

import numpy as np
import pandas as pd

import json_to_df  # Assuming json_to_df.py is in the same directory


class TestJsonToDf(unittest.TestCase):
    """Unit tests for the DataFrame-building functions in ``json_to_df``.

    Every test passes a small JSON-like dict and the identifier
    ``"sample1"`` to one ``prep_*_df`` function and checks the type,
    column layout and identifier column of the returned DataFrame.
    """

    # Sample JSON data for testing.  It stays a class attribute so it is
    # still reachable as ``TestJsonToDf.sample_json``; ``setUp`` gives each
    # test its own deep copy so the tests cannot influence one another.
    sample_json = {
        "genome": {"genome_id": "123", "genome_name": "TestGenome"},
        "stats": {"total_genes": 1000, "coding_genes": 900},
        "run": {"start_time": "2024-01-01", "end_time": "2024-01-02"},
        "version": {"bakta": "1.5.0"},
        "features": [
            {"feature_id": "f1", "type": "gene", "location": "1..100"},
            {"feature_id": "f2", "type": "CDS", "location": "200..300"},
        ],
        "sequences": [
            {
                "sequence_id": "s1",
                "length": 10000,
                "description": "Escherichia coli K12 MG1655 complete genome",
                "orig_description": "len=10000 cov=20x corr=yes origname=NC_000913 sw=bwa date=2024-01-01",
            },
            {
                "sequence_id": "s2",
                "length": 5000,
                "description": "Salmonella enterica Typhimurium SL1344 complete genome",
                "orig_description": "len=5000 cov=30x corr=yes origname=NC_003197 sw=bwa date=2024-01-01",
            },
        ],
    }

    def setUp(self):
        """Shadow the shared fixture with a per-test deep copy.

        The functions under test receive the dict itself; if one of them
        mutates its argument, the change must not leak into the next test.
        """
        self.sample_json = copy.deepcopy(type(self).sample_json)

    def _assert_identifier_column(self, df, identifier):
        """Assert that *df* has an 'identifier' column holding only *identifier*."""
        self.assertIn('identifier', df.columns)
        self.assertTrue(all(df['identifier'] == identifier))

    def test_prep_info_df(self):
        """Tests the prep_info_df function."""
        info_df = json_to_df.prep_info_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(info_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = [
            'identifier',
            'genome_genome_id',
            'genome_genome_name',
            'stats_total_genes',
            'stats_coding_genes',
            'run_start_time',
            'run_end_time',
            'version_bakta',
        ]
        self.assertListEqual(list(info_df.columns), expected_columns)

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(info_df, 'sample1')

        # Check for correct data types (important for potential database
        # interactions).
        self.assertTrue(pd.api.types.is_string_dtype(info_df['identifier']))
        self.assertTrue(pd.api.types.is_string_dtype(info_df['genome_genome_name']))
        self.assertTrue(pd.api.types.is_integer_dtype(info_df['stats_total_genes']))

    def test_prep_features_df(self):
        """Tests the prep_features_df function."""
        features_df = json_to_df.prep_features_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(features_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = ['identifier', 'feature_id', 'type', 'location']
        self.assertListEqual(list(features_df.columns), expected_columns)

        # Every single value must be a string.  Checking value by value lets
        # a failure name the offending column and value.
        for column_name, column in features_df.items():
            for value in column:
                self.assertIsInstance(
                    value,
                    str,
                    msg=f"non-string value {value!r} in column {column_name!r}",
                )

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(features_df, 'sample1')

    def test_prep_sequences_df(self):
        """Tests the prep_sequences_df function."""
        sequences_df = json_to_df.prep_sequences_df(self.sample_json, "sample1")

        # The output must be a DataFrame.
        self.assertIsInstance(sequences_df, pd.DataFrame)

        # The DataFrame must have exactly these columns, in this order.
        expected_columns = [
            'identifier',
            'sequence_id',
            'length',
            'description',
            'len',
            'cov',
            'corr',
            'origname',
            'sw',
            'date',
            'genus',
            'species',
            'gcode',
            'topology',
        ]
        self.assertListEqual(list(sequences_df.columns), expected_columns)

        # The 'len' column must exist after splitting.
        self.assertIn('len', sequences_df.columns)

        # Check for object type after string operations.
        self.assertTrue(pd.api.types.is_object_dtype(sequences_df['len']))

        # The identifier column must be present and filled on every row.
        self._assert_identifier_column(sequences_df, 'sample1')


if __name__ == '__main__':
    # An explicit argv keeps unittest from parsing the notebook kernel's own
    # command line, and exit=False makes unittest.main return instead of
    # calling sys.exit() when the run is over.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

**Important Notes:**

* **Error Messages:** If any of the assertions fail, the assertion method raises an `AssertionError`, which `unittest` catches and reports as a test failure together with a message and traceback explaining what went wrong.  This will help you pinpoint exactly what's going wrong in your code.
* **Test Coverage:** These tests provide a good starting point, but consider adding more tests to cover edge cases, error conditions (e.g., malformed JSON), and different types of input data.  Aim for high test coverage to ensure the robustness of your code.
* **Maintenance:** As you modify your `json_to_df.py` file, *always* run your unit tests to make sure you haven't introduced any regressions (i.e., broken existing functionality).