In [None]:
import unittest
import pandas as pd
from stats_to_df import stats_to_df
import io

class TestStatsToDf(unittest.TestCase):
    """Check ``stats_to_df`` against a small tab-separated stats fixture.

    Each test writes the fixture to ``ref_query_stats.out`` in the current
    working directory, passes that path to ``stats_to_df`` and inspects the
    returned DataFrame. The file is removed again after the test.
    """

    def setUp(self):
        """Write the fixture stats file that ``stats_to_df`` will read."""
        # Create a dummy stats file for testing
        self.stats_content = """Total number\t143
Insertions\t12
Deletions\t6
Substitutions\t92
Translocations\t27
Relocations\t0
Reshufflings\t0
Reshuffled blocks\t0
Inversions\t0
Unaligned sequences\t6

Uncovered ref regions num\t136
Uncovered ref regions len\t69939

DETAILED INFORMATION:
substitution\t92
gap\t0

insertion\t8
duplication\t2
tandem_duplication\t0
unaligned_beginning\t1
unaligned_end\t0
inserted_gap\t1

deletion\t5
collapsed_repeat\t1
tandem_collapsed_repeat\t0

translocation\t5
translocation-insertion\t7
translocation-insertion_ATGCN\t0
translocation-inserted_gap\t0
translocation-overlap\t15

circular_genome_start\t0
relocation\t0
relocation-insertion\t0
relocation-insertion_ATGCN\t0
relocation-inserted_gap\t0
relocation-overlap\t0


ADDITIONAL INFORMATION:
query sequences\t49
reference sequences\t116
"""
        # The function expects a file path.
        # NOTE(review): presumably stats_to_df derives the '_REF'/'_QUERY'
        # values ('ref'/'query') from this file name -- confirm before renaming
        # or relocating the fixture.
        self.file_path = "ref_query_stats.out"
        with open(self.file_path, "w", encoding="utf-8") as f:
            f.write(self.stats_content)

    def tearDown(self):
        """Remove the fixture file so test runs leave no artefacts behind."""
        import os  # local import: `os` is not imported at module level

        # EAFP: the file may already be gone; that is not an error here.
        try:
            os.remove(self.file_path)
        except FileNotFoundError:
            pass

    def test_stats_to_df(self):
        """The fixture must yield a non-empty, fully populated DataFrame."""
        # Execute the function with the test file
        df = stats_to_df(self.file_path)

        # Assertions to validate the dataframe's structure and content
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(list(df.columns), ['param', 'value', '_REF', '_QUERY'])  # Check columns
        self.assertFalse(df.empty)  # Check not empty
        self.assertTrue(
            all(df['value'].notnull()),  # Check no nulls in value
            msg=f"null 'value' for params: {df.loc[df['value'].isnull(), 'param'].tolist()}",
        )

        # Check for specific values (this depends on the expected output)
        expected_params = ['Total number', 'Insertions', 'Deletions', 'Substitutions', 'Translocations',
                           'Relocations', 'Reshufflings', 'Reshuffled blocks', 'Inversions',
                           'Unaligned sequences', 'Uncovered ref regions num', 'Uncovered ref regions len',
                           'substitution', 'gap', 'insertion', 'duplication', 'tandem_duplication',
                           'unaligned_beginning', 'unaligned_end', 'inserted_gap', 'deletion',
                           'collapsed_repeat', 'tandem_collapsed_repeat', 'translocation',
                           'translocation-insertion', 'translocation-insertion_ATGCN',
                           'translocation-inserted_gap', 'translocation-overlap', 'circular_genome_start',
                           'relocation', 'relocation-insertion', 'relocation-insertion_ATGCN',
                           'relocation-inserted_gap', 'relocation-overlap', 'query sequences',
                           'reference sequences']
        # Collected only for the failure message, so a failing run names the
        # absent params instead of reporting "False is not true".
        missing = [param for param in expected_params if param not in df['param'].values]
        self.assertTrue(
            all(param in df['param'].values for param in expected_params),
            msg=f"params missing from DataFrame: {missing}",
        )

        # NOTE(review): this dtype check only bites for rows where iterrows()
        # yields a Python ``int``; if 'value' holds e.g. strings, the generator
        # is empty and the assertion passes vacuously. Confirm the dtype that
        # stats_to_df is meant to return and tighten this check accordingly.
        self.assertTrue(
            all(df['value'].dtype == 'int64' for _, row in df.iterrows() if isinstance(row['value'], int)),
            msg=f"'value' column has dtype {df['value'].dtype}, expected int64",
        )

        #verify ref and query names are added
        self.assertTrue(
            all(df['_REF'] == 'ref'),
            msg=f"unexpected _REF values: {df['_REF'].unique().tolist()}",
        )
        self.assertTrue(
            all(df['_QUERY'] == 'query'),
            msg=f"unexpected _QUERY values: {df['_QUERY'].unique().tolist()}",
        )

# Run the tests only when this file is executed directly, not when imported.
if __name__ == '__main__':
    # A dummy argv keeps unittest from parsing the host process's own
    # command-line arguments, and exit=False stops unittest.main() from calling
    # sys.exit() when the run finishes (both matter inside a Jupyter kernel).
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

### Explanation

1.  **Imports**:
    * `unittest`:  The testing framework.
    * `pandas`:  Because the function returns a pandas DataFrame.
    * `stats_to_df`: The function being tested.
    * `io`:  Not used in this version, but can be useful for testing with in-memory files.
2.  **`TestStatsToDf` Class**:
    * This class inherits from `unittest.TestCase`, which provides the basic structure for tests.
3.  **`setUp` Method**:
    * This method is executed before each test method.
    * It creates a dummy stats file (`ref_query_stats.out`) with the provided data.  This ensures that the test has a consistent input file.
4.  **`tearDown` Method**:
    * This method is executed after each test method.
    * It cleans up the dummy stats file, so your directory isn't cluttered with test files.
5.  **`test_stats_to_df` Method**:
    * This is the actual test case.
    * It calls the `stats_to_df` function with the dummy file path.
    * It then uses `self.assert...` methods to make assertions about the returned DataFrame.  Here's a breakdown:
        * `self.assertIsInstance(df, pd.DataFrame)`: Checks if the result is a DataFrame.
        * `self.assertEqual(list(df.columns), ['param', 'value', '_REF', '_QUERY'])`: Checks if the columns are as expected.
        * `self.assertFalse(df.empty)`:  Checks if the DataFrame is not empty.
        * `self.assertTrue(all(df['value'].notnull()))`:  Checks if there are no null values in the `value` column.
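        * `self.assertTrue(all(param in df['param'].values for param in expected_params))`:  Verifies that all expected parameters are present in the DataFrame.
        * `self.assertTrue(all(df['value'].dtype == 'int64' for ... if isinstance(row['value'], int)))`:  For every row whose `value` is a Python `int`, checks that the `value` column has dtype `int64`; rows with non-`int` values are skipped.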
        * `self.assertTrue(all(df['_REF'] == 'ref'))`:  Verifies that the `_REF` column has the correct value ('ref').
        * `self.assertTrue(all(df['_QUERY'] == 'query'))`:  Verifies that the `_QUERY` column has the correct value ('query').
6.  **`if __name__ == '__main__':` Block**:
    * This ensures that the tests are run when the script is executed directly (not when imported as a module).
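    * `unittest.main()`:  Runs the tests.  `argv=['first-arg-is-ignored']` stops `unittest` from trying to parse the notebook kernel's own command-line arguments (the first element is treated as the program name and ignored), and `exit=False` stops it from calling `sys.exit()` when the tests finish, which would otherwise raise `SystemExit` inside the Jupyter Notebook.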

To use this in a Jupyter Notebook:

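1.  Make sure `stats_to_df.py` is in the notebook's working directory so that `from stats_to_df import stats_to_df` resolves. (Optionally, also save the test code to a file named `test_stats_to_df.py` in the same directory as your `stats_to_df.py` to run it outside the notebook with `python -m unittest test_stats_to_df`.)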
2.  Open a Jupyter Notebook.
3.  Execute the following code in a cell:

In [None]:
import unittest
import pandas as pd
from stats_to_df import stats_to_df
import io

class TestStatsToDf(unittest.TestCase):
    """Check ``stats_to_df`` against a small tab-separated stats fixture.

    Each test writes the fixture to ``ref_query_stats.out`` in the current
    working directory, passes that path to ``stats_to_df`` and inspects the
    returned DataFrame. The file is removed again after the test.
    """

    def setUp(self):
        """Write the fixture stats file that ``stats_to_df`` will read."""
        # Create a dummy stats file for testing
        self.stats_content = """Total number\t143
Insertions\t12
Deletions\t6
Substitutions\t92
Translocations\t27
Relocations\t0
Reshufflings\t0
Reshuffled blocks\t0
Inversions\t0
Unaligned sequences\t6

Uncovered ref regions num\t136
Uncovered ref regions len\t69939

DETAILED INFORMATION:
substitution\t92
gap\t0

insertion\t8
duplication\t2
tandem_duplication\t0
unaligned_beginning\t1
unaligned_end\t0
inserted_gap\t1

deletion\t5
collapsed_repeat\t1
tandem_collapsed_repeat\t0

translocation\t5
translocation-insertion\t7
translocation-insertion_ATGCN\t0
translocation-inserted_gap\t0
translocation-overlap\t15

circular_genome_start\t0
relocation\t0
relocation-insertion\t0
relocation-insertion_ATGCN\t0
relocation-inserted_gap\t0
relocation-overlap\t0


ADDITIONAL INFORMATION:
query sequences\t49
reference sequences\t116
"""
        # The function expects a file path.
        # NOTE(review): presumably stats_to_df derives the '_REF'/'_QUERY'
        # values ('ref'/'query') from this file name -- confirm before renaming
        # or relocating the fixture.
        self.file_path = "ref_query_stats.out"
        with open(self.file_path, "w", encoding="utf-8") as f:
            f.write(self.stats_content)

    def tearDown(self):
        """Remove the fixture file so test runs leave no artefacts behind."""
        import os  # local import: `os` is not imported at module level

        # EAFP: the file may already be gone; that is not an error here.
        try:
            os.remove(self.file_path)
        except FileNotFoundError:
            pass

    def test_stats_to_df(self):
        """The fixture must yield a non-empty, fully populated DataFrame."""
        # Execute the function with the test file
        df = stats_to_df(self.file_path)

        # Assertions to validate the dataframe's structure and content
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(list(df.columns), ['param', 'value', '_REF', '_QUERY'])  # Check columns
        self.assertFalse(df.empty)  # Check not empty
        self.assertTrue(
            all(df['value'].notnull()),  # Check no nulls in value
            msg=f"null 'value' for params: {df.loc[df['value'].isnull(), 'param'].tolist()}",
        )

        # Check for specific values (this depends on the expected output)
        expected_params = ['Total number', 'Insertions', 'Deletions', 'Substitutions', 'Translocations',
                           'Relocations', 'Reshufflings', 'Reshuffled blocks', 'Inversions',
                           'Unaligned sequences', 'Uncovered ref regions num', 'Uncovered ref regions len',
                           'substitution', 'gap', 'insertion', 'duplication', 'tandem_duplication',
                           'unaligned_beginning', 'unaligned_end', 'inserted_gap', 'deletion',
                           'collapsed_repeat', 'tandem_collapsed_repeat', 'translocation',
                           'translocation-insertion', 'translocation-insertion_ATGCN',
                           'translocation-inserted_gap', 'translocation-overlap', 'circular_genome_start',
                           'relocation', 'relocation-insertion', 'relocation-insertion_ATGCN',
                           'relocation-inserted_gap', 'relocation-overlap', 'query sequences',
                           'reference sequences']
        # Collected only for the failure message, so a failing run names the
        # absent params instead of reporting "False is not true".
        missing = [param for param in expected_params if param not in df['param'].values]
        self.assertTrue(
            all(param in df['param'].values for param in expected_params),
            msg=f"params missing from DataFrame: {missing}",
        )

        # NOTE(review): this dtype check only bites for rows where iterrows()
        # yields a Python ``int``; if 'value' holds e.g. strings, the generator
        # is empty and the assertion passes vacuously. Confirm the dtype that
        # stats_to_df is meant to return and tighten this check accordingly.
        self.assertTrue(
            all(df['value'].dtype == 'int64' for _, row in df.iterrows() if isinstance(row['value'], int)),
            msg=f"'value' column has dtype {df['value'].dtype}, expected int64",
        )

        #verify ref and query names are added
        self.assertTrue(
            all(df['_REF'] == 'ref'),
            msg=f"unexpected _REF values: {df['_REF'].unique().tolist()}",
        )
        self.assertTrue(
            all(df['_QUERY'] == 'query'),
            msg=f"unexpected _QUERY values: {df['_QUERY'].unique().tolist()}",
        )


# Run the tests only when this file is executed directly, not when imported.
if __name__ == '__main__':
    # A dummy argv keeps unittest from parsing the host process's own
    # command-line arguments, and exit=False stops unittest.main() from calling
    # sys.exit() when the run finishes (both matter inside a Jupyter kernel).
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

This will run the tests and display the output in your notebook.