# Classes and modules

## Recall

Last unit we learned how to use control structures like ```for```, ```if``` and ```functions```.
We used this knowledge to complete the code to read in the comma-separated-values-files.

<details>
  <summary>Show Code</summary>

```Python
def get_maximal_duration_and_units(units_file_path):
    """!
        @brief This function reads in a csv-file containing the recorded units and returns the maximal duration and unit ids
        @details We assume that we get a csv-file with a header line and 4 fields per row.
            The 2nd field should contain the id of the unit.
            The 4th the spike time.
            The header line is skipped, every other row is split at the commas.
        @param units_file_path the path to the csv-file as a str
        @return a tuple with the highest spike time moved up to the next full second (an int)
            and a set of the discovered unit-ids
    """
    longest_duration = 0
    units = set()

    # "with" closes the file for us, even if a row can not be converted
    with open(units_file_path) as units_file:
        first_row = True
        for row in units_file:
            # The first row is the header and contains no data
            if first_row:
                first_row = False
                continue
            rat_id, unit_id, channel, spike_time = row.split(",")
            # int() cuts off the fraction, + 1 moves us to the next full second
            spike_time_rounded_up = int(float(spike_time)) + 1
            if longest_duration < spike_time_rounded_up:
                longest_duration = spike_time_rounded_up
            # A set ignores values it already contains, so we do not have to check first
            units.add(int(unit_id))
    return (longest_duration, units)

def get_a_list_with_all_the_seconds(longest_duration):
    """!
        @brief Builds a list containing all the seconds between 1 and the longest duration
        @param longest_duration the last second in the list as an int
        @return a list with the ints 1, 2, ... up to and including longest_duration
    """
    # range stops before its upper limit, so we add 1 to include the last second
    return list(range(1, longest_duration + 1))

def get_empty_unit_spike_counts(seconds, units):
    """!
        @brief Creates a dictionary containing a list filled with 0s for every unit-id
        @details the lists in the dict are intended as counters for the spikes,
            every unit gets its own list, so the counters do not influence each other
        @param seconds a list with one entry per second, it only decides the length of the counters
        @param units the unit-ids that are used as keys of the dict
        @return a dict containing a list filled with 0s for every unit-id
    """
    amount_of_seconds = len(seconds)
    zeroed_counters = dict()
    for unit in units:
        # The multiplication creates a fresh list of 0s for every unit
        zeroed_counters[unit] = [0] * amount_of_seconds
    return zeroed_counters

def get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts):
    """!
        @brief This function counts the spikes in every second
        @details it reads in the units row by row and fills up the corresponding entries in the dict
            the dict should contain an entry for every unit-id containing a list with an int for every second.
            The counters are changed in place, nothing is returned.
            A spike between 0 s and 1 s belongs to the 1st second, which is stored at index 0.
        @param units_file_path a path to the unit-csv-file, with a header line and 4 fields per row
        @param units_spike_counts a dict with a list of counters (one int per second) for every unit-id
    """
    # "with" closes the file for us, even if a row can not be converted
    with open(units_file_path) as units_file:
        first_row = True
        for row in units_file:
            # The first row is the header and contains no data
            if first_row:
                first_row = False
                continue
            rat_id, unit_id, channel, spike_time = row.split(",")
            # The rounded down time already is the index of the second:
            # 0.57 -> index 0 (1st second), 1.5 -> index 1 (2nd second).
            # Subtracting 1 would turn 0 into -1, which is the LAST entry of a list.
            index = int(float(spike_time))
            units_spike_counts[int(unit_id)][index] += 1
    return

def get_immobility_for_every_second(immobility_file_path, seconds):
    """!
        @brief Gets a list expressing immobility for every second
        @details This function takes the phases given in the immobility file
            and creates a list of bools marked accordingly.
            The file needs a header line and 2 fields per row: the begin and the end of a phase in seconds.
            A second counts as immobile if it lies strictly between the begin and the end of a phase,
            the begin and the end themselves are not marked.
        @param immobility_file_path the path to the immobility-csv
        @param seconds a list enumerating the seconds
        @return a list of bools marking immobility of every second
    """
    phases = list()
    # "with" closes the file for us, even if a row can not be converted
    with open(immobility_file_path) as immobility_file:
        first_row = True
        for row in immobility_file:
            # The first row is the header and contains no data
            if first_row:
                first_row = False
                continue
            begin_in_seconds, end_in_seconds = row.split(",")
            phases.append((int(begin_in_seconds), int(end_in_seconds)))

    is_immobile = list()
    for second in seconds:
        # any() stops at the first phase that contains the second
        is_in_phase = any(begin < second < end for begin, end in phases)
        is_immobile.append(is_in_phase)
    return is_immobile

# Main part of the script

# The two csv-files of the session we are going to use
units_file_path = "./data_neuron/session_2023111501010_units.csv"
immobility_file_path = "./data_neuron/session_2023111501010_immobility.csv"

# First pass over the units-file: find out how many seconds and which units exist
longest_duration, units = get_maximal_duration_and_units(units_file_path)
seconds = get_a_list_with_all_the_seconds(longest_duration)

# Second pass over the units-file: fill the spike counters of every unit
units_spike_counts = get_empty_unit_spike_counts(seconds, units)
get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts)

# The table is a list of columns:
# the seconds, one column of spike-counts per unit and the immobility
table = [seconds]
table.extend(units_spike_counts.values())
table.append(get_immobility_for_every_second(immobility_file_path, seconds))

print(table)
```

</details>

You may have expected that reading in csv-files is a common task and that there is a more comfortable solution than coding it yourself.
In Python, we often use code provided via the python package index.
This code usually comes in the form of modules.
Most modules use classes.
So this unit deals first with classes and then with modules.

## Classes

In the last unit you learned how to use **functions** to structure your code.
**functions** usually take **arguments** as inputs.
Programmers learnt that most **functions** are very closely associated with a set of **arguments**. 
So they concluded that these arguments should be bundled in [structures](https://en.wikipedia.org/wiki/Struct_(C_programming_language)) and associated with their **functions**.
This led to **classes** and the emergence of [object-oriented-programming](https://en.wikipedia.org/wiki/Object-oriented_programming).

Object oriented programming tries to understand the world as a limited set of abstract ideas.
It tries to simplify, by finding a set of shared attributes and behaviors.
The authors of the original data, for example, decided to describe all the spikes by:

- their rat ID
- their unit ID
- their channel
- the time they occurred

So they defined a "spike" **class**. 
The objects belonging to or the **instances** of this **class**  are the entries in the csv-file we saw.

As mentioned before classes are a combination of **functions** and **values**.
So we could add a **function** to this spike-**class** if we want, like getting the rounded down spike time.
This **function** belonging to the class would then be called a **method**.

There are a few **methods** that should always exist.
If we do not write them Python will create some dummy method instead.
A **method** that almost every class will have is the constructor, that transfers the **values** into the object / **instance**.
In Python it is called ```__init__```.

To create a **class** we begin with the keyword ```class``` followed by its name and ```:```.
We then begin listing its attributes like the **methods**.
So we write a **function** called ```__init__```.
The first argument of every **method** in Python has to be the **instance** of the **class** itself,
therefore it is usually named ```self```, then the other **arguments** follow.

Let us put this into code:
```Python
# Define the class with the name "Spike"
class Spike:
    # Write the constructor with all the arguments we need to store our attributes
    def __init__(self, rat_id, unit_id, channel, spike_time):
        # I usually give the attributes of the class the same names as the arguments to avoid confusion
        # You may as well name them differently and write:
        # self.rat_id = rat_identification
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
```

This snippet defines the **class**, but does not create an **instance**.
The computer knows what a spike is, but it does not know any specific spike.
To create a specific spike we have to create an **instance**.
In our case, we will create an **instance** called ```spike_1```:

```Python
spike_1_rat_id = 1
spike_1_unit_id = 0
spike_1_channel = 2
spike_1_spike_time = 0.5725
spike_1 = Spike(spike_1_rat_id, spike_1_unit_id, spike_1_channel,  spike_1_spike_time)
```

If we now wish to get the time of the spike we can access the attribute or member by following up the name of our **instance** with ```.``` and the name of the member:

```Python
spike_1_spike_time = spike_1.spike_time
```

This approach becomes useful if we deal with a large number of **instances**, in our case a few hundred spikes for example.

Now you know how classes are generally used. Let us start with a few exercises.
First write your own Spike **class** and add a method to get the rounded down spike time.
To access the attributes in the class use ```self```.
If you are stuck at this exercise try to search for inspiration on the internet.

In [None]:
# Your code goes here

<details>
  <summary>Click to reveal solution</summary>

```Python
class Spike:
    # The constructor stores the four values of a spike as attributes of the instance
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    # A method reaches the attributes of its own instance via self
    def rounded_down_spike_time(self):
        # int() cuts off the fraction of the spike time
        return int(self.spike_time)

# Create one instance and call the new method on it
spike_1_rat_id = 1
spike_1_unit_id = 0
spike_1_channel = 2
spike_1_spike_time = 0.5725
spike_1 = Spike(spike_1_rat_id, spike_1_unit_id, spike_1_channel,  spike_1_spike_time)

print(spike_1.rounded_down_spike_time())
```

</details>

## Documenting classes and functions

Last unit we discussed readable code and mentioned docstrings.
Now is the time to see them in action.
The difference between a docstring and a normal comment is that the doc-string can be accessed from within your code.
So people can explore your code interactively.
They are also attached to an object in your code, like a **class** or a **function**, so you can use them to describe this object in detail.

### Docstrings

To write a good docstring there are multiple conventions to choose from.
The first is the rather free [PEP 257](https://peps.python.org/pep-0257/) which may look like this:

```Python
class Spike:
    """The activity of a neural unit"""
    def __init__(self, rat_id, unit_id, channel, spike_time):
        """Constructs a new spike

        Arguments:
        rat_id -- the identification number of the rat
        unit_id -- the identification number of the neural unit
        channel -- the identification number of the channel
        spike_time -- the time the spike occurs
        """
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    def rounded_down_spike_time(self):
        """Gets the truncated spike time

        The spike time is rounded to an int by truncating the fraction
        """
        return int(self.spike_time)
```

As you may have noticed, it consists of three pieces of information:
- What goes in (input / arguments)
- What goes out (output / return)
- What is its role.
      What does a **function**/**method** do, what does a class represent.

For our example, this is quite straightforward and you may opt to skip the documentation entirely,
because the name of the class and methods are quite straightforward.
This will not be the case for long, because your projects will use more complex **classes**, that need to be explained to newcomers.
If we would try to get the temporal density of the spikes in an area we would probably use a [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation),
which you would probably need a few minutes and a little bit of text to understand.
Therefore, if I would write code for you that does this, I should explain it as we did in the example.

Please keep in mind that while you write your code you build up expertise in your area.
This means that you start to overestimate what is common knowledge, therefore it is better to document too much.
Otherwise you end up like these guys in this [xkcd-comic]( https://xkcd.com/2501/):

![xkcd average familiarity, describes that experts drastically overestimate the average familiarity of the general population with their field.](https://imgs.xkcd.com/comics/average_familiarity.png)

The python oriented documenting tool is [Sphinx](https://www.sphinx-doc.org/en/master/index.html),
which encourages the user to provide more information.
If you use it you should probably start from an [example](https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html) and read the documentation later, because it is not trivial.
So our documented class would look like this:

```Python
class Spike:
    """The activity of a neural unit

    :param rat_id: the identification number of the rat
    :type rat_id: int
    :param unit_id: the identification number of the neural unit
    :type unit_id: int
    :param channel: the identification number of the channel
    :type channel: int
    :param spike_time: the time the spike occurs
    :type spike_time: float
    """
    def __init__(self, rat_id, unit_id, channel, spike_time):
        """Constructor method
        """
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    def rounded_down_spike_time(self):
        """Gets the truncated spike time

        The spike time is rounded to an int by truncating the fraction

        :return: The rounded down spike time
        :rtype: int
        """
        return int(self.spike_time)
```

A different solution is [doxygen](https://www.doxygen.nl/manual/docblocks.html#pythonblocks).
It supports multiple languages and was widely adopted by the [C++](https://de.wikipedia.org/wiki/C%2B%2B)-community.
Its support for Python exists but it often fails to document the finer features.
I advise to use it if your project uses multiple languages or your team has previous experience with it.
The documentation would look like this:

```Python
class Spike:
    """!
    @brief The activity of a neural unit
    """
    def __init__(self, rat_id, unit_id, channel, spike_time):
        """!
        @brief Constructs a new spike

        @param rat_id the identification number of the rat
        @param unit_id the identification number of the neural unit
        @param channel the identification number of the channel
        @param spike_time the time the spike occurs
        @return A new instance of the Spike class
        """
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    def rounded_down_spike_time(self):
        """!
        @brief Gets the truncated spike time
        @details The spike time is rounded to an int by truncating the fraction
        
        @return The rounded down spike time
        """
        return int(self.spike_time)
```

After we learned all this it is time to practice. First you should investigate how to get a docstring.
Please view the docstring for **int** by using the help function, in the cell below.
Also consider investigating a few other types.

In [None]:
help(int)

### Type hinting

Python attempts to hide **types** from us, by using a method called [duck-typing](https://en.wikipedia.org/wiki/Duck_typing).
Duck-typing is named after the [duck-test]( https://en.wikipedia.org/wiki/Duck_test):

>	If it looks like a duck, swims like a duck, and quacks like a duck, then it probably is a duck.

To give a concrete example if it can be used with ```+```, ```-```,```/``` and ```*``` it is probably a number.
So Python does not care if something is an **int** or a **float** as long as they behave the same.
This concept can then be expanded to other **types** like **list** or **tuple**.
So if all your **function** does is access an argument like ```argument[3]``` it does not matter if it is a **list** or a **tuple**.
This behavior is often useful, because **functions** can work on **types** they were not designed for, but there are dangers.
Consider the following example:

```Python
test_list = ["Test A", "Test B", "Test C"]
index_b = 1
index_c = 4 / 2
print(test_list[index_b])
print(test_list[index_c])
```

Please predict the output and then execute the code in the cell below:

In [None]:
# Copy code here

As you may have noticed the ```[]```-**operator** only takes **ints**, but the ```/``` produced a **float**, leading to an error.
This is the danger of duck-typing: everything might work for most of the **functions**,
but then break at the very end. Now you have to figure out what went wrong.

The great danger is not the code crashing and raising an error, but everything seemingly working fine and producing an incorrect result.
Let us take our list again and print the first entry with the following code.
Please predict what happens before you execute:

```Python
test_list = ["Test A", "Test B", "Test C"],
print(test_list[0])
```

In [None]:
# Copy code here

So what went wrong here? If you investigate the first line you will see the extra ```,``` at the end.
This means that the first line could be read as ```test_list =([ "Test A", "Test B", "Test C"],)```, so ```test_list``` was actually a **tuple**.
So ```test_list[0]``` gave us the first element of the **tuple**, which was the **list**.
I hope you can see how such a typo might be missed and cause problems later, without raising an error.

To avoid this problem we have two tools at our disposal.
The first one is additional documentation in the form of [type-hints]( https://docs.python.org/3/library/typing.html),
these permit us to communicate the intended **type**.
The final users may still feed other stuff into it, but they now have the option to supply the **type** we had in mind when we wrote the code.

For type hinting we add the **type** of an argument behind it separated by ```:```.
The return **type** is written at the end of a **function**/**method** after ```->```.
If nothing is returned the return **type** is ```None```, so we write ```-> None```.
We can only use **classes** that are fully defined, so a class can not hint at itself, for this reason the return of ```__init__``` is not hinted.

Here is an example of type-hinting demonstrated on our Spike-class:

```Python
class Spike:
    # Every argument (except self) is followed by ":" and its intended type
    def __init__(self, rat_id: int, unit_id: int, channel: int, spike_time: float):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    # The return type is written after "->"
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)
```

Now, this is the polite way for people that read documentation.
Considering that I often work with my own code, I do not need to be polite.
I also do not read the documentation of something I wrote just a few weeks ago,
so I use a different method for my code.
Since I know what inputs I expect, I check for them and raise an error if the **type** is not expected.
This approach permits me to fail early and fix the problem at its source instead of searching for it for hours.
I assume you will find similar uses of ```type``` and ```isinstance``` in a few code-bases.

The two **functions** ```type``` and ```isinstance``` allow us to investigate the **type** of an object.
So if we wish to get the **type** of a variable ```a``` we write ```type(a)```.  Here is a short example:

```Python
a = 2.3
type_of_a = type(a)
print(type_of_a)
```

Please execute it in the cell below and interpret the result.

In [None]:
# Copy code here

So our object ```a``` had the **type** ```<class 'float'>```.
This means it is a **class** and the **class** is called ```float```.
So internally Python uses **classes** for all data-**types**, which sets it apart from older languages like C++ or Java.

We can use this in a number of ways.
We can compare **types**, use them in conditional statements like an if-statement or show them in error messages.
For the former two we should use ```isinstance```, because it is faster.
So if we wish to raise an error if anything but an **int** is supplied we would write:

```Python
def raise_error_if_int(argument:object)->None:
    """Raises a TypeError unless the argument is an int

    Note that, despite its name, the function raises the error
    if the argument is NOT an int. An int passes without any effect.

    Arguments:
    argument -- the object whose type is checked
    """
    if not isinstance(argument, int):
        # The message is put together piece by piece
        message = "The supplied argument ("
        message += str(argument)
        message +=") was of type \""
        message += str(type(argument))
        message += "\" but \""
        message +=  str(int)
        message += "\" was expected."
        raise TypeError(message)

correct = 1
not_correct = 1.0

# An int is accepted, nothing happens
raise_error_if_int(correct)
# A float is not an int, so this call raises the TypeError
raise_error_if_int(not_correct)
```

Or if we want to shorten the **function** by using [formatted string](https://docs.python.org/3/tutorial/inputoutput.html):

```Python
def raise_error_if_int(argument:object)->None:
    """Raises a TypeError unless the argument is an int

    Note that, despite its name, the function raises the error
    if the argument is NOT an int. An int passes without any effect.

    Arguments:
    argument -- the object whose type is checked
    """
    if not isinstance(argument, int):
        # The expressions inside the {} are evaluated and inserted into the str
        message = f"The supplied argument ({str(argument)}) was of type \"{str(type(argument))}\"  but \"{str(int)}\" was expected."
        raise TypeError(message)

correct = 1
not_correct = 1.0

# An int is accepted, nothing happens
raise_error_if_int(correct)
# A float is not an int, so this call raises the TypeError
raise_error_if_int(not_correct)
```

Please copy your preferred version in the box below and execute it.

In [None]:
# Copy code here

Before we move on I have a very simple question.
What **type** does the ```type``` **function** return?
What do you think?
Speculate with your neighbor before you try it out.

```Python
print(type(type(1)))
```

In [None]:
# Copy code here

For you this result may not be spectacular, but for computer science students it is.
It allows us to change the code of the program while it is running;
an ability mostly lost by the introduction of higher-level compiled programming languages
and regained by interpreted programming languages like Python.
If you are interested how it can be used consider searching for [reflective programming](https://en.wikipedia.org/wiki/Reflective_programming).

However, before you drift off on this interesting side-quest I would ask you to apply what we just learned to our Spike-**class**.
So please expand the Spike-class with type-hints and some ```TypeErrors``` in the cell below.

In [None]:
# Add your code here

<details>
  <summary>Click to reveal suggested solution</summary>

```Python
def check_type(argument: object, expected_types: list[type]) -> None:
    """Raises a TypeError unless the argument has one of the expected types

    The expected types are tried one after another, the check ends
    successfully at the first type that fits the argument.

    Arguments:
    argument -- the object whose type is checked
    expected_types -- a list with all the types that are accepted

    Raises a TypeError if expected_types is not a list,
    if an entry that gets tried is not a type
    or if the argument fits none of the expected types.
    """
    if not isinstance(expected_types, list):
        raise TypeError(f"The expected_types ({expected_types}) were not supplied as a {list}, but as a {type(expected_types)} instead.")
    correct_type = False
    for expected_type in expected_types:
        # isinstance would fail with a confusing message if we hand it something that is not a type
        if not isinstance(expected_type, type):
            raise TypeError(f"The expected_type ({expected_type}) was not supplied as a {type}, but as a {type(expected_type)} instead.")
        if isinstance(argument, expected_type):
            correct_type = True
            break
    if not correct_type:
        raise TypeError(f"{argument} was of type {type(argument)}, which was not expected. The expected types were {expected_types}.")


class Spike:
    """The activity of a neural unit"""
    def __init__(self, rat_id: int, unit_id: int, channel: int, spike_time: float):
        """Constructs a new spike

        Arguments:
        rat_id -- the identification number of the rat
        unit_id -- the identification number of the neural unit
        channel -- the identification number of the channel
        spike_time -- the time the spike occurs
        """
        # Fail early: check every argument before it is stored
        check_type(rat_id, [int])
        check_type(unit_id, [int])
        check_type(channel, [int])
        check_type(spike_time, [float])
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    
    def rounded_down_spike_time(self)-> int:
        """Gets the truncated spike time

        The spike time is rounded to an int by truncating the fraction
        """
        return int(self.spike_time)
```

</details>

## Fancy things

By now you may have realized that classes are for better or worse central to Python.
There are a few more concepts I wish to mention without applying them.
They may come in handy when you try to expand your knowledge later.

### Class-methods

Class-methods are **methods** that are not bound to an **instance** of the **class**,
this means they do not use member-variables via ```self``` like other **methods**.
Instead they take the **class** as an argument, which is usually called ```cls```.
A common use are different constructor methods. Here is a short example:

```Python
class Spike:
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    # The decorator marks the following method as a class-method
    @classmethod
    def from_sequence(cls, sequence):
        """Creates a Spike from a four element sequence like a tuple or a list.
        """
        # cls is the class itself, so calling it runs the constructor
        spike = cls(sequence[0], sequence[1], sequence[2], sequence[3])
        return spike
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)
```

### Static-methods

Static-methods are **methods** that are not bound to an **instance** of the **class** or the **class** itself,
this means they take neither an **instance** nor the **class** itself as an argument.
They are usually used to associate **functions** with **classes**.
Here for example we have a **function** that checks if we can convert a sequence into a ```Spike```,
because it does not interact with the ```Spike```-**class** directly, but belongs to it we add it as a staticmethod.

```Python
class Spike:
    """The activity of a neural unit"""
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    @classmethod
    def from_sequence(cls, sequence):
        """Creates a Spike from a four element sequence like a tuple or a list.
        """
        spike = cls(sequence[0], sequence[1], sequence[2], sequence[3])
        return spike
    # The decorator marks the following method as a static-method,
    # so it takes neither self nor cls as its first argument
    @staticmethod
    def can_build_spike_from_this(sequence):
        """Checks if a sequence can be converted into a Spike

        The sequence needs exactly four elements:
        three ints (rat_id, unit_id, channel) followed by a float (spike_time).

        Returns True if the sequence fits this description and False otherwise.
        """
        # Too short or too long sequences do not describe a spike
        if len(sequence) != 4:
            return False
        for element in [sequence[0], sequence[1], sequence[2]]:
            if not isinstance(element, int):
                return False
        if not isinstance(sequence[3], float):
            return False
        return True
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)
```

### Inheritance

Let us assume we wish to track the rat's temperature in addition to our spikes.
We now need a class that is a spike and has a temperature.
We could now rewrite the ```Spike``` class with all its methods for that or we could reuse it.

If we wish to reuse it, we have two options. First,
our temperature-spike-**class** could contain a spike.
This is called [composition](https://en.wikipedia.org/wiki/Object_composition) and represents a “has a”-relationship,
as in “a temperature-spike *has* a spike.”

The second option is [inheritance](https://en.wikipedia.org/wiki/Inheritance_(object-oriented_programming)),
representing an “is a”-relationship, as in “a ```TemperatureSpike``` *is* a ```Spike```”.
Since the ```TemperatureSpike``` *is* a ```Spike``` it *inherits* all members (like **methods**) from its *parent*-**class**.
So if the ```Spike```-class has a ```rounded_down_spike_time```-method so has our ```TemperatureSpike```.
This means every **function**/**method** that uses ```Spike``` can also use ```TemperatureSpike``` instead.
Here is an example:


```Python
class Spike:
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)

# The class in the brackets is the parent-class we inherit from
class TemperatureSpike(Spike):
    def __init__(self, rat_id, unit_id, channel, spike_time, temperature):
        # super permits us to access the parent-class
        super().__init__(rat_id, unit_id, channel, spike_time)
        # Only the additional value has to be stored here
        self.temperature = temperature

temperature_spike = TemperatureSpike(1, 0, 2, 1.2, 38.2)
```

Maybe your temperature-spike is also a pressure measurement.
So ```TemperatureSpike``` *is* a ```Spike``` and it *is* a ```PressureMeasurement```,
so we have to inherit from multiple sources, like in this example:

```Python
class Spike:
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)

class PressureMeasurement:
    def __init__(self, pressure):
        self.pressure = pressure

# All parent-classes are listed in the brackets, separated by ","
class TemperatureSpike(Spike, PressureMeasurement):
    def __init__(self, rat_id, unit_id, channel, spike_time, temperature, pressure):
        # The parents expect different arguments, so we call the constructor
        # of every parent-class by name and hand over our instance (self) ourselves
        Spike.__init__(self, rat_id, unit_id, channel, spike_time)
        PressureMeasurement.__init__(self, pressure)
        self.temperature = temperature

temperature_spike = TemperatureSpike(1, 0, 2, 1.2, 38.2, 129)
```

I hope you can see that this permits you to split complex systems in small easy to understand parts.
You can achieve similar benefits with composition, but you then have to access the specific part every time,
which can get cumbersome after a few steps.

Inheritance is a powerful tool and therefore dangerous.
I expect you will not need it in your immediate future.
I just wanted you to know what it is and recognize it in the code, once you stumble over it.

### Special methods

There are a number of [special **methods**](https://docs.python.org/3/reference/datamodel.html#special-method-names) for **classes**,
the most relevant one you already know ```__init__```, creating an **instance** of the **class**.
There are a few other relevant ones ```__str__``` for example is used to give us a human readable representation of the **class**,
so we use it to give us better results when we use formatted strings or ```print```.
A similar **method** is ```__repr__```, it should return a **str**, which can be used to reconstruct the object.
All of them start and end with two ```_```. Let us add and use ```__repr__``` as an example:

```Python
class Spike:
    def __init__(self, rat_id, unit_id, channel, spike_time):
        self.rat_id = rat_id
        self.unit_id = unit_id
        self.channel = channel
        self.spike_time = spike_time
    def rounded_down_spike_time(self) -> int:
        return int(self.spike_time)
    def __repr__(self):
        """Gets a str that looks like the call that creates this spike

        The values are separated by "," like the arguments of the constructor,
        so the str can be used to reconstruct the object.
        """
        return f"Spike({self.rat_id}, {self.unit_id}, {self.channel}, {self.spike_time})"

spike_1 = Spike(1, 0, 2, 1.2)
# print falls back to __repr__, because we did not write a __str__-method
print(spike_1)
```

### Data-classes

If you ever wanted to store a few **values** without writing a full ```__init__```-**method** [dataclasses](https://docs.python.org/3/tutorial/classes.html#odds-and-ends) are your solution.
They use a decorator like static- and classmethods and then a list of their members with the **types** of these **members**.
Here is a short example:

```Python
from dataclasses import dataclass

# The decorator generates __init__ and __repr__ from the annotated fields below
@dataclass
class Spike:
    # The fields become the constructor arguments in exactly this order
    rat_id: int
    unit_id: int
    channel: int
    spike_time: float

# No hand-written __init__ needed, the dataclass accepts the fields as arguments
spike_1 = Spike(1, 0, 2, 1.2)
# Prints the generated representation listing every field with its value
print(spike_1)
```

## Modules
You may have noticed that I introduced a new concept here, specifically the line ```from dataclasses import dataclass```, what does it mean?
This is an import statement used to import a module into our script.

To understand what modules are we should return to our csv-processing-code:

```Python
def get_maximal_duration_and_units(units_file_path):
    """!
        @brief Reads in the csv-file with the recorded units and returns the maximal duration and the unit ids
        @details The first line is treated as a header and skipped.
            Every other line has to consist of exactly 4 comma-separated fields.
        @param units_file_path the path to the csv-file
            The 2nd field should contain the id of the unit.
            The 4th the spike time.
        @return a tuple with the highest value of int(spike time) + 1 and a set of the discovered unit-ids
    """
    longest_duration = 0
    units = set()

    # The with-statement closes the file even if a row can not be converted
    with open(units_file_path) as units_file:
        # Skip the header line, an empty file just has nothing to skip
        next(units_file, None)
        for row in units_file:
            rat_id, unit_id, channel, spike_time = row.split(",")
            spike_time_rounded_up = int(float(spike_time)) + 1
            longest_duration = max(longest_duration, spike_time_rounded_up)
            # A set ignores values it already contains, so no check is needed
            units.add(int(unit_id))
    return (longest_duration, units)

def get_a_list_with_all_the_seconds(longest_duration):
    """!
        @brief Gets a list containing all the seconds between 1 and the longest duration
        @param longest_duration the last second in the list as an int
        @return a list with all the seconds
    """
    # range excludes its end, therefore the + 1
    return list(range(1, longest_duration + 1))

def get_empty_unit_spike_counts(seconds, units):
    """!
        @brief Creates a spike-count of 0 for every second and every unit
        @param seconds the list with all the seconds
        @param units the unit-ids
        @return a dict with one list of zeros per unit-id, each as long as seconds
    """
    amount_of_seconds = len(seconds)
    zero_counts = dict()
    for unit in units:
        # Every unit gets its own list, so the counts can be changed independently
        zero_counts[unit] = [0] * amount_of_seconds
    return zero_counts

def get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts):
    """!
        @brief Counts the spikes of every unit per second
        @details The first line of the csv-file is treated as a header and skipped.
            The counts are added directly to units_spike_counts.
        @param units_file_path the path to the csv-file with 4 fields per row
            The 2nd field should contain the id of the unit.
            The 4th the spike time.
        @param units_spike_counts a dict with one list of counts per unit-id, it gets modified
        @return nothing
    """
    # The with-statement closes the file even if a row can not be processed
    with open(units_file_path) as units_file:
        # Skip the header line, an empty file just has nothing to skip
        next(units_file, None)
        for row in units_file:
            rat_id, unit_id, channel, spike_time = row.split(",")
            # NOTE(review): a spike time below 1 gives index -1, which Python resolves to the
            # last entry of the list - confirm the data never contains such spike times
            index = int(float(spike_time)) - 1
            units_spike_counts[int(unit_id)][index] += 1
    return

def get_immobility_for_every_second(immobility_file_path, seconds):
    """!
        @brief Determines for every second if it lies within an immobility phase
        @details The first line of the csv-file is treated as a header and skipped.
            A second counts as immobile if it lies strictly between begin and end of a phase.
        @param immobility_file_path the path to the csv-file with 2 fields per row
            The 1st field should contain the begin of the phase in seconds.
            The 2nd the end of the phase in seconds.
        @param seconds the list with all the seconds
        @return a list with one bool per second
    """
    phases = list()
    # The with-statement closes the file even if a row can not be converted
    with open(immobility_file_path) as immobility_file:
        # Skip the header line, an empty file just has nothing to skip
        next(immobility_file, None)
        for row in immobility_file:
            begin_in_seconds, end_in_seconds = row.split(",")
            phases.append((int(begin_in_seconds), int(end_in_seconds)))

    is_immobile = list()
    for second in seconds:
        # any stops at the first phase containing the second
        is_in_phase = any(begin < second < end for begin, end in phases)
        is_immobile.append(is_in_phase)
    return is_immobile

# Main part of the script

# Name the file we are going to use
units_file_path = "./data_neuron/session_2023111501010_units.csv"
immobility_file_path = "./data_neuron/session_2023111501010_immobility.csv"

# The seconds are the first column of the table
longest_duration, units = get_maximal_duration_and_units(units_file_path)
seconds = get_a_list_with_all_the_seconds(longest_duration)
table = [seconds]
# The spike-counts follow with one column per unit
units_spike_counts = get_empty_unit_spike_counts(seconds, units)
get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts)
table.extend(units_spike_counts.values())
# The immobility is the last column
table.append(get_immobility_for_every_second(immobility_file_path, seconds))

print(table)
```

This is a lot of code and you may have wondered if it is necessary, the answer is yes, but you do not need to write it.
Instead, you can use already existing code from the world-wide-web.
In this case the [csv-module](https://docs.python.org/3/library/csv.html).

Modules are organizational units for code quite similar to classes.
They are usually delivered in packages via a package manager like [pip](https://pypi.org/project/pip/) or [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python).
This means you usually have to install them in the command line with a command like ```pip install numpy```,
which installs [numpy](https://numpy.org/). Numpy is used for most calculations in Python,
because it acts as an interface to Fortran, C and C++ libraries,
which due to compile-time-optimization and other design features run much faster than pure Python can.

So if we wish to read in a csv-file, we search for [“Python read csv”](https://www.qwant.com/?l=de&q=Python+read+csv&t=web),
which leads us to the [csv-modules-page](https://docs.python.org/3/library/csv.html),
where we discover that there is already code to read in csv-files.

If we wish to use a module we have to import it, for which we use the ```import``` statement.
So if we want to use the csv-reader-class from the module we can import the module and then access the class as an attribute of the module:

```Python
import csv

def get_maximal_duration_and_units(units_file_path):
    # Snippet only: the function stops after creating the reader
    with open(units_file_path, "r") as csv_file_handle:
        # reader is accessed as an attribute of the imported module csv
        csv_reader = csv.reader(csv_file_handle)
```

Sometimes we wish to import only a part of the module and not all of it; in this case we use ```from``` in front of ```import```.
We can then directly access the imported object. So our code-snippet would then look like this:

```Python
from csv import reader

def get_maximal_duration_and_units(units_file_path):
    # Snippet only: the function stops after creating the reader
    with open(units_file_path, "r") as csv_file_handle:
        # reader was imported directly, so no module name is needed in front of it
        csv_reader = reader(csv_file_handle)
```

Especially when names are long or we already have similarly named **variables** we may wish to rename what we import.
We can use the keyword ```as``` to rename imports.
So if we would like to rename ```csv``` into ```comma_separated_value_module``` our snippet would look like this:

```Python
import csv as comma_separated_value_module

def get_maximal_duration_and_units(units_file_path):
    # Snippet only: the function stops after creating the reader
    with open(units_file_path, "r") as csv_file_handle:
        # The module is only known under the name given after "as"
        csv_reader = comma_separated_value_module.reader(csv_file_handle)
```

Equipped with this knowledge I would ask you to use the [official documentation](https://docs.python.org/3/library/csv.html) to read in the csv-file with the csv-module.
Please partner up and take your time.

In [None]:
# Apply what you have learned to integrate the csv-module instead of our previous code
def get_maximal_duration_and_units(units_file_path):
    """!
        @brief Reads in the csv-file with the recorded units and returns the maximal duration and the unit ids
        @details The first line is treated as a header and skipped.
            Every other line has to consist of exactly 4 comma-separated fields.
        @param units_file_path the path to the csv-file
            The 2nd field should contain the id of the unit.
            The 4th the spike time.
        @return a tuple with the highest value of int(spike time) + 1 and a set of the discovered unit-ids
    """
    longest_duration = 0
    units = set()

    # The with-statement closes the file even if a row can not be converted
    with open(units_file_path) as units_file:
        # Skip the header line, an empty file just has nothing to skip
        next(units_file, None)
        for row in units_file:
            rat_id, unit_id, channel, spike_time = row.split(",")
            spike_time_rounded_up = int(float(spike_time)) + 1
            longest_duration = max(longest_duration, spike_time_rounded_up)
            # A set ignores values it already contains, so no check is needed
            units.add(int(unit_id))
    return (longest_duration, units)

def get_a_list_with_all_the_seconds(longest_duration):
    """!
        @brief Gets a list containing all the seconds between 1 and the longest duration
        @param longest_duration the last second in the list as an int
        @return a list with all the seconds
    """
    # range excludes its end, therefore the + 1
    return list(range(1, longest_duration + 1))

def get_empty_unit_spike_counts(seconds, units):
    """!
        @brief Creates a spike-count of 0 for every second and every unit
        @param seconds the list with all the seconds
        @param units the unit-ids
        @return a dict with one list of zeros per unit-id, each as long as seconds
    """
    amount_of_seconds = len(seconds)
    zero_counts = dict()
    for unit in units:
        # Every unit gets its own list, so the counts can be changed independently
        zero_counts[unit] = [0] * amount_of_seconds
    return zero_counts

def get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts):
    """!
        @brief Counts the spikes of every unit per second
        @details The first line of the csv-file is treated as a header and skipped.
            The counts are added directly to units_spike_counts.
        @param units_file_path the path to the csv-file with 4 fields per row
            The 2nd field should contain the id of the unit.
            The 4th the spike time.
        @param units_spike_counts a dict with one list of counts per unit-id, it gets modified
        @return nothing
    """
    # The with-statement closes the file even if a row can not be processed
    with open(units_file_path) as units_file:
        # Skip the header line, an empty file just has nothing to skip
        next(units_file, None)
        for row in units_file:
            rat_id, unit_id, channel, spike_time = row.split(",")
            # NOTE(review): a spike time below 1 gives index -1, which Python resolves to the
            # last entry of the list - confirm the data never contains such spike times
            index = int(float(spike_time)) - 1
            units_spike_counts[int(unit_id)][index] += 1
    return

def get_immobility_for_every_second(immobility_file_path, seconds):
    """!
        @brief Determines for every second if it lies within an immobility phase
        @details The first line of the csv-file is treated as a header and skipped.
            A second counts as immobile if it lies strictly between begin and end of a phase.
        @param immobility_file_path the path to the csv-file with 2 fields per row
            The 1st field should contain the begin of the phase in seconds.
            The 2nd the end of the phase in seconds.
        @param seconds the list with all the seconds
        @return a list with one bool per second
    """
    phases = list()
    # The with-statement closes the file even if a row can not be converted
    with open(immobility_file_path) as immobility_file:
        # Skip the header line, an empty file just has nothing to skip
        next(immobility_file, None)
        for row in immobility_file:
            begin_in_seconds, end_in_seconds = row.split(",")
            phases.append((int(begin_in_seconds), int(end_in_seconds)))

    is_immobile = list()
    for second in seconds:
        # any stops at the first phase containing the second
        is_in_phase = any(begin < second < end for begin, end in phases)
        is_immobile.append(is_in_phase)
    return is_immobile

# Main part of the script

# Name the file we are going to use
units_file_path = "./data_neuron/session_2023111501010_units.csv"
immobility_file_path = "./data_neuron/session_2023111501010_immobility.csv"

# The seconds are the first column of the table
longest_duration, units = get_maximal_duration_and_units(units_file_path)
seconds = get_a_list_with_all_the_seconds(longest_duration)
table = [seconds]
# The spike-counts follow with one column per unit
units_spike_counts = get_empty_unit_spike_counts(seconds, units)
get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts)
table.extend(units_spike_counts.values())
# The immobility is the last column
table.append(get_immobility_for_every_second(immobility_file_path, seconds))

print(table)

<details>
  <summary>Click to reveal suggested solution</summary>

```Python
import csv

def get_maximal_duration_and_units(units_file_path):
    """!
        @brief Reads in the csv-file with the recorded units and returns the maximal duration and the unit ids
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @return a tuple with the highest value of int(spike time) + 1 and a set of the discovered unit-ids
    """
    longest_duration = 0
    units = set()

    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            spike_time_rounded_up = int(float(row["spikeTimes"])) + 1
            longest_duration = max(longest_duration, spike_time_rounded_up)
            # A set ignores values it already contains, so no check is needed
            units.add(int(row["unitID"]))
    return (longest_duration, units)

def get_a_list_with_all_the_seconds(longest_duration):
    """!
        @brief Gets a list containing all the seconds between 1 and the longest duration
        @param longest_duration the last second in the list as an int
        @return a list with all the seconds
    """
    # range excludes its end, therefore the + 1
    return list(range(1, longest_duration + 1))

def get_empty_unit_spike_counts(seconds, units):
    """!
        @brief Creates a spike-count of 0 for every second and every unit
        @param seconds the list with all the seconds
        @param units the unit-ids
        @return a dict with one list of zeros per unit-id, each as long as seconds
    """
    amount_of_seconds = len(seconds)
    zero_counts = dict()
    for unit in units:
        # Every unit gets its own list, so the counts can be changed independently
        zero_counts[unit] = [0] * amount_of_seconds
    return zero_counts

def get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts):
    """!
        @brief Counts the spikes of every unit per second
        @details The columns are accessed by the names given in the header line of the csv-file.
            The counts are added directly to units_spike_counts.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @param units_spike_counts a dict with one list of counts per unit-id, it gets modified
        @return nothing
    """
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            # NOTE(review): a spike time below 1 gives index -1, which Python resolves to the
            # last entry of the list - confirm the data never contains such spike times
            index = int(float(row["spikeTimes"])) - 1
            units_spike_counts[int(row["unitID"])][index] += 1
    return

def get_immobility_for_every_second(immobility_file_path, seconds):
    """!
        @brief Determines for every second if it lies within an immobility phase
        @details The columns are accessed by the names given in the header line of the csv-file.
            A second counts as immobile if it lies strictly between begin and end of a phase.
        @param immobility_file_path the path to the csv-file
            The column "begin in seconds" should contain the begin of the phase.
            The column "end in seconds" the end of the phase.
        @param seconds the list with all the seconds
        @return a list with one bool per second
    """
    phases = list()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(immobility_file_path, "r", newline="") as immobility_file:
        for row in csv.DictReader(immobility_file):
            phases.append((int(row["begin in seconds"]), int(row["end in seconds"])))

    is_immobile = list()
    for second in seconds:
        # any stops at the first phase containing the second
        is_in_phase = any(begin < second < end for begin, end in phases)
        is_immobile.append(is_in_phase)
    return is_immobile

# Main part of the script

# Name the file we are going to use
units_file_path = "./data_neuron/session_2023111501010_units.csv"
immobility_file_path = "./data_neuron/session_2023111501010_immobility.csv"

# The seconds are the first column of the table
longest_duration, units = get_maximal_duration_and_units(units_file_path)
seconds = get_a_list_with_all_the_seconds(longest_duration)
table = [seconds]
# The spike-counts follow with one column per unit
units_spike_counts = get_empty_unit_spike_counts(seconds, units)
get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts)
table.extend(units_spike_counts.values())
# The immobility is the last column
table.append(get_immobility_for_every_second(immobility_file_path, seconds))

print(table)
```

</details>

## Building Python modules

You may now wonder where Python modules come from or more specifically how to write them.
Considering that you will probably not desire to publish your code in the near future let me just roughly walk you through the process.
First you consult the [tutorial](https://packaging.python.org/en/latest/tutorials/packaging-projects/),
which tells you how to write a project-[toml](https://de.wikipedia.org/wiki/TOML).

<details>
  <summary>Here is an example from one of my projects</summary>

```TOML
[build-system]
requires = ["hatchling >= 1.26"]
build-backend = "hatchling.build"

[project]
name = "rat_call_meta"
version = "0.0.1"
authors = [
    {name="Einsied", email="sebastian.einsiedler@zi-mannheim.de"}
]
description = "Meta elements to mark rat vocalizations"
readme = "README.md"
requires-python = ">=3.13"
# Packages that get installed together with this package
dependencies = [
    "numpy >= 2.2.6"
]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]
```

</details>

After you wrote it and placed your files in a ```src/<package-name>``` (in my case ```src/rat_call_meta```) folder you run the following commands:

```Bash
python3 -m pip install --upgrade build
python3 -m build
```

To install it locally you run:
```Bash
pip install <package-name> --no-index --find-links dist
```

Replacing ```<package-name>``` with the name of your package, which in my case resulted in:

```Bash
pip install rat_call_meta --no-index --find-links dist
```

You can now use your newly made module.

## Adapting the code

With this new knowledge, we can now simplify our code, by using [pathlib](https://docs.python.org/3/library/pathlib.html) and [numpy](https://numpy.org/doc/).
Instead of our manual binning we can use ```numpy.histogram``` and store the paths with ```pathlib```.
Consulting the [documentation for ```numpy.histogram```](https://numpy.org/devdocs/reference/generated/numpy.histogram.html),
we learn that we can bin a range of numbers directly.
This means we no longer need the seconds and can remove the ```get_maximal_duration_and_units``` and ```get_a_list_with_all_the_seconds```,
by adapting ```get_amount_of_spikes_for_every_second``` to ```get_spike_times_for_all_units```.
This also means that ```get_empty_unit_spike_counts``` becomes obsolete.
Let us take a look at the **function** before and after the transformation:

```Python
def get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts):
    """!
        @brief Counts the spikes of every unit per second
        @details The columns are accessed by the names given in the header line of the csv-file.
            The counts are added directly to units_spike_counts.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @param units_spike_counts a dict with one list of counts per unit-id, it gets modified
        @return nothing
    """
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            # NOTE(review): a spike time below 1 gives index -1, which Python resolves to the
            # last entry of the list - confirm the data never contains such spike times
            index = int(float(row["spikeTimes"])) - 1
            units_spike_counts[int(row["unitID"])][index] += 1
    return

def get_spike_times_for_all_units(units_file_path):
    """!
        @brief Collects the spike times of every unit
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @return a dict with one list of spike times as float per unit-id, in the order of the file
    """
    unit_spike_times = dict()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            unit_id = int(row["unitID"])
            # setdefault creates the list the first time a unit-id shows up
            unit_spike_times.setdefault(unit_id, list()).append(float(row["spikeTimes"]))
    return unit_spike_times
```

To reduce the complexity further we transform ```get_immobility_for_every_second``` into ```get_immobility```:

```Python
def get_immobility_for_every_second(immobility_file_path, seconds):
    """!
        @brief Determines for every second if it lies within an immobility phase
        @details The columns are accessed by the names given in the header line of the csv-file.
            A second counts as immobile if it lies strictly between begin and end of a phase.
        @param immobility_file_path the path to the csv-file
            The column "begin in seconds" should contain the begin of the phase.
            The column "end in seconds" the end of the phase.
        @param seconds the list with all the seconds
        @return a list with one bool per second
    """
    phases = list()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(immobility_file_path, "r", newline="") as immobility_file:
        for row in csv.DictReader(immobility_file):
            phases.append((int(row["begin in seconds"]), int(row["end in seconds"])))

    is_immobile = list()
    for second in seconds:
        # any stops at the first phase containing the second
        is_in_phase = any(begin < second < end for begin, end in phases)
        is_immobile.append(is_in_phase)
    return is_immobile

def get_immobility(immobility_file_path):
    """!
        @brief Reads in the immobility phases
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param immobility_file_path the path to the csv-file
            The column "begin in seconds" should contain the begin of the phase.
            The column "end in seconds" the end of the phase.
        @return a list with one tuple (begin, end) of ints per phase, in the order of the file
    """
    phases = list()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(immobility_file_path, "r", newline="") as immobility_file:
        for row in csv.DictReader(immobility_file):
            phases.append((int(row["begin in seconds"]), int(row["end in seconds"])))
    return phases
```

This means that the main part of the code gets shorter as well:

```Python
# Old
# Name the file we are going to use
units_file_path = "./data_neuron/session_2023111501010_units.csv"
immobility_file_path = "./data_neuron/session_2023111501010_immobility.csv"

# The seconds are the first column of the table
longest_duration, units = get_maximal_duration_and_units(units_file_path)
seconds = get_a_list_with_all_the_seconds(longest_duration)
table = [seconds]
# The spike-counts follow with one column per unit
units_spike_counts = get_empty_unit_spike_counts(seconds, units)
get_amount_of_spikes_for_every_second(units_file_path, units_spike_counts)
table.extend(units_spike_counts.values())
# The immobility is the last column
table.append(get_immobility_for_every_second(immobility_file_path, seconds))

print(table)


# New
data_path = pathlib.Path("./data_neuron/")
units_file_path = data_path / "session_2023111501010_units.csv"
immobility_file_path = data_path / "session_2023111501010_immobility.csv"

spikes_times_units = get_spike_times_for_all_units(units_file_path)
# The latest spike over all units determines the duration
max_duration = 0
for spike_times in spikes_times_units.values():
    max_duration = max(max(spike_times), max_duration)
immobility = get_immobility(immobility_file_path)
```

Now we can use ```numpy.histogram``` to get the number of spikes per second.
For this we have to generate some bins.
As you recall we sorted the spike times into seconds, these seconds were our bins or buckets.
```numpy.histogram``` permits us to use arbitrary bins instead.
We generate them with ```range``` or if we want non **int** step-sizes ```numpy.arange```.
So we can get our bins using ```max_duration```:

```Python
# One bin edge per full second; the last edge is rounded up, because spikes behind
# the last edge would not be counted by numpy.histogram
bins = numpy.arange(0, numpy.ceil(max_duration) + 1)
```

Please combine the code snippets above to create the histograms for a bin size of ```1```:

In [None]:
# Your code should be added here

<details>
  <summary>Click to reveal suggested solution</summary>

```Python
import csv
import numpy

def get_spike_times_for_all_units(units_file_path):
    """!
        @brief Collects the spike times of every unit
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @return a dict with one list of spike times as float per unit-id, in the order of the file
    """
    unit_spike_times = dict()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            unit_id = int(row["unitID"])
            # setdefault creates the list the first time a unit-id shows up
            unit_spike_times.setdefault(unit_id, list()).append(float(row["spikeTimes"]))
    return unit_spike_times

def get_immobility(immobility_file_path):
    """!
        @brief Reads in the immobility phases
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param immobility_file_path the path to the csv-file
            The column "begin in seconds" should contain the begin of the phase.
            The column "end in seconds" the end of the phase.
        @return a list with one tuple (begin, end) of ints per phase, in the order of the file
    """
    phases = list()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(immobility_file_path, "r", newline="") as immobility_file:
        for row in csv.DictReader(immobility_file):
            phases.append((int(row["begin in seconds"]), int(row["end in seconds"])))
    return phases

# pathlib is used for the paths below, but the imports at the top of this snippet do not include it
import pathlib

data_path = pathlib.Path("./data_neuron/")
units_file_path = data_path / "session_2023111501010_units.csv"
immobility_file_path = data_path / "session_2023111501010_immobility.csv"

spikes_times_units = get_spike_times_for_all_units(units_file_path)
# The latest spike over all units determines the duration
max_duration = 0
for unit_id in spikes_times_units.keys():
    max_time_unit = max(spikes_times_units[unit_id])
    max_duration = max(max_time_unit, max_duration)
immobility = get_immobility(immobility_file_path)
# One bin edge per full second; the last edge is rounded up, because spikes behind
# the last edge would not be counted by numpy.histogram
bins = numpy.arange(0, numpy.ceil(max_duration) + 1)

# Count the spikes per bin for every unit
spikes_per_second_units = dict()
for unit_id in spikes_times_units.keys():
    histogram, bin_edges = numpy.histogram(spikes_times_units[unit_id], bins=bins)
    spikes_per_second_units[unit_id] = histogram
print(spikes_per_second_units)
```

</details>

## Visualizing data

Now that you know a little bit more about modules, it is time to visualize our results.
As you may recall our original task was to detect the oxytocin unit.
We built the tools to extract the data, now it is time to generate plots to find the unit.
For this we will use [matplotlib](https://matplotlib.org/stable/users/explain/quick_start.html).

Your task is now to plot a histogram for every unit.
While you could use ```matplotlib.pyplot.hist```, it is easier to use ```matplotlib.pyplot.stairs```.
For the immobility you should search the documentation for ```axvspan```.
Good luck!

In [None]:
# Write your code here

<details>
  <summary>Click to reveal suggested solution</summary>

```Python
import csv
import pathlib
import matplotlib.pyplot

def get_spike_times_for_all_units(units_file_path):
    """!
        @brief Collects the spike times of every unit
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param units_file_path the path to the csv-file
            The column "unitID" should contain the id of the unit.
            The column "spikeTimes" the spike time.
        @return a dict with one list of spike times as float per unit-id, in the order of the file
    """
    unit_spike_times = dict()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(units_file_path, "r", newline="") as units_file:
        for row in csv.DictReader(units_file):
            unit_id = int(row["unitID"])
            # setdefault creates the list the first time a unit-id shows up
            unit_spike_times.setdefault(unit_id, list()).append(float(row["spikeTimes"]))
    return unit_spike_times

def get_immobility(immobility_file_path):
    """!
        @brief Reads in the immobility phases
        @details The columns are accessed by the names given in the header line of the csv-file.
        @param immobility_file_path the path to the csv-file
            The column "begin in seconds" should contain the begin of the phase.
            The column "end in seconds" the end of the phase.
        @return a list with one tuple (begin, end) of ints per phase, in the order of the file
    """
    phases = list()
    # The csv-module asks for newline="", so it can process the line endings itself
    with open(immobility_file_path, "r", newline="") as immobility_file:
        for row in csv.DictReader(immobility_file):
            phases.append((int(row["begin in seconds"]), int(row["end in seconds"])))
    return phases

# numpy is used for the binning below, but the imports at the top of this snippet do not include it
import numpy

data_path = pathlib.Path("./data_neuron/")
units_file_path = data_path / "session_2023111501010_units.csv"
immobility_file_path = data_path / "session_2023111501010_immobility.csv"

spikes_times_units = get_spike_times_for_all_units(units_file_path)
# The latest spike over all units determines the duration
max_duration = 0
for unit_id in spikes_times_units.keys():
    max_time_unit = max(spikes_times_units[unit_id])
    max_duration = max(max_time_unit, max_duration)
immobility = get_immobility(immobility_file_path)
# One bin edge per full second; the last edge is rounded up, because spikes behind
# the last edge would not be counted by numpy.histogram
bins = numpy.arange(0, numpy.ceil(max_duration) + 1)

# Count the spikes per bin for every unit
spikes_per_second_units = dict()
for unit_id in spikes_times_units.keys():
    histogram, bin_edges = numpy.histogram(spikes_times_units[unit_id], bins=bins)
    spikes_per_second_units[unit_id] = histogram

# One figure per unit
for unit in spikes_per_second_units.keys():
    matplotlib.pyplot.title(f"Unit {unit}")
    # plot intervals to compare
    for begin, end in immobility:
        matplotlib.pyplot.axvspan(begin, end, facecolor='black', alpha=0.2)
    # stairs expects the counts and the edges of the bins
    matplotlib.pyplot.stairs(spikes_per_second_units[unit], bins)
    matplotlib.pyplot.xlabel("Time in seconds")
    matplotlib.pyplot.ylabel("Frequency in Hz")
    matplotlib.pyplot.ylim((0, 130))
    matplotlib.pyplot.show()
```

</details>

Investigating the different plots reveals that "unit 0" has 7 bursts during immobility and is therefore the oxytocin unit we were searching for.

I assume that after the last exercise there are still a few questions open.
Please ask them now.