In [1]:
import "io/ioutil"
import "regexp"
import "strings"
import "os"
import "fmt"
import "strconv"
import "math"
import "reflect"

// check aborts execution when err is non-nil: it prints "Error" to standard
// output and then panics with err. A nil err is a no-op.
func check(err error) {
	if err == nil {
		return
	}
	fmt.Println("Error")
	panic(err)
}

skipped := 0 // running count of packets discarded so far; incremented by the filtering and grouping cells

###  Open the data dump (this can also be a Unix device file, read directly)

In [2]:
// Load the entire dump into memory as a byte slice; any read error aborts
// the notebook through check.
data, err := ioutil.ReadFile("image.dd")
check(err)

### Get data that correspond to the packet structure using regex

*Please note that this regex is not optimized at all: it is very slow, not very readable, and still gives false negatives/positives.*

Floats look like this: `-?\d*\.?\d*e?-?\d*` (the quantifiers change depending on the context of the group)
- `-?` Minus sign or not
- `\d*` Integer digit(s)
- `\.?` Decimal marker
- `\d*` Decimal digit(s)
- `e?` Power of ten (Python uses scientific number notation)
- `-?` Power of ten sign
- `\d*` Power of ten digit(s)

Match any first incomplete line:
- `(?:(?:-?\d*\.?\d*e?-?\d*,)*`: Multiple floats, ending with a comma. Almost everything is with `*` or `?` (0 or more & 0 or 1) because the line might be incomplete, and a float can only contain a single digit for example
- `(?:-?\d*\.?\d*e?-?\d*\n?))?`: End of the first (possibly incomplete) line

Match full lines, containing 9 items separated by comma, and an extra one ending with a newline:
`(?:(?:-?\d+\.\d+e?-?\d*,){9}-*\d+\.\d+e?-?\d*\n)+`

Match any line with incomplete end: `(?:(?:-?\d*\.?\d+e?-?\d*,?)*)`

At the end we concatenate all the matches together. Most of the time, the incomplete last line of a match gives us a complete line when stitched together with the incomplete first line of the following match.\
Example:
- Incomplete last line of a match: `49.665,0.117979,0.317355,0.723516,0.00864801,-0.00477252,0`
- Incomplete first line of the following match: `.0164411,1017.45,-0.190244,24.8807`
- Together: `49.665,0.117979,0.317355,0.723516,0.00864801,-0.00477252,0.0164411,1017.45,-0.190244,24.8807` => Complete packet!


In [3]:
// re matches a packet-shaped run of text inside the raw dump:
//   1. optionally, a leading line with any number of comma-separated numbers
//      (the tail of a line whose beginning was lost),
//   2. one or more complete lines of exactly 10 comma-separated numbers,
//   3. optionally, trailing numbers with no newline (the head of a line whose
//      end was lost).
re := regexp.MustCompile(`(?m)(?:(?:-?\d*\.?\d+(e-?\d+)?,)*(?:-?\d*\.\d+(e-?\d+)?\n))?(?:(?:-?\d+\.\d+(e-?\d+)?,){9}-*\d+\.\d+(e-?\d+)?\n)+(?:(?:-?\d+\.\d+(e-?\d+)?,?)*)`)

// Stitch all matches back to back, with no separator, so that a truncated
// trailing line can join the truncated leading line of the next match.
// strings.Join builds the result in a single allocation instead of copying
// the growing string once per match. FindAllString does not report errors,
// so there is nothing to check here.
matches := strings.Join(re.FindAllString(string(data), -1), "")

### Parse every line into a struct, and filter

Remove packets where the number of items is incorrect, or where a value is too high (arbitrarily 6000)

In [4]:
// Packet is one recovered 10-field record. Fields are declared in the same
// order as the comma-separated columns of a line in the dump.
// NOTE(review): judging by the names, these are accelerometer and gyroscope
// axes plus pressure, altitude and temperature readings; units are not
// visible here. The grouping cell treats time as seconds.
type Packet struct {
	time             float64 // column 0: timestamp of the sample
	accX, accY, accZ float64 // columns 1-3
	gyrX, gyrY, gyrZ float64 // columns 4-6
	pres, alt, temp  float64 // columns 7-9
}

var packets []Packet

packet_loop:
for _, line_str := range strings.Split(matches, "\n") {
	line := strings.Split(line_str, ',')
	p := Packet{}

	if len(line) != 10 {
		skipped++
		continue
	}

	for _, val := range line {
		val, err := strconv.ParseFloat(val, 32)
		check(err)
		if math.Abs(val) > 6000 {
			skipped++
			continue packet_loop
		}
	}

	p.time, _ = strconv.ParseFloat(line[0], 32)
	p.accX, err = strconv.ParseFloat(line[1], 32)
	check(err)
	p.accY, err = strconv.ParseFloat(line[2], 32)
	check(err)
	p.accZ, err = strconv.ParseFloat(line[3], 32)
	check(err)
	p.gyrX, err = strconv.ParseFloat(line[4], 32)
	check(err)
	p.gyrY, err = strconv.ParseFloat(line[5], 32)
	check(err)
	p.gyrZ, err = strconv.ParseFloat(line[6], 32)
	check(err)
	p.pres, err = strconv.ParseFloat(line[7], 32)
	check(err)
	p.alt, err =  strconv.ParseFloat(line[8], 32)
	check(err)
	p.temp, err = strconv.ParseFloat(line[9], 32)
	check(err)
	
	packets = append(packets, p)
}

### Group packets

We group packets depending on the timestamp. Packets are put in the same group until the time delta between the last packet *(i-1)* and the current one *(i)* is too long (arbitrarily 1.5s), or if the current packet's *(i)* time is behind last packet's *(i-1)* time.

We also check for false positives: we run the same checks between the last packet *(i-1)* and the next one *(i+1)*, because sometimes a single line is malformed and its timestamp would otherwise be detected as the start of a new group. Example: 
```44.211,0.0478039,0.0302162,0.515105,-0.0313193,0.0166487,0.0193122,1017.46,-0.422764,24.8781
44.513,0.0460923,0.0320013,0.522003,-0.0313287,0.0169571,0.0194658,1017.45,-0.0845528,24.8807
4.821,0.0443463,0.0352771,0.529401,-0.0310598,0.0170395,0.0199473,1017.45,-0.147967,24.8756 <--- Incorrect, skipped
45.123,0.0458433,0.0367056,0.532733,-0.0308872,0.0173024,0.0199188,1017.44,-0.190244,24.8703
```

In [5]:
i := 1
var packet_groups [][]Packet
var packet_group []Packet
for i < len(packets) {
	// time between this packet and the previous one is bigger than 1.5 secs, put it into a new group
	if packets[i].time - packets[i-1].time > 1.5 || packets[i].time <= packets[i-1].time {
        // BUT if the next packet is close to the previous packet, consider the actual packet to be corrupted, skip
        if packets[i+1].time - packets[i-1].time < 1.5 && packets[i+1].time > packets[i-1].time {
            i++
			skipped++
            continue
        }
		packet_groups = append(packet_groups, packet_group)
		packet_group = nil
	}

	packet_group = append(packet_group, packets[i])
	i++
}

fmt.Sprintf("%d groups", len(packet_groups))

25 groups

###  Remove duplicates

Somehow almost every group is duplicated in the data dump, so we remove them here

In [6]:
// Keep only the first occurrence of each group. Two groups count as
// duplicates when reflect.DeepEqual reports them equal, i.e. they hold the
// same packets in the same order.
packet_groups_filtered := [][]Packet{}

for _, candidate := range packet_groups {
	duplicate := false
	for _, kept := range packet_groups_filtered {
		if reflect.DeepEqual(candidate, kept) {
			duplicate = true
			break
		}
	}
	if !duplicate {
		packet_groups_filtered = append(packet_groups_filtered, candidate)
	}
}

###  Write every group to CSV

In [7]:
for i, group := range packet_groups_filtered {
    content := "time,accX,accY,accZ,gyrX,gyrY,gyrZ,pres,alt,temp\n"
    for _, packet := range group {
        content += fmt.Sprintf("%.3f,", packet.time)
        content += fmt.Sprintf("%.3f,", packet.accX)
        content += fmt.Sprintf("%.3f,", packet.accY)
        content += fmt.Sprintf("%.3f,", packet.accZ)
        content += fmt.Sprintf("%.3f,", packet.gyrX)
        content += fmt.Sprintf("%.3f,", packet.gyrY)
        content += fmt.Sprintf("%.3f,", packet.gyrZ)
        content += fmt.Sprintf("%.3f,", packet.pres)
        content += fmt.Sprintf("%.3f,", packet.alt)
        content += fmt.Sprintf("%.3f\n", packet.temp)
    }
    err := ioutil.WriteFile(fmt.Sprintf("recover/%d.csv", i), []byte(content), 0644)
    check(err)
}

In [8]:
// Format the total number of discarded packets. The result is not assigned;
// presumably the notebook displays the value of this final expression as the
// cell output.
fmt.Sprintf("Skipped %d packets", skipped)

Skipped 32 packets