Skip to content

Commit

Permalink
[metadata] Better parsing
Browse files (browse the repository at this point in the history)
Fixes #189
  • Loading branch information
clee committed May 15, 2020
1 parent d30b12e commit 866d8f1
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 25 deletions.
10 changes: 3 additions & 7 deletions metadata/helpers/sanitize.go
Expand Up @@ -2,19 +2,15 @@
package helpers

import (
"path/filepath"
"regexp"
"strings"
)

// Sanitize prepares a filename for parsing. It tries to remove dots and other weird characters to make parsing easier.
func Sanitize(filename string) string {
// Remove extension
filename = strings.TrimSuffix(filename, filepath.Ext(filename))

filename = strings.Replace(filename, ".", " ", -1)
filename = strings.Replace(filename, "_", " ", -1)
filename = strings.Replace(filename, "’", "'", -1)
sanitizeRegEx := regexp.MustCompile(`([\._‘](| ))`)
filename = sanitizeRegEx.ReplaceAllString(filename, " ")
filename = strings.Replace(filename, " ", " ", -1)
filename = strings.Trim(filename, " ")
filename = strings.Trim(filename, " -")
return filename
Expand Down
7 changes: 5 additions & 2 deletions metadata/parsers/movies.go
Expand Up @@ -2,11 +2,13 @@
package parsers

import (
log "github.com/sirupsen/logrus"
"gitlab.com/olaris/olaris-server/metadata/helpers"
"path/filepath"
"regexp"
"strconv"
"strings"

log "github.com/sirupsen/logrus"
"gitlab.com/olaris/olaris-server/metadata/helpers"
)

// ParsedMovieInfo holds extracted information from the given filename.
Expand All @@ -23,6 +25,7 @@ func ParseMovieName(fileName string) *ParsedMovieInfo {
psi := ParsedMovieInfo{}
var err error
var year string
fileName = strings.TrimSuffix(fileName, filepath.Ext(fileName))

res := movieRe.FindStringSubmatch(fileName)

Expand Down
15 changes: 7 additions & 8 deletions metadata/parsers/series.go
Expand Up @@ -10,11 +10,11 @@ import (
"gitlab.com/olaris/olaris-server/metadata/helpers"
)

var yearRegex = regexp.MustCompile("([\\[\\(]?((?:19[0-9]|20[01])[0-9])[\\]\\)]?)")
var yearRegex = regexp.MustCompile("([\\[\\(]?((19|20)\\d{2})[\\]\\)]?)")
var seriesRegex = regexp.MustCompile("^(.*)S(\\d{1,2})E(\\d{1,2})")
var seriesFallbackRegex = regexp.MustCompile("^(.*)(\\d{1,2})x(\\d{1,2})")

var seasonRegex = regexp.MustCompile("season.*?([0-9]{1,3})")
var seasonRegex = regexp.MustCompile("[Ss](eason|)\\s?(\\d{1,3})")
var firstNumberRegex = regexp.MustCompile("[0-9]{1,3}")

// ParsedSeriesInfo holds extracted information from the given filename.
Expand All @@ -31,6 +31,7 @@ func (psi *ParsedSeriesInfo) logFields() log.Fields {

// ParseSeriesName attempts to parse a filename looking for episode/season information.
func ParseSeriesName(filePath string) *ParsedSeriesInfo {
filePath = strings.TrimSuffix(filePath, filepath.Ext(filePath))
fileName := filepath.Base(filePath)
log.WithFields(log.Fields{"filename": fileName}).Debugln("Parsing filename for episode information.")
var err error
Expand All @@ -39,17 +40,17 @@ func ParseSeriesName(filePath string) *ParsedSeriesInfo {
log.WithFields(p.logFields()).Debugln("Done parsing episode.")
}(&psi)

yearResult := yearRegex.FindStringSubmatch(fileName)
if len(yearResult) > 1 {
yearResult := yearRegex.FindStringSubmatch(filePath)
if len(yearResult) > 0 {
yearString := yearResult[2]
log.WithFields(log.Fields{"year": yearString}).Println("Found release year.")
// Remove Year data from original fileName
// Remove Year data from original filePath and fileName
filePath = strings.Replace(filePath, yearResult[1], "", -1)
fileName = strings.Replace(fileName, yearResult[1], "", -1)
psi.Year = yearString
if err != nil {
log.WithError(err).Warnln("Could not convert year to uint")
}
log.WithFields(log.Fields{"filename": fileName}).Debugln("Removed year from episode information to create new title.", fileName)
}

// Find out episode numbers
Expand Down Expand Up @@ -97,8 +98,6 @@ func ParseSeriesName(filePath string) *ParsedSeriesInfo {
log.WithError(err).Debugln("Could not convert episode to uint: ")
}

psi.Year = ""

seriesName := filepath.Base(filepath.Dir(filepath.Dir(filePath)))
psi.Title = helpers.Sanitize(seriesName)

Expand Down
23 changes: 15 additions & 8 deletions metadata/parsers/series_test.go
Expand Up @@ -6,26 +6,33 @@ import (

// TestParseSeriesName feeds a table of file names and paths to ParseSeriesName
// and checks the returned Year, EpisodeNum and Title against expected values.
//
// NOTE(review): every table entry also sets SeasonNum, but the loop below never
// compares newMi.SeasonNum - confirm whether that assertion is missing.
func TestParseSeriesName(t *testing.T) {
// Keyed by the input string passed to ParseSeriesName; the value is the
// expected parse result for that input.
tests := make(map[string]ParsedSeriesInfo)
tests["Battlestar Galactica - S01E04 (1978)"] = ParsedSeriesInfo{Year: "1978", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar Galactica - S02E03"] = ParsedSeriesInfo{Year: "", Title: "Battlestar Galactica", EpisodeNum: 3, SeasonNum: 2}
tests["Battlestar Galactica - S2E3"] = ParsedSeriesInfo{Year: "", Title: "Battlestar Galactica", EpisodeNum: 3, SeasonNum: 2}
tests["This does not Exist"] = ParsedSeriesInfo{Year: "", Title: "This does not Exist", EpisodeNum: 0, SeasonNum: 0}
tests["Angel.3x2.avi"] = ParsedSeriesInfo{Year: "", Title: "Angel", EpisodeNum: 2, SeasonNum: 3}
tests["Battlestar Galactica - S01E04 (1978).m4v"] = ParsedSeriesInfo{Year: "1978", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar.Galactica.1978.S01E04 - The Lost Planet of the Gods (1).mkv"] = ParsedSeriesInfo{Year: "1978", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar Galactica (1978)/s1/04. The Lost Planet of the Gods part 1.mp4"] = ParsedSeriesInfo{Year: "1978", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar Galactica (1978)/season 1/04. The Lost Planet of the Gods.mkv"] = ParsedSeriesInfo{Year: "1978", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar Galactica/s1/04. The Lost Planet of the Gods.mkv"] = ParsedSeriesInfo{Year: "", Title: "Battlestar Galactica", EpisodeNum: 4, SeasonNum: 1}
tests["Battlestar Galactica (2003) - S02E03.mp4"] = ParsedSeriesInfo{Year: "2003", Title: "Battlestar Galactica", EpisodeNum: 3, SeasonNum: 2}
tests["Battlestar Galactica - S2E3.wmv"] = ParsedSeriesInfo{Year: "", Title: "Battlestar Galactica", EpisodeNum: 3, SeasonNum: 2}
tests["Battlestar.Galactica.-.S02E03.mkv"] = ParsedSeriesInfo{Year: "", Title: "Battlestar Galactica", EpisodeNum: 3, SeasonNum: 2}
tests["Angel.3x2"] = ParsedSeriesInfo{Year: "", Title: "Angel", EpisodeNum: 2, SeasonNum: 3}
tests["Mr. Robot (2016).S01E04 - eps1.3_da3m0ns.mp4.mkv"] = ParsedSeriesInfo{Year: "2016", Title: "Mr Robot", EpisodeNum: 4, SeasonNum: 1}
tests["Mr. Robot (2016) - S01E04.mpg"] = ParsedSeriesInfo{Year: "2016", Title: "Mr Robot", EpisodeNum: 4, SeasonNum: 1}
tests["Mr. Robot/Season 2/03.m2ts"] = ParsedSeriesInfo{Year: "", Title: "Mr Robot", EpisodeNum: 3, SeasonNum: 2}
tests["This does not Exist"] = ParsedSeriesInfo{Year: "", Title: "This does not Exist", EpisodeNum: 0, SeasonNum: 0}

// Go does not define map iteration order, so cases run in no fixed sequence;
// the input is logged before each case so failures can be attributed.
for name, mi := range tests {
t.Log("running test on:", name)
newMi := ParseSeriesName(name)
// NOTE(review): each mismatch below is reported by two t.Errorf calls, one
// without and one with the input name. This looks like the old and new lines
// of a diff shown together - confirm which call is meant to remain.
if newMi.Year != mi.Year {
t.Errorf("Year %v did not match expected year %v\n", newMi.Year, mi.Year)
t.Errorf("Year [%v] did not match expected year [%v] for input [%v]\n", newMi.Year, mi.Year, name)
}

if newMi.EpisodeNum != mi.EpisodeNum {
t.Errorf("Episode %v did not match expected episode %v\n", newMi.EpisodeNum, mi.EpisodeNum)
t.Errorf("Episode [%v] did not match expected episode [%v] for input [%v]\n", newMi.EpisodeNum, mi.EpisodeNum, name)
}

if newMi.Title != mi.Title {
t.Errorf("Title %v did not match expected Title %v\n", newMi.Title, mi.Title)
t.Errorf("Title [%v] did not match expected title [%v] for input [%v]\n", newMi.Title, mi.Title, name)
}
}
}

0 comments on commit 866d8f1

Please sign in to comment.