In [1]:
import os
import re
import fnmatch
import pandas as pd

path = "F:/Data/Cinémathèque"  # F:\Data\Cinémathèque
pattern = "*.mkv"

In [2]:
def list_lstrip(list_to_clean):
    list_cleaned = []
    for string in list_to_clean:
        list_cleaned.append(string.lstrip())
        
    return list_cleaned

In [3]:
def get_information(file_name):
    regex = re.findall('(\(\d\d\d\d\))', file_name)  # Find the date
    try:
        name, end = file_name.split(regex[0])  # Get name and director
    except Exception as e:
        print(e)
        print(file_name + '\n')
        return {
            'name': file_name,
            'date': None,
            'directors': [],  # Get Directors list,
            'details': []
        }    

    name = name[:-1]  # Supress useless character for the name

    end = end.split(' [')
    if len(end) < 2:
        director = end[0][1:]  # Supress useless character for the director
        detail = []
    else:
        director = end[0]
        detail = end[1][:-1]  # Supress useless character for the details
        detail = detail.split('][')  # Get details list
    
    # Save the date
    return {
        'name': name.lstrip(),
        'date': regex[0].replace('(', '').replace(')', ''),
        'directors': list_lstrip(director.split(', ')),  # Get Directors list,
        'details': list_lstrip(detail)
    }

In [4]:
def get_mkv_list(path="D:/Cinémathèque/", pattern="*.mkv"):
    file_list = []
    for root, dirs, files in os.walk(path):
        if root == path:  # To prevent subfolder analyze
            for file in files:
                if fnmatch.fnmatch(file, pattern):  # to get only with ".mkv" extension
                    file_list.append({'path': path, 'file': file[:-len(pattern)+1], 'extension': pattern[1:]})
    return pd.DataFrame(file_list)

mkv_list = get_mkv_list(path=path, pattern=pattern)
mkv_list

Unnamed: 0,path,file,extension
0,F:/Data/Cinémathèque,(500) Jours Ensemble (2009) Marc Webb,.mkv
1,F:/Data/Cinémathèque,1001 pattes (1998) John Lasseter,.mkv
2,F:/Data/Cinémathèque,12 Hommes en Colere (1957) Sidney Lumet,.mkv
3,F:/Data/Cinémathèque,12 Years a Slave (2013) Steve McQueen,.mkv
4,F:/Data/Cinémathèque,120 Battements par Minute (2017) Robin Campillo,.mkv
...,...,...,...
1205,F:/Data/Cinémathèque,Zombie (1978) George A. Romero,.mkv
1206,F:/Data/Cinémathèque,"Zootopia (2016) Rich Moore, Byron Howard, Jare...",.mkv
1207,F:/Data/Cinémathèque,À Bord du Darjeeling Limited (2007) Wes Anderson,.mkv
1208,F:/Data/Cinémathèque,À bout de souffle (1960) Jean-Luc Godard,.mkv


In [5]:
data = []

for idx, row in mkv_list.iterrows():
    file_name = row.file

    data.append(get_information(file_name))
    
    
files_details = pd.DataFrame(data)
files_details

list index out of range
American History X Deleted Scenes

list index out of range
Conte des chrysanthemes tardifs 1939 VOSTF DVDRIP HEVC AZAZE

list index out of range
Like Someone in Love (Abbas Kiarostami) 2012

list index out of range
MACROSS PLUS - 01 [1080p]

list index out of range
MACROSS PLUS - 02 [1080p]

list index out of range
MACROSS PLUS - 03 [1080p]

list index out of range
MACROSS PLUS - 04 [1080p]

list index out of range
Pixar Intro Old



Unnamed: 0,name,date,directors,details
0,(500) Jours Ensemble,2009,[Marc Webb],[]
1,1001 pattes,1998,[John Lasseter],[]
2,12 Hommes en Colere,1957,[Sidney Lumet],[]
3,12 Years a Slave,2013,[Steve McQueen],[]
4,120 Battements par Minute,2017,[Robin Campillo],[]
...,...,...,...,...
1205,Zombie,1978,[George A. Romero],[]
1206,Zootopia,2016,"[Rich Moore, Byron Howard, Jared Bush]",[]
1207,À Bord du Darjeeling Limited,2007,[Wes Anderson],[]
1208,À bout de souffle,1960,[Jean-Luc Godard],[]


In [6]:
mkv_detail = pd.concat([mkv_list, files_details], axis=1)
mkv_detail

Unnamed: 0,path,file,extension,name,date,directors,details
0,F:/Data/Cinémathèque,(500) Jours Ensemble (2009) Marc Webb,.mkv,(500) Jours Ensemble,2009,[Marc Webb],[]
1,F:/Data/Cinémathèque,1001 pattes (1998) John Lasseter,.mkv,1001 pattes,1998,[John Lasseter],[]
2,F:/Data/Cinémathèque,12 Hommes en Colere (1957) Sidney Lumet,.mkv,12 Hommes en Colere,1957,[Sidney Lumet],[]
3,F:/Data/Cinémathèque,12 Years a Slave (2013) Steve McQueen,.mkv,12 Years a Slave,2013,[Steve McQueen],[]
4,F:/Data/Cinémathèque,120 Battements par Minute (2017) Robin Campillo,.mkv,120 Battements par Minute,2017,[Robin Campillo],[]
...,...,...,...,...,...,...,...
1205,F:/Data/Cinémathèque,Zombie (1978) George A. Romero,.mkv,Zombie,1978,[George A. Romero],[]
1206,F:/Data/Cinémathèque,"Zootopia (2016) Rich Moore, Byron Howard, Jare...",.mkv,Zootopia,2016,"[Rich Moore, Byron Howard, Jared Bush]",[]
1207,F:/Data/Cinémathèque,À Bord du Darjeeling Limited (2007) Wes Anderson,.mkv,À Bord du Darjeeling Limited,2007,[Wes Anderson],[]
1208,F:/Data/Cinémathèque,À bout de souffle (1960) Jean-Luc Godard,.mkv,À bout de souffle,1960,[Jean-Luc Godard],[]


In [10]:
files_details.to_csv('D:/Cinémathèque/movie_list.csv', header=True, index=False, sep=';')

# Update Movie details

# Some stat about the library

In [8]:
import statistics

statistics.mean(list(map(int, list(filter(None, files_details.date.to_list())))))

1995.8431876606685

In [9]:
res = files_details.directors.apply(tuple).value_counts()
pd.DataFrame(res).head(20)

Unnamed: 0,directors
"(Steven Spielberg,)",19
"(Hayao Miyazaki,)",11
"(Quentin Tarantino,)",10
"(John Carpenter,)",9
"(Stanley Kubrick,)",9
"(Martin Scorsese,)",8
"(Clint Eastwood,)",8
"(David Cronenberg,)",8
"(Christopher Nolan,)",8
"(David Fincher,)",8


# Update files metadata

Bash command example :
```bash
mkvpropedit movie.mkv --edit info --set "title=The movie" --edit track:a1 --set language=fre --edit track:a2 --set language=ita
```

Python example :
```python
'mkvpropedit ' + file_name + file_ext + ' --tags all:'  # Remove all tags
'mkvpropedit ' + file_name + file_ext + ' --edit info --set ' +   # Edit file info
    '"title=' + file_name + '" ' +
    '"year=' + file_year + '" ' +
    '"directors=' + file_directors + '"' +
    (' "tags=' + file_tag + '" ' if file_tag)
```

Bash command :
```bash
.\mkvpropedit "D:\Cinémathèque\L'illusionniste (2010) Sylvain Chomet.mkv" --edit info --set "title=L'illusionniste" --add "year=2010"
.\mkvpropedit "D:\Cinémathèque\L'illusionniste (2010) Sylvain Chomet.mkv" --list-property-names
```

.\mkvpropedit "D:\Cinémathèque\L'illusionniste (2010) Sylvain Chomet.mkv" --tags 

In [8]:
for idx, row in mkv_detail.iterrows():
    print('.\mkvpropedit "' + str(row.path + '/' + row.file + row.extension) +  '" --tags all:')
    print('.\mkvpropedit "' + str(row.path + '/' + row.file + row.extension) +  '" --edit info --set "title=' + str(row['name']) + '"\n')
    #print('.\mkvpropedit "' + str(row.path + '/' + row.file + row.extension) +  '" --edit info --set "comments=' + str({'date': row.date, 'directors': row.directors}) + '"\n')

.\mkvpropedit "F:/Data/Cinémathèque/(500) Jours Ensemble (2009) Marc Webb.mkv" --tags all:
.\mkvpropedit "F:/Data/Cinémathèque/(500) Jours Ensemble (2009) Marc Webb.mkv" --edit info --set "title=(500) Jours Ensemble"

.\mkvpropedit "F:/Data/Cinémathèque/1001 pattes (1998) John Lasseter.mkv" --tags all:
.\mkvpropedit "F:/Data/Cinémathèque/1001 pattes (1998) John Lasseter.mkv" --edit info --set "title=1001 pattes"

.\mkvpropedit "F:/Data/Cinémathèque/12 Hommes en Colere (1957) Sidney Lumet.mkv" --tags all:
.\mkvpropedit "F:/Data/Cinémathèque/12 Hommes en Colere (1957) Sidney Lumet.mkv" --edit info --set "title=12 Hommes en Colere"

.\mkvpropedit "F:/Data/Cinémathèque/12 Years a Slave (2013) Steve McQueen.mkv" --tags all:
.\mkvpropedit "F:/Data/Cinémathèque/12 Years a Slave (2013) Steve McQueen.mkv" --edit info --set "title=12 Years a Slave"

.\mkvpropedit "F:/Data/Cinémathèque/120 Battements par Minute (2017) Robin Campillo.mkv" --tags all:
.\mkvpropedit "F:/Data/Cinémathèque/120 Batteme

"{'date': '1992', 'directors': ['John Woo']}"

---

Thibault **Santonja**  
2021