# Extracting data from the court cases scraped using scrapingcases.py

In [1]:
import pandas as pd
import regex as re

# Some helper functions that find the case number and the date of the ruling

In [2]:
def monat(month):
    """Convert a German month name into its calendar number (1-12).

    The lookup is exact: ``month`` has to be spelled as in the table below
    (capitalised, 'März' with umlaut). ``list.index`` raises ``ValueError``
    for any other string.
    """
    german_months = [
        'Januar', 'Februar', 'März', 'April', 'Mai', 'Juni',
        'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember',
    ]
    # List positions are 0-based, calendar months are 1-based.
    return german_months.index(month) + 1

def mese(month):
    """Convert an Italian month name into its calendar number (1-12).

    The name is lower-cased before the lookup, so 'Gennaio' and 'gennaio'
    are both accepted. ``list.index`` raises ``ValueError`` for a string
    that is not in the table.
    """
    italian_months = [
        'gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
        'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre',
    ]
    normalized = month.lower()
    # List positions are 0-based, calendar months are 1-based.
    return italian_months.index(normalized) + 1
    
def mois(month):
    """Convert a French month name into its calendar number (1-12).

    The lookup is case-insensitive. January and February are accepted both
    in full ('janvier', 'février') and in the truncated spellings 'janvi'
    and 'févri' that remain when every 'er' has been stripped from the text
    (which is how the ordinal in '1er' gets removed before date parsing).

    Raises:
        ValueError: if ``month`` is not a known French month name.
    """
    monate = ['janvi', 'févri', 'mars', 'avril', 'mai', 'juin', 'juillet',
              'août', 'septembre', 'octobre', 'novembre', 'décembre']
    # Map the full spellings onto the truncated keys used by the table so
    # that both forms resolve to the same index.
    full_spellings = {'janvier': 'janvi', 'février': 'févri'}
    key = month.lower()
    key = full_spellings.get(key, key)
    return monate.index(key) + 1

def find_case_number(ruling):
    """Return all case numbers found in ``ruling`` as a list of strings.

    A case number is the letter D or E, a dash, one to four digits, a slash
    and a four-digit year (e.g. 'E-6164/2014'). The list is empty when the
    text contains no such number.
    """
    case_number_pattern = '[D-E]{1}-[0-9]{1,4}/[0-9]{4}'
    return re.findall(case_number_pattern, ruling)


def to_datetime(head):
    if 'Urteil' in head:
        dateline = re.findall('Urteil vom\s{1,2}[0-9]{1,2}\.\s{0,1}\w+ [0-9]{4}', head)
        dateline = dateline[0]

        day = re.findall('[0-9]{1,2}', dateline)
        day = day[0]
        month = re.findall('[A-S]{1}[a-zä]{1,20}', dateline)
        month = monat(month[0])
        year = re.findall('[0-9]{4}', dateline)
        year = year[0]
        
        date = pd.to_datetime(str(day) + '/' + str(month) + '/' + str(year), format='%d/%m/%Y')
        return date
    
    if 'Arrêt' in head:
        dateline = re.findall('Arrêt du\s{1,2}[0-9]{1,2}\s{0,2}\w+ [0-9]{4}', head.replace('er', '')) #.replace('nd', ''))
        dateline = dateline[0]
        
        day = re.findall('[0-9]{1,2}', dateline)
        day = day[0]
        month = re.findall('[a-zéôû]{1,20}', dateline)
        month = mois(month[3])
        year = re.findall('[0-9]{4}', dateline)
        year = year[0]

        date = pd.to_datetime(str(day) + '/' + str(month) + '/' + str(year), format='%d/%m/%Y')
        return date
    
    if 'Sentenza' in head:
        dateline = re.findall('Sentenza del\s{1,2}[0-9]{1,2}\s{0,2}\w+ [0-9]{4}', head.replace('°', '').replace("l'", " ")) #.replace('nd', ''))
        no_del = re.findall('Sentenza\s{1,2}[0-9]{1,2}\s{0,2}\w+ [0-9]{4}', head.replace('°', '').replace("l'", " ")) #.replace('nd', ''))
        if len(no_del) > 0:
            dateline = no_del[0].replace('Sentenza', 'Sentenza del')
        
        else:
            dateline = dateline[0]
        
        day = re.findall('[0-9]{1,2}', dateline)
        day = day[0]
        month = re.findall('[a-zéôû]{1,20}', dateline)
        month = mese(month[2])
        year = re.findall('[0-9]{4}', dateline)
        year = year[0]

        date = pd.to_datetime(str(day) + '/' + str(month) + '/' + str(year), format='%d/%m/%Y')
        return date

# Make list of nationalities

In [1]:
import gettext
import pycountry

def _translated_country_names(language):
    """Return every pycountry country name translated into ``language``.

    ``language`` is the locale code handed to ``gettext.translation``
    ('de', 'fr', 'it'). The catalog is loaded once per language and queried
    through its own ``gettext`` method, so the builtin ``_`` is not rebound.
    """
    translation = gettext.translation(
        'iso3166', pycountry.LOCALES_DIR, languages=[language])
    return [translation.gettext(country.name) for country in pycountry.countries]


countries_german = _translated_country_names('de')
countries_french = _translated_country_names('fr')
countries_italian = _translated_country_names('it')

# One [German, French, Italian] triple per country, in pycountry's order.
nationalities = [
    [german, french, italian]
    for german, french, italian
    in zip(countries_german, countries_french, countries_italian)
]

ModuleNotFoundError: No module named 'pycountry'

# Search for nationalities

In [3]:
# Adjust list manually to match the court's practice.
nationalities = [['unbekannt', 'nationalité indéterminée'], ['staatenlos', 'ohne nationalität', 'ohne Staats', 'sans nationalité'], ['Kosovo'],['Aruba'], ['Afghanistan'], ['Angola', 'Angola', 'Angola'], ['Anguilla'], ['Albanien', 'Albanie', 'Albania'], ['Andorra', 'Andorre'], ['Vereinigte Arabische Emirate', 'Émirats arabes unis', 'Emirats arabes unis', 'Emirati arabi uniti'], ['Argentinien', 'Argentine', 'Argentina'], ['Armenien', 'Arménie', 'Armenia'], ['Amerikanisch-Samoa', 'Samoa américaines', 'Samoa americane'], ['Antarktis', 'Antarctique', 'Antartide'], ['Französische Süd- und Antarktisgebiete', 'Terres australes françaises', 'Territori meridionali francesi'], ['Antigua und Barbuda', 'Antigua-et-Barbuda', 'Antigua e Barbuda'], ['Australien', 'Australie', 'Australia'], ['Österreich', 'Autriche', 'Austria'], ['Aserbaidschan', 'Azerbaïdjan', 'Azerbaigian'], ['Burundi', 'Burundi', 'Burundi'], ['Belgien', 'Belgique', 'Belgio'], ['Benin', 'Bénin', 'Benin'], ['Bonaire, Sint Eustatius und Saba', 'Bonaire, Saint-Eustache et Saba', 'Bonaire, Saint Eustatius e Saba'], ['Burkina Faso', 'Burkina Faso', 'Burkina-Faso'], ['Bangladesch', 'Bangladesh', 'Bangladesh'], ['Bulgarien', 'Bulgarie', 'Bulgaria'], ['Bahrain', 'Bahreïn', 'Bahrein'], ['Bahamas', 'Bahamas', 'Bahamas'], ['Bosnien und Herzegowina', 'Bosnie-Herzégovine', 'Bosnia-Erzegovina'], ['Saint-Barthélemy', 'Saint-Barthélemy', 'Saint-Barths'], ['Belarus', 'Weißrussland', 'Bélarus', 'Bielorussia'], ['Belize', 'Belize', 'Belize'], ['Bermuda', 'Bermudes', 'Bermuda'], ['Bolivien', 'Bolivie', 'Bolivia'], ['Brasilien', 'Brésil', 'Brasile'], ['Barbados', 'Barbade', 'Barbados'], ['Brunei Darussalam', 'Brunéi Darussalam', 'Brunei'], ['Bhutan', 'Bhoutan', 'Bhutan'], ['Bouvet-Insel', 'Bouvet, Île', 'Isola di Bouvet'], ['Botsuana', 'Botswana'], ['Zentralafrikanische Republik', 'Centrafricaine, République', 'Repubblica Centrafricana'], ['Kanada', 'Canada'], ['Kokos-(Keeling-)Inseln', 'Cocos (Keeling), Îles', 'Isole Cocos 
(Keeling)'], ['Chile', 'Chili', 'Cile'], ['China', 'Chine', 'Cina'], ["Côte d'Ivoire", "Côte d'Ivoire", "Costa d'Avorio"], ['Kamerun', 'Cameroun', 'Camerun'], ['Kongo (beide)', 'Kongo', 'Congo', 'Congo'], ['Cookinseln', 'Cook, Îles', 'Isole Cook'], ['Kolumbien', 'Colombie', 'Colombia'], ['Komoren', 'Comores', 'Comore'], ['Cabo Verde', 'Cabo Verde', 'Cabo Verde'], ['Costa Rica', 'Costa Rica', 'Costa Rica'], ['Kuba', 'Cuba', 'Cuba'], ['Curaçao', 'Curaçao', 'Curaçao'], ['Weihnachtsinseln', 'Christmas, Île', 'Isola Christmas'], ['Cayman-Inseln', 'Caïman, Îles', 'Isole Cayman'], ['Zypern', 'Chypre', 'Cipro'], ['Tschechische Republik','Czechia', 'Czechia', 'Czechia'], ['Deutschland', 'Allemagne', 'Germania'], ['Dschibuti', 'Djibouti', 'Gibuti'], ['Dominica', 'Dominique', 'Dominica'], ['Dänemark', 'Danemark', 'Danimarca'], ['Dominikanische Republik', 'Dominicaine, République', 'Repubblica Dominicana'], ['Algerien', 'Algérie', 'Algeria'], ['Ecuador', 'Équateur', 'Equateur', 'Ecuador'], ['Ägypten', 'Aegypten', 'Égypte', 'Egypte', 'Egitto'], ['Eritrea', 'Érythrée', 'Erythrée', 'Eritrea'], ['Westsahara', 'Sahara Occidental', 'Sahara occidentale'], ['Spanien', 'Espagne', 'Spagna'], ['Estland', 'Estonie', 'Estonia'], ['Äthiopien', 'Éthiopie', 'Ethiopie', 'Etiopia'], ['Finnland', 'Finlande', 'Finlandia'], ['Fidschi', 'Fidji', 'Fiji'], ['Falklandinseln (Malwinen)', 'Falkland, Îles (Malvinas)', 'Isole Falkland (Malvine)'], ['Frankreich', 'France', 'Francia'], ['Färöer-Inseln', 'Féroé, Îles', 'Isole Fær Øer'], ['Mikronesien', 'Micronésie', 'Micronesia'], ['Gabun', 'Gabon', 'Gabon'], ['Vereinigtes Königreich', 'Royaume-Uni', 'Regno Unito'], ['Georgien', 'Géorgie', 'Georgia'], ['Guernsey', 'Guernesey', 'Guernsey'], ['Ghana', 'Ghana', 'Ghana'], ['Gibraltar', 'Gibraltar', 'Gibilterra'], ['Guinea', 'Guinée', 'Guinea'], ['Guadeloupe', 'Guadeloupe', 'Guadalupa'], ['Gambia', 'Gambie', 'Gambia'], ['Guinea-Bissau', 'Guinée-Bissau', 'Guinea-Bissau'], ['Äquatorialguinea', 'Guinée Équatoriale', 
'Guinée Equatoriale', 'Guinea equatoriale'], ['Griechenland', 'Grèce', 'Grecia'], ['Grenada', 'Grenade', 'Grenada'], ['Grönland', 'Groënland', 'Groenlandia'], ['Guatemala', 'Guatemala', 'Guatemala'], ['Französisch-Guyana', 'Guyane française', 'Guyana francese'], ['Guam', 'Guam', 'Guam'], ['Guyana', 'Guyana', 'Guyana'], ['Hongkong', 'Hong-Kong', 'Hong Kong'], ['Heard und McDonaldinseln', 'Heard, Île et McDonald, Îles', 'Isole Heard e McDonald'], ['Honduras', 'Honduras', 'Honduras'], ['Kroatien', 'Croatie', 'Croazia'], ['Haiti', 'Haïti', 'Haiti'], ['Ungarn', 'Hongrie', 'Ungheria'], ['Indonesien', 'Indonésie', 'Indonesia'], ['Insel Man', 'Île de Man', 'Isola di Man'], ['Indien', 'Inde', 'India'], ['Britisches Territorium im Indischen Ozean', "Océan Indien, Territoire britannique de l'", "Territorio britannico dell'Oceano Indiano"], ['Irland', 'Irlande', 'Irlanda'], ['Iran', 'Iran', 'Iran'], ['Irak', 'Irak', 'Iraq'], ['Island', 'Islande', 'Islanda'], ['Israel', 'Israël', 'Israele'], ['Italien', 'Italie', 'Italia'], ['Jamaika', 'Jamaïque', 'Giamaica'], ['Jersey', 'Jersey', 'Jersey'], ['Jordanien', 'Jordanie', 'Giordania'], ['Japan', 'Japon', 'Giappone'], ['Kasachstan', 'Kazakhstan', 'Kazakistan'], ['Kenia', 'Kenya', 'Kenya'], ['Kirgisistan', 'Kirghizistan', 'Kirghizistan'], ['Kambodscha', 'Cambodge', 'Cambogia'], ['Kiribati', 'Kiribati', 'Kiribati'], ['St. Kitts und Nevis', 'Saint-Kitts-et-Nevis', 'Saint Kitts e Nevis'], ['Korea', 'Corée', 'Corea del Sud'], ['Kuwait', 'Koweït', 'Kuwait'], ['Laos', 'Lao', 'Laos'], ['Libanon', 'Liban', 'Libano'], ['Liberia', 'Libéria', 'Liberia'], ['Libyen', 'Libye', 'Libia'], ['St. 
Lucia', 'Sainte-Lucie', 'Saint Lucia'], ['Liechtenstein', 'Liechtenstein', 'Liechtenstein'], ['Sri Lanka', 'Sri Lanka', 'Sri Lanka'], ['Lesotho', 'Lesotho', 'Lesotho'], ['Litauen', 'Lituanie', 'Lituania'], ['Luxemburg', 'Luxembourg', 'Lussemburgo'], ['Lettland', 'Lettonie', 'Lettonia'], ['Macao', 'Macau', 'Macao'], ['Saint Martin (Französischer Teil)', 'Saint-Martin (partie française)', 'Saint-Martin (Francia)'], ['Marokko', 'Maroc', 'Marocco'], ['Monaco', 'Monaco', 'Monaco'], ['Moldawien', 'Moldova', 'Moldavia'], ['Madagaskar', 'Madagascar', 'Madagascar'], ['Malediven', 'Maldives', 'Maldive'], ['Mexiko', 'Mexique', 'Messico'], ['Marshallinseln', 'Îles Marshall', 'Isole Marshall'], ['Mazedonien', 'Macédoine', 'Macedonia'], ['Mali', 'Mali', 'Mali'], ['Malta', 'Malte', 'Malta'], ['Myanmar', 'Myanmar', 'Myanmar'], ['Montenegro', 'Monténégro', 'Montenegro'], ['Mongolei', 'Mongolie', 'Mongolia'], ['Nördliche Mariana-Inseln', 'Mariannes du Nord, Îles', 'Marianne settentrionali'], ['Mosambik', 'Mozambique', 'Mozambico'], ['Mauretanien', 'Mauritanie', 'Mauritania'], ['Montserrat', 'Montserrat', 'Montserrat'], ['Martinique', 'Martinique', 'Martinica'], ['Mauritius', 'Maurice', 'Maurizio'], ['Malawi', 'Malawi', 'Malawi'], ['Malaysia', 'Malaisie', 'Malaysia'], ['Mayotte', 'Mayotte', 'Mayotte'], ['Namibia', 'Namibie', 'Namibia'], ['Neukaledonien', 'Nouvelle-Calédonie', 'Nuova Caledonia'], ['Nigeria oder Niger','Niger', 'Niger', 'Niger'], ['Norfolkinsel', 'Norfolk, Île', 'Isola Norfolk'], ['Nigeria', 'Nigeria', 'Nigéria', 'Nigeria'], ['Nicaragua', 'Nicaragua', 'Nicaragua'], ['Niue', 'Nioue', 'Niue'], ['Niederlande', 'Pays-Bas', 'Paesi Bassi'], ['Norwegen', 'Norvège', 'Norvegia'], ['Nepal', 'Népal', 'Nepal'], ['Nauru', 'Nauru', 'Nauru'], ['Neuseeland', 'Nouvelle-Zélande', 'Nuova Zelanda'], ['Oman', 'Oman', 'Oman'], ['Pakistan', 'Pakistan', 'Pakistan'], ['Panama', 'Panama', 'Panama'], ['Pitcairn', 'Pitcairn', 'Pitcairn'], ['Peru', 'Pérou', 'Perù'], ['Philippinen', 'Philippines', 
'Filippine'], ['Palau', 'Palaos', 'Palau'], ['Papua-Neuguinea', 'Papouasie-Nouvelle-Guinée', 'Papua Nuova Guinea'], ['Polen', 'Pologne', 'Polonia'], ['Puerto Rico', 'Porto Rico', 'Puerto Rico'], ['Demokratische Volksrepublik Korea', 'Nordkorea', 'République populaire démocratique de Corée', 'Corée du Nord', 'Corea del Nord'], ['Portugal', 'Portugal', 'Portogallo'], ['Paraguay', 'Paraguay', 'Paraguay'], ['Palästina', 'Palestine', 'Palestina', "d'origine palestinienne", 'palästinensisch'], ['Französisch-Polynesien', 'Polynésie française', 'Polinesia francese'], ['Katar', 'Qatar', 'Qatar'], ['Réunion', 'Réunion, Île de la', 'Riunione'], ['Rumänien', 'Roumanie', 'Romania'], ['Russische Föderation', 'Russland', 'Russie', 'Russia'], ['Ruanda', 'Rwanda'], ['Saudi-Arabien', 'Arabie saoudite', 'Arabia Saudita'], ['Sudan', 'Soudan'], ['Senegal', 'Sénégal'], ['Singapur', 'Singapour', 'Singapore'], ['South Georgia und die Südlichen Sandwichinseln', 'Géorgie du Sud et les îles Sandwich du Sud', 'Georgia del Sud e Sandwich australi'], ['St. Helena, Ascension und Tristan da Cunha', 'Sainte-Hélène, Ascension et Tristan da Cunha', 'Saint Helena, Ascension e Tristan da Cunha'], ['Svalbard und Jan Mayen', 'Svalbard et île Jan Mayen', 'Svalbard e Jan Mayen'], ['Salomoninseln', 'Salomon, Îles', 'Isole Salomone'], ['Sierra Leone', 'Sierra Leone', 'Sierra Leone'], ['El Salvador', 'El Salvador', 'El Salvador'], ['San Marino', 'San Marin', 'San Marino'], ['Somalia', 'Somalie', 'Somalia'], ['St. 
Pierre und Miquelon', 'Saint-Pierre-et-Miquelon', 'Saint-Pierre e Miquelon'], ['Serbien', 'Serbie', 'Serbia'], ['Südsudan', 'Soudan du Sud', 'Sudan meridionale'], ['São Tomé und Príncipe', 'Sao Tomé-et-Principe', 'São Tomé e Príncipe'], ['Suriname', 'Surinam', 'Suriname'], ['Slowakei', 'Slovaquie', 'Slovacchia'], ['Slowenien', 'Slovénie', 'Slovenia'], ['Schweden', 'Suède', 'Svezia'], ['Swasiland', 'Swaziland', 'Swaziland'], ['Saint-Martin (Niederländischer Teil)', 'Saint-Martin (partie néerlandaise)', 'Sint Maarten (parte olandese)'], ['Seychellen', 'Seychelles', 'Seicelle'], ['Syrien', 'syrischer Herkunft', 'Syrie', 'Siria'], ['Turks- und Caicosinseln', 'Turks et Caïques, Îles', 'Isole Turks e Caicos'], ['Tschad', 'Tchad', 'Ciad'], ['Togo', 'Togo', 'Togo'], ['Thailand', 'Thaïlande', 'Thailandia'], ['Tadschikistan', 'Tadjikistan', 'Tagikistan'], ['Tokelau', 'Tokelau', 'Tokelau'], ['Turkmenistan', 'Turkménistan', 'Turkmenistan'], ['Timor-Leste', 'Timor orientale'], ['Tonga'], ['Trinidad'], ['Tunesien', 'Tunisie', 'Tunisia'], ['Türkei', 'Turquie', 'Turchia'], ['Tuvalu', 'Tuvalu', 'Tuvalu'], ['Taiwan, Chinesische Provinz', 'Taïwan, province de Chine', 'Taiwan'], ['Tansania', 'Tanzanie', 'Tanzania'], ['Uganda', 'Ouganda', 'Uganda'], ['Ukraine', 'Ukraine', 'Ucraina'], ['United States Minor Outlying Islands', "Îles mineures éloignées des États-Unis d'Amérique", 'Isole minori statunitensi'], ['Uruguay', 'Uruguay', 'Uruguay'], ['Vereinigte Staaten', 'États-Unis', 'Stati Uniti'], ['Usbekistan', 'Ouzbékistan', 'Uzbekistan'], ['Heiliger Stuhl (Staat Vatikanstadt)', 'Saint-Siège (état de la cité du Vatican)', 'Santa Sede (Stato della Città del Vaticano)'], ['St. 
Vincent und die Grenadinen', 'Saint-Vincent-et-les Grenadines', 'Saint Vincent e Grenadine'], ['Venezuela', 'Vénézuela', 'Venezuela'], ['Britische Jungferninseln', 'Îles Vierges britanniques', 'Isole Vergini britanniche'], ['Amerikanische Jungferninseln', 'Îles Vierges des États-Unis', 'Isole Vergini, U.S.'], ['Vietnam', 'Viet Nam', 'Vietnam'], ['Vanuatu', 'Vanuatu', 'Vanuatu'], ['Wallis und Futuna', 'Wallis et Futuna', 'Wallis e Futuna'], ['Samoa', 'Samoa', 'Samoa'], ['Jemen', 'Yémen', 'Yemen'], ['Südafrika', 'Afrique du Sud', 'Sud Africa'], ['Sambia', 'Zambie', 'Zambia'], ['Simbabwe', 'Zimbabwe', 'Zimbabwe']]

In [4]:
def nationality(head):
    """Return the nationality mentioned in ``head``, or 'not found'.

    Every spelling in the module-level ``nationalities`` list is searched
    case-insensitively as a substring of ``head``. The result is the first
    spelling of the matching entry. When several entries match, the one
    that comes last in ``nationalities`` wins.
    """
    haystack = head.lower()
    found = 'not found'
    for spellings in nationalities:
        # No early exit: a later entry must be able to override an earlier one.
        if any(spelling.lower() in haystack for spelling in spellings):
            found = spellings[0]
    return found

# Find gender

In [62]:
def sex(ruling, head=None):
    """Guess whether the claimant is male, female or a group.

    The German and French checks look at the whole ruling (case-insensitive);
    the Italian check looks for the whole words 'nato' / 'nata' in the
    heading. Later checks override earlier ones, in the order German,
    French, Italian.

    Args:
        ruling: full text of the ruling.
        head: heading of the ruling. Defaults to the first 700 characters of
            ``ruling``.

    Returns:
        'M', 'F', 'Group', or 'unknown' when nothing matched.

    Known limitation: families consisting only of women or only of men, and
    people without a birthdate, are not recognised by the Italian check.
    """
    if head is None:
        head = ruling[0:700]
    text = ruling.lower()
    result = 'unknown'

    # German
    claimant_forms = re.findall(r'beschwerdeführer[i]{0,1}[n]{0,1}', text)
    if claimant_forms:
        result = 'M'
        if 'beschwerdeführerin' in claimant_forms:
            result = 'F'
    # The noun is capitalised in running text, so compare on the lowered text.
    if 'die beschwerdeführenden' in text:
        result = 'Group'
    # "die Beschwerdeführer" followed by whitespace is the plural form.
    if re.search(r'die beschwerdeführer\s{1}', text):
        result = 'Group'

    # French
    if 'la recourante' in text:
        result = 'F'
    elif 'le recourant' in text:
        result = 'M'
    elif 'les recourants' in text:
        result = 'Group'

    # Italian: "nato il ..." / "nata il ..." in the heading. Whole words
    # only, so that names such as "Donato" or "Renata" do not count.
    has_nato = re.search(r'\bnato\b', head) is not None
    has_nata = re.search(r'\bnata\b', head) is not None
    if has_nato and has_nata:
        result = 'Group'
    elif has_nato:
        result = 'M'
    elif has_nata:
        result = 'F'

    return result

# Final verdict in the case

In [6]:
def verdict_italian(ruling):
    """Classify the first point of the operative part of an Italian ruling.

    Looks for the text following 'Tribunale amministrativo federale
    pronuncia: 1.' up to the next full stop and inspects at most its first
    150 characters.

    Returns:
        'rejected' if that text contains 'respinto' or 'inammissibile',
        'granted' if it contains 'accolto', the raw text otherwise, and
        ``None`` when the ruling has no such passage.
    """
    matches = re.findall(
        r'Tribunale amministrativo federale pronuncia\s*:\s*1.([^.]*)', ruling)
    if not matches:
        return None
    # Work on the first match's text, not on the list findall returns.
    verdict = matches[0][:150]
    if 'respinto' in verdict or 'inammissibile' in verdict:
        return 'rejected'
    if 'accolto' in verdict:
        return 'granted'
    return verdict

def verdict_french(ruling):
    """Classify the first point of the operative part of a French ruling.

    Looks for the text following 'le Tribunal administratif fédéral
    prononce: 1.' up to the next full stop and inspects at most its first
    150 characters.

    Returns:
        'rejected' if that text contains 'rejeté' or 'écarté', 'granted' if
        it contains 'admis', the raw text otherwise, and ``None`` when the
        ruling has no such passage.
    """
    matches = re.findall(
        r'le Tribunal administratif fédéral prononce\s*:\s*1.([^.]*)', ruling)
    if not matches:
        return None
    verdict = matches[0][:150]
    # Both keywords must be tested against the text; a bare string operand
    # is not a membership test.
    if 'rejeté' in verdict or 'écarté' in verdict:
        return 'rejected'
    if 'admis' in verdict:
        return 'granted'
    return verdict

def verdict_german(ruling):
    """Classify the first point of the operative part of a German ruling.

    Looks for the text following 'erkennt das Bundesverwaltungsgericht: 1.'
    up to the next full stop and inspects at most its first 150 characters.

    Returns:
        'rejected' if that text contains 'abgewiesen', 'granted' if it
        contains 'gutgeheissen', the raw text otherwise, and ``None`` when
        the ruling has no such passage.
    """
    matches = re.findall(
        r'erkennt das Bundesverwaltungsgericht\s*:\s*1.([^.]*)', ruling)
    if not matches:
        # No operative part found; say so explicitly instead of relying on
        # a swallowed IndexError.
        return None
    verdict = matches[0][:150]
    if 'abgewiesen' in verdict:
        return 'rejected'
    if 'gutgeheissen' in verdict:
        return 'granted'
    return verdict

# Helper function: read case file

In [7]:
def read_ruling(file):
    """Read the ruling 'txtfiles/<file>.txt' and return it as one line of text.

    Words hyphenated across a line break ('-' followed by a newline) are
    joined again; every remaining newline becomes a single space.

    Args:
        file: name of the ruling file without directory and extension
            (converted with ``str``).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the handle even when read() fails.
    with open('txtfiles/' + str(file) + '.txt', 'r') as ruling_file:
        text = ruling_file.read()
    return text.replace('-\n', '').replace('\n', ' ')

# Helper function: read statutes file

In [8]:
def read_statutes(file, directory='/Users/philiphanke/Dropbox/pythonprojects/bvger/swiss-asylum-judges/txtfiles/2017/'):
    """Read the statutes file '<file>statutes.txt' that belongs to a ruling.

    Args:
        file: name of the ruling (converted with ``str``); the statutes file
            is named ``<file>statutes.txt``.
        directory: folder holding the statutes files. The default is the
            original author's absolute path; pass your own folder when
            running elsewhere.

    Returns:
        The file content, or the placeholder 'NaN \\n NaN' when the file is
        missing or unreadable.
    """
    import os

    path = os.path.join(directory, str(file) + 'statutes.txt')
    try:
        with open(path, 'r') as federal_statutes_file:
            federal_statutes = federal_statutes_file.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: fall back to a two-line placeholder when the
        # statutes file cannot be read.
        federal_statutes = 'NaN \n NaN'
    return federal_statutes

# Read the cases, apply the helper functions, and build the DataFrames

In [63]:
# Collect one single-row frame per ruling and concatenate once per language
# after the loop. DataFrame.append copied the whole frame on every call and
# no longer exists in pandas 2.0.
# Each list starts with the one-row placeholder frame (empty 'ruling' column)
# so the resulting frames keep the same first row and column as before.
frames_deutsch = [pd.DataFrame([['']], columns=['ruling'])]
frames_francais = [pd.DataFrame([['']], columns=['ruling'])]
frames_italiano = [pd.DataFrame([['']], columns=['ruling'])]

for file in range(0, 3256):
    ruling = read_ruling(file)
    federal_statutes = read_statutes(file)

    # The heading is taken to be the first 700 characters of the ruling.
    head = ruling[0:700]

    # Temporary one-row DataFrame for this ruling.
    df_temp = pd.DataFrame(
        [[file, find_case_number(head), to_datetime(head), nationality(head), head, sex(ruling)]],
        columns=['filenumber', 'casenumber', 'date', 'nationality', 'head', 'sex'])

    # The statutes file alternates lines: name of the law, then the cited
    # paragraphs. Pair them up; an unpaired trailing line is ignored rather
    # than raising IndexError.
    statutes_list = federal_statutes.split('\n')
    statutes_dict = dict(zip(statutes_list[0::2], statutes_list[1::2]))

    # Add a column for every law (name without its last character) and take
    # the cited paragraphs as observation.
    for law, paragraphs in statutes_dict.items():
        df_temp[law[0:-1]] = paragraphs

    # A ruling is filed under every language whose keyword it contains.
    # Store a copy, because df_temp['verdict'] may be overwritten by the
    # next language check.
    if 'Urteil' in ruling:
        df_temp['verdict'] = verdict_german(ruling)
        frames_deutsch.append(df_temp.copy())
    if 'Arrêt' in ruling:
        df_temp['verdict'] = verdict_french(ruling)
        frames_francais.append(df_temp.copy())
    if 'Sentenza' in ruling:
        df_temp['verdict'] = verdict_italian(ruling)
        frames_italiano.append(df_temp.copy())

    # Progress indicator.
    if file % 500 == 0:
        print(file)

df_deutsch = pd.concat(frames_deutsch)
df_francais = pd.concat(frames_francais)
df_italiano = pd.concat(frames_italiano)

print('Done')
# Ring the terminal bell three times.
print('\a'*3)

0
500
1000
1500
2000
2500
3000
Done



In [10]:
# Preview the first rows of the German-language DataFrame.
df_deutsch.head()

Unnamed: 0,0.105,0.276.191.361,0.312.1,0.351.913.61,0.632.312.141.1,0.632.316.491.1,0.632.317.411.23,0.748.131.913.6,744.171,AHVG,...,ZGB,ZStV,casenumber,date,filenumber,head,nationality,ruling,sex,verdict
0,,,,,,,,,,,...,,,,NaT,,,,,,
0,"Art.3,",,,,,,,,,,...,,,[E-6164/2014],2017-01-03 00:00:00,0.0,Bundesverwaltungsgericht Tribunal administra...,Ägypten,,F,rejected
0,,,,,,,,,,,...,,,[D-8013/2016],2017-01-03 00:00:00,1.0,Bundesverwaltungsgericht Tribunal administra...,Somalia,,F,rejected
0,,,,,,,,,,,...,,,[D-7853/2016],2017-01-03 00:00:00,2.0,Bundesverwaltungsgericht Tribunal administra...,Türkei,,M,rejected
0,"Art.3,",,,,,,,,,,...,,,[D-6367/2015],2017-01-03 00:00:00,5.0,Bundesverwaltungsgericht Tribunal administra...,Usbekistan,,F,rejected
