## Notebook to load and analyze Refugee Law Lab Reporter data

Requirements

    pip install pandas

If using parquet:

    pip install pyarrow

If loading data remotely (Options 3 and 4):

    pip install requests

(produced with Python 3.9.12)

## Load Data

#### Four options: Local (json & parquet) & Remote (json & parquet)

In [1]:
# OPTION 1: Load parquet data locally via cloned repo
#
# Prerequisite: clone the git repo first. The path below is relative,
# so it is resolved against the current working directory.

import pandas as pd

# Location of the combined parquet file inside the cloned repo
parquet_path = 'DATA/rllr_cases.parquet'

# Read the whole file into a DataFrame
# (if this fails, pass engine='pyarrow' to read_parquet())
df = pd.read_parquet(parquet_path)

In [5]:
# OPTION 2: Load JSON data locally via cloned repo
#
# Prerequisite: clone the git repo first. The path below is relative,
# so it is resolved against the current working directory.

import pandas as pd
import json
import pathlib

# Directory holding one JSON file per year, named <year>.json
data_path = pathlib.Path('DATA/YEARLY/')

# Inclusive range of years to load
start_year = 2019  # First year of data sought (2019 or later)
end_year = 2022  # Last year of data sought (2022 or earlier)

# Load data: the records from each yearly file are appended to one list
results = []
for year in range(start_year, end_year + 1):
    # Open as UTF-8 explicitly: without an encoding argument open() uses the
    # platform default (e.g. cp1252 on Windows), which can fail on or garble
    # non-ASCII characters in the JSON text.
    with open(data_path / f'{year}.json', encoding='utf-8') as f:
        results.extend(json.load(f))

# Convert the combined list of records to a DataFrame
df = pd.DataFrame(results)

In [3]:
# OPTION 3: Load parquet data remotely from GitHub without cloning repo

import pandas as pd
import requests
from io import BytesIO

url = 'https://github.com/Refugee-Law-Lab/rllr_bulk_data/raw/master/DATA/rllr_cases.parquet'

# Download the file. The timeout stops the cell from hanging indefinitely
# if the server does not respond.
response = requests.get(url, timeout=60)

# Fail here with a clear HTTP error (e.g. 404) instead of handing an error
# page to read_parquet(), which would report a confusing parquet error.
response.raise_for_status()

# Convert the downloaded bytes to a DataFrame
# (if this fails, pass engine='pyarrow' to read_parquet())
df = pd.read_parquet(BytesIO(response.content))

In [7]:
# OPTION 4: Load json data remotely from GitHub without cloning repo

import pandas as pd
import requests
import pathlib  # not used by this cell; import left in place

# Inclusive range of years to load
start_year = 2019  # First year of data sought (2019 or later)
end_year = 2022  # Last year of data sought (2022 or earlier)

# Base address of the yearly JSON files; each file is named <year>.json
base_url = 'https://raw.githubusercontent.com/Refugee-Law-Lab/rllr_bulk_data/master/DATA/YEARLY/'

# Load data: the records from each yearly file are appended to one list
results = []
for year in range(start_year, end_year + 1):
    url = f'{base_url}{year}.json'

    # The timeout stops the cell from hanging if the server does not respond
    response = requests.get(url, timeout=60)

    # Stop with a clear HTTP error (e.g. 404 for a missing year) rather than
    # a JSON decoding error on an error page
    response.raise_for_status()

    results.extend(response.json())

# Convert the combined list of records to a DataFrame
df = pd.DataFrame(results)

In [8]:
# Show the DataFrame produced by whichever load option was run above
df

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
0,2019 RLLR 220,TB6-04576,RLLR,2019,,en,2019/05/14,https://refugeelab.ca/rllr/2019rllr220,2023-07-17,Citation: 2019 RLLR 220\nTribunal: Refugee Pro...,"{'country': 'Somalia', 'case_type': 'Religion'..."
1,2019 RLLR 219,TB6-07934,RLLR,2019,,en,2019/01/15,https://refugeelab.ca/rllr/2019rllr219,2023-07-17,Citation: 2019 RLLR 219\nTribunal: Refugee Pro...,"{'country': 'Iraq', 'case_type': 'Religion', '..."
2,2019 RLLR 218,TB8-15170,RLLR,2019,,en,2019/10/22,https://refugeelab.ca/rllr/2019rllr218,2023-07-17,Citation: 2019 RLLR 218\nTribunal: Refugee Pro...,"{'country': 'Mexico', 'case_type': 'PSG: Other..."
3,2019 RLLR 217,TB8-07871,RLLR,2019,,en,2019/05/08,https://refugeelab.ca/rllr/2019rllr217,2023-07-17,Citation: 2019 RLLR 217\nTribunal: Refugee Pro...,"{'country': 'Angola', 'case_type': 'Race/Ethni..."
4,2019 RLLR 216,TB9-14438,RLLR,2019,,en,2019/10/02,https://refugeelab.ca/rllr/2019rllr216,2023-07-17,Citation: 2019 RLLR 216\nTribunal: Refugee Pro...,"{'country': 'China', 'case_type': 'Race/Ethnic..."
...,...,...,...,...,...,...,...,...,...,...,...
475,2021 RLLR 4,TB9-15167,RLLR,2021,,en,2021/05/05,https://refugeelab.ca/rllr/2021rllr4,2023-07-17,Citation: 2021 RLLR 4\nTribunal: Refugee Prote...,"{'country': 'Zimbabwe', 'case_type': 'Politica..."
476,2021 RLLR 3,MB8-07585,RLLR,2021,,en,2021/01/18,https://refugeelab.ca/rllr/2021rllr3,2023-07-17,Citation: 2021 RLLR 3\nTribunal: Refugee Prote...,"{'country': 'South Africa', 'case_type': 'PSG:..."
477,2021 RLLR 2,TB9-27084,RLLR,2021,,en,2021/05/12,https://refugeelab.ca/rllr/2021rllr2,2023-07-17,Citation: 2021 RLLR 2\nTribunal: Refugee Prote...,"{'country': 'Uganda', 'case_type': 'Religion',..."
478,2021 RLLR 1,TB9-17419,RLLR,2021,,en,2021/06/15,https://refugeelab.ca/rllr/2021rllr1,2023-07-17,Citation: 2021 RLLR 1\nTribunal: Refugee Prote...,"{'country': 'Pakistan', 'case_type': 'Religion..."


# Working with the data

In [13]:
# The "other" column holds one dictionary per row (e.g. country, case_type).
# This cell turns each dictionary key into a regular DataFrame column so the
# values can be accessed directly.

# Guard so the cell can be re-run: once "other" has been expanded and dropped,
# dropping it a second time would raise a KeyError.
if 'other' in df.columns:
    # Build the new columns in a single pass from the list of dictionaries,
    # instead of creating one pd.Series per row with .apply(pd.Series).
    # A missing entry (None) becomes a row of missing values.
    other_records = [{} if item is None else item for item in df['other']]
    other_columns = pd.DataFrame(other_records, index=df.index)

    # Replace the "other" column with the expanded columns (aligned on index)
    df = pd.concat([df.drop(columns=['other']), other_columns], axis=1)
df

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,case_type,country,member
0,2022 RLLR 1,TB8-20107,RLLR,2022,,en,2022/04/29,https://refugeelab.ca/rllr/2022rllr1,2023-07-17,Citation: 2022 RLLR 1\nTribunal: Refugee Prote...,PSG: Gender Based Violence,Nigeria,M. Gayda
1,2021 RLLR 76,VC1-06497,RLLR,2021,,en,2021/12/21,https://refugeelab.ca/rllr/2021rllr76,2023-07-17,Citation: 2021 RLLR 76\nTribunal: Refugee Prot...,PSG: SOGIE,Nigeria,Lesley Stalker
2,2021 RLLR 75,VC1-05121,RLLR,2021,,en,2021/11/19,https://refugeelab.ca/rllr/2021rllr75,2023-07-17,Citation: 2021 RLLR 75\nTribunal: Refugee Prot...,PSG: SOGIE,South Korea,David Jones
3,2021 RLLR 74,VC1-02500,RLLR,2021,,en,2021/06/14,https://refugeelab.ca/rllr/2021rllr74,2023-07-17,Citation: 2021 RLLR 74\nTribunal: Refugee Prot...,PSG: SOGIE,Malawi,Jennifer Smith
4,2021 RLLR 73,VC1-02405,RLLR,2021,,en,2021/10/30,https://refugeelab.ca/rllr/2021rllr73,2023-07-17,Citation: 2021 RLLR 73\nTribunal: Refugee Prot...,PSG: SOGIE,Iran,Isis Marianne van Loon
...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,2019 RLLR 5,MB7-21566,RLLR,2019,,en,2019/10/17,https://refugeelab.ca/rllr/2019rllr5,2023-07-17,Citation: 2019 RLLR 5\nTribunal: Refugee Prote...,PSG: SOGIE,Haiti,Ethan McMonagle
476,2019 RLLR 4,MB7-18975,RLLR,2019,,en,2019/07/04,https://refugeelab.ca/rllr/2019rllr4,2023-07-17,Citation: 2019 RLLR 4\nTribunal: Refugee Prote...,PSG: Gender Based Violence,Haiti,Nicole Ginsberg
477,2019 RLLR 3,MB7-18354,RLLR,2019,,en,2019/09/10,https://refugeelab.ca/rllr/2019rllr3,2023-07-17,Citation: 2019 RLLR 3\nTribunal: Refugee Prote...,No Nexus: Criminality/Corruption,Haiti,Me Jean-Guy Jam
478,2019 RLLR 2,TB9-01394,RLLR,2019,,en,2019/12/23,https://refugeelab.ca/rllr/2019rllr2,2023-07-17,Citation: 2019 RLLR 2\nTribunal: Refugee Prote...,PSG: SOGIE,Nigeria,Marcelle Bourassa
