# Configuration of Google Colab and Drive

In [None]:
# Mount Google Drive inside the Colab runtime; the following cells change
# into a folder under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Switch the working directory to the "Colab Notebooks" folder on the mounted Drive.
%cd /content/drive/MyDrive/"Colab Notebooks"

/content/drive/MyDrive/Colab Notebooks


In [None]:
# Owner and name of the project repository (presumably the hosting-service
# coordinates - confirm). `repository` doubles as the folder name that the
# next `%cd` cell enters.
username, repository = "IsaacOlguin", "AutomatedTraumaDetectionInGCT"

In [None]:
# Enter the folder named by the `repository` variable (IPython expands {repository}).
%cd {repository}

/content/drive/MyDrive/Colab Notebooks/AutomatedTraumaDetectionInGCT


In [None]:
# List the contents of the current folder, hidden entries included.
%ls -a

'01 Dataset analysis.ipynb'   [0m[01;34mdata[0m/   [01;34m.git[0m/   LICENSE   README.md


In [None]:
# Confirm the current working directory before reading relative paths.
%pwd

'/content/drive/MyDrive/Colab Notebooks/AutomatedTraumaDetectionInGCT'

# Dataset Analysis

#### Installation of libraries

In [None]:
!pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


#### Import of libraries

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the semicolon-separated corpus; the path is relative to the current
# working directory, which earlier cells set to the repository folder.
DATASET_CSV = 'data/genocide-transcript-corpus-v0.1.csv'
df = pd.read_csv(DATASET_CSV, sep=';')

# Plain-text preview of the first rows (head() defaults to five).
preview = df.head()
print("=> First five rows:")
print(preview)

=> First five rows:
                                           paragraph  label  tribunal  \
0  ýý ý Kingdom of Cam bodia Nation Religion King...      0         1   
1  pag e Questioning by Mr Kar Savuth commenc es ...      0         1   
2  decision on the removal of witnesses from the ...      0         1   
3  The defence agreed with the Chambers proposal ...      0         1   
4  therefore be in a position to compare them wit...      0         1   

     witness                             document      case        date  
0  Vann Nath  E1_39.1_TR001_20090629_Final_EN_Pub  Case 001  29.06.2009  
1  Vann Nath  E1_39.1_TR001_20090629_Final_EN_Pub  Case 001  29.06.2009  
2  Vann Nath  E1_39.1_TR001_20090629_Final_EN_Pub  Case 001  29.06.2009  
3  Vann Nath  E1_39.1_TR001_20090629_Final_EN_Pub  Case 001  29.06.2009  
4  Vann Nath  E1_39.1_TR001_20090629_Final_EN_Pub  Case 001  29.06.2009  


In [None]:
# Show every column of the row labelled 0 as a Series.
df.loc[0, :]

paragraph    ýý ý Kingdom of Cam bodia Nation Religion King...
label                                                        0
tribunal                                                     1
witness                                              Vann Nath
document                   E1_39.1_TR001_20090629_Final_EN_Pub
case                                                  Case 001
date                                                29.06.2009
Name: 0, dtype: object

In [34]:
# Rich display of the first two rows.
df.head(2)

Unnamed: 0,paragraph,label,tribunal,witness,document,case,date
0,ýý ý Kingdom of Cam bodia Nation Religion King...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009
1,pag e Questioning by Mr Kar Savuth commenc es ...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009


In [36]:
# Witness values at positions 1000 and 1001 (select the column, then slice by position).
df["witness"].iloc[1000:1002]

1000    Witness JJ
1001    Witness JJ
Name: witness, dtype: object

In [37]:
# Parse the day.month.year strings of "date" into proper timestamps and keep
# them next to the original text column.
parsed_dates = pd.to_datetime(df["date"], format="%d.%m.%Y")
df["datetime"] = parsed_dates

In [38]:
# Check the first two rows again, now including the new "datetime" column.
df.head(2)

Unnamed: 0,paragraph,label,tribunal,witness,document,case,date,datetime
0,ýý ý Kingdom of Cam bodia Nation Religion King...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
1,pag e Questioning by Mr Kar Savuth commenc es ...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29


In [72]:
# Distinct witnesses in order of first appearance, and how many there are.
# NOTE(review): the original comment flagged the implications of making this
# information public - witness names are printed in the cell output; confirm
# that is acceptable before sharing the notebook.

df_witness = df["witness"].drop_duplicates()
print(df_witness)
witness_total = df_witness.count()
print(f"\n\t\tNumber of witnesses: {witness_total}")

0               Vann Nath
114              Chum Mey
214           Bay Sophany
266         Seoun Sovandy
308          Seng Sivutha
337           2-TCCP-1063
403              Sin Oeng
465             Isak Gasi
529          Fadil Redzic
562        Ibro Osmanovic
596           Edin Mrkalj
654     Hasiba Harambasic
700           Suada Ramic
707         Shefqet Zogaj
745            Osman Kuci
773         Hadije Fazliu
798          Sadik Januzi
815           Ndrec Konaj
840        Edison Zatriqi
877         Mahmut Halimi
916      Aferdita Hajrizi
939           Mehmet Aliu
995            Witness JJ
1097           Witness KK
1174           Witness NN
1283          Witness QAL
1322           Witness EV
Name: witness, dtype: object

		Number of witnesses: 27


In [76]:
# Distinct tribunal identifiers in order of first appearance, and how many there are.

df_tribunals = df["tribunal"].drop_duplicates()
print(df_tribunals)
tribunal_total = df_tribunals.count()
print(f"\n\t\tNumber of tribunals: {tribunal_total}")

0      1
465    2
995    3
Name: tribunal, dtype: int64

		Number of tribunals: 3


In [74]:
# Distinct document identifiers in order of first appearance, and how many there are.

df_documents = df["document"].drop_duplicates()
print(df_documents)
document_total = df_documents.count()
print(f"\n\t\tNumber of documents: {document_total}")

0        E1_39.1_TR001_20090629_Final_EN_Pub
114      E1_40.1_TR001_20090630_Final_EN_Pub
214     E1_200.1_TR002_20130604_Final_EN_Pub
337     E1_505.1_TR002_20161201_Final_EN_Pub
465                                 960515IT
596                                 960718IT
707                                 020424IT
815                                 020425IT
916                                 020426IT
995                              TRA000026/1
1072                             TRA000027/1
1097                             TRA000030/1
1174                             TRA000034/1
1283                             TRA002354/2
1345                             TRA002353/2
1454                             TRA002352/1
Name: document, dtype: object

		Number of documents: 16


In [75]:
# Distinct case identifiers in order of first appearance, and how many there are.

df_cases = df["case"].drop_duplicates()
print(df_cases)
case_total = df_cases.count()
print(f"\n\t\tNumber of cases: {case_total}")

0         Case 001
214     Case 002-1
337     Case 002-2
465      IT-94-1-T
707       IT-02-54
995     ICTR-96-04
1283    ICTR-98-42
Name: case, dtype: object

		Number of cases: 7


In [78]:
# Paragraphs that belong to tribunal 1.
is_tribunal_1 = df["tribunal"] == 1
df.loc[is_tribunal_1]

Unnamed: 0,paragraph,label,tribunal,witness,document,case,date,datetime
0,ýý ý Kingdom of Cam bodia Nation Religion King...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
1,pag e Questioning by Mr Kar Savuth commenc es ...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
2,decision on the removal of witnesses from the ...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
3,The defence agreed with the Chambers proposal ...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
4,therefore be in a position to compare them wit...,0,1,Vann Nath,E1_39.1_TR001_20090629_Final_EN_Pub,Case 001,29.06.2009,2009-06-29
...,...,...,...,...,...,...,...,...
460,you heard from someone else Can you tell us pl...,0,1,Sin Oeng,E1_505.1_TR002_20161201_Final_EN_Pub,Case 002-2,01.12.2016,2016-12-01
461,than and greater than signs has been correct...,0,1,Sin Oeng,E1_505.1_TR002_20161201_Final_EN_Pub,Case 002-2,01.12.2016,2016-12-01
462,you know how Heng Samrin implemented that orde...,0,1,Sin Oeng,E1_505.1_TR002_20161201_Final_EN_Pub,Case 002-2,01.12.2016,2016-12-01
463,Mr President if you would allow me please Q Mr...,0,1,Sin Oeng,E1_505.1_TR002_20161201_Final_EN_Pub,Case 002-2,01.12.2016,2016-12-01
