# Llama3 Model

### Importing libraries

In [None]:
from langchain_community.llms import Ollama
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import re
import nltk
from fuzzywuzzy import fuzz
import torch

### Load data and model

In [None]:
# Load the dataset; later cells read its 'Cleaned Text' and
# 'True Organization' columns.
df = pd.read_csv('final_data.csv')

# Handle to the "llama3" model through LangChain's Ollama integration.
llama_model = Ollama(model="llama3")

### Split into train and test sets

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state keeps
# the split identical between runs.
train, test = train_test_split(df, test_size=0.3, random_state=42)

print("Train set size:", len(train))
print("Test set size:", len(test))

In [None]:
# Pick the CUDA device when PyTorch reports one, otherwise fall back to CPU.
# NOTE(review): `device` is not referenced by any other cell visible in this
# part of the notebook — confirm it is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Base Model

### Send prompts to Llama and filter the responses

In [None]:
def filter_response(response):
    """Remove known boilerplate lead-ins from a model reply.

    Every occurrence of each listed phrase is deleted (exact,
    case-sensitive match), then surrounding whitespace is trimmed.

    Args:
        response: Raw reply text returned by the model.

    Returns:
        The reply with the lead-in phrases removed and whitespace stripped.
    """
    lead_ins = (
        "The full name of the organization is:",
        "The organization's full name is:",
        "The full name of the organization mentioned in the report is:",
    )

    cleaned = response
    for lead_in in lead_ins:
        cleaned = cleaned.replace(lead_in, "")
    return cleaned.strip()

In [None]:
def get_organization_name(data, llama_model):
    """Ask the model for the organization name behind every report in *data*.

    Args:
        data: DataFrame with a 'Cleaned Text' column holding the report text.
        llama_model: Object exposing ``invoke(prompt)`` that returns the
            model's reply as text.

    Returns:
        List of ``(report_text, extracted_name)`` tuples in row order.
        ``extracted_name`` is 'No response' when the model returns an
        empty reply; otherwise it is the reply passed through
        ``filter_response``.
    """
    results = []
    # Only the text column is needed, so iterate it directly instead of
    # building a full row object per report with iterrows().
    report_texts = data['Cleaned Text']
    for report_text in tqdm(report_texts, total=data.shape[0], desc="Processing texts"):
        # Send the prompt as a plain string. A list is interpreted by
        # LangChain as a sequence of chat messages and is rendered with a
        # "Human: " prefix before it reaches a text-completion model.
        prompt = f"Identify only the full name of the organization from the following Dutch ANNUAL REPORT and provide it in its original language. Return only the organization name and nothing else. ANNUAL REPORT: {report_text}"
        response = llama_model.invoke(prompt)

        extracted_org = filter_response(response) if response else 'No response'
        results.append((report_text, extracted_org))
    return results

### Run base model

In [None]:
# Run the extraction over the held-out test set (one model call per row).
organization_names = get_organization_name(test, llama_model)

# Show each extracted name; the report text in each tuple is not printed.
for text, org_name in organization_names:
    print(f"Extracted Organization: {org_name}\n")

### Evaluation

In [None]:
def accuracy(test, organization_names):
    """Score predictions by exact or substring match against the labels.

    A prediction counts as correct when, after lower-casing and trimming
    both sides, it equals the label or one non-empty string contains the
    other.

    Args:
        test: DataFrame with a 'True Organization' column of labels.
        organization_names: Iterable of ``(text, predicted_name)`` tuples,
            in the same order as the rows of *test*.

    Returns:
        The string ``"Accuracy: <value>"`` with four decimals; the value is
        0 when there are no pairs to compare.
    """
    true_orgs = test['True Organization'].tolist()
    pred_orgs = [org_name for _text, org_name in organization_names]

    correct = 0
    total = 0
    for pred, true in zip(pred_orgs, true_orgs):
        # Normalise both sides the same way; previously only the prediction
        # was lower-cased, so mixed-case labels could never match.
        pred = str(pred).lower().strip()
        true = str(true).lower().strip()
        total += 1

        if pred == true:
            correct += 1
        # "" is a substring of every string, so containment is only
        # meaningful when both sides are non-empty.
        elif pred and true and (pred in true or true in pred):
            correct += 1

    score = correct / total if total > 0 else 0
    return f"Accuracy: {score:.4f}"

# Containment-based accuracy of the base model on the test set.
accuracy(test, organization_names)

Accuracy: 0.4387


In [None]:
def fuzzy_accuracy(df, organization_names, threshold=80):
    """Score predictions with fuzzy partial-string matching.

    A prediction counts as correct when ``fuzz.partial_ratio`` between the
    lower-cased, trimmed prediction and label is at least *threshold*.

    Args:
        df: DataFrame with a 'True Organization' column of labels.
        organization_names: Iterable of ``(text, predicted_name)`` tuples,
            in the same order as the rows of *df*.
        threshold: Minimum ``partial_ratio`` score for a match.
            Defaults to 80.

    Returns:
        The string ``"Accuracy: <value>"`` with four decimals; the value is
        0 when there are no pairs to compare.
    """
    true_orgs = df['True Organization'].tolist()
    pred_orgs = [org_name for _text, org_name in organization_names]

    correct = 0
    total = 0
    for pred, true in zip(pred_orgs, true_orgs):
        # str() first so non-string values (e.g. a missing label) cannot
        # break the normalisation.
        pred = str(pred).lower().strip()
        true = str(true).lower().strip()

        if fuzz.partial_ratio(pred, true) >= threshold:
            correct += 1
        total += 1

    score = correct / total if total > 0 else 0
    return f"Accuracy: {score:.4f}"

# Fuzzy-match accuracy of the base model on the test set.
fuzzy_accuracy(test, organization_names)

#### Precision & Recall

## Second Dataset

In [None]:
# Load the second dataset; get_organization_name reads its 'Cleaned Text' column.
seconddata = pd.read_csv('final_seconddata.csv')

In [None]:
# Run the extraction over the second dataset.
# Note: this rebinds `organization_names`, replacing the test-set results.
organization_names = get_organization_name(seconddata, llama_model)

# Show each extracted name; the report text in each tuple is not printed.
for text, org_name in organization_names:
    print(f"Extracted Organization: {org_name}\n")

Processing texts:   0%|          | 1/209 [02:30<8:41:15, 150.36s/it]

The full name of the organization mentioned in the annual report is:

* Openbare Vlaamse Afvalstoffenmaatschappij (OVAM)
* Vlaamse Milieumaatschappij (VMM)
* Vlaamse Landmaatschappij (VLDM)

Note that these are all Dutch government organizations, and their full names translate to:

* Public Flemish Waste Management Company (OVAM)
* Flemish Environmental Agency (VMM)
* Flemish Land Agency (VLDM)


Processing texts:   1%|          | 2/209 [05:24<9:27:51, 164.60s/it]

The full name of the organization mentioned in the report is:

"Vlaamse Audit Autoriteit (VAA)" or "Flemish Auditing Authority"


Processing texts:   1%|▏         | 3/209 [08:37<10:09:08, 177.42s/it]

Here are the full names of the organization mentioned in the text:

1. Dienst van de Bestuursrechtscolleges (DBRC)
2. Raad voor Vergunningsbetwistingen (RVVB)
3. Handhavingscollege (HHC)
4. Dienst van de Bestuursrechtscolleges en de Erdoor Overkoepelde en Ondersteunde Bestuursrechtscolleges (DBRC-DO)
5. Raad voor Betwistingen inzake Studievoortgangsbeslissingen (RSTVB)

Note that some of these names are repeated throughout the text, but I have only listed each one once in the above response.


Processing texts:   2%|▏         | 4/209 [10:54<9:11:01, 161.28s/it] 

Based on the provided Dutch annual report, I have identified the full name of the organization:

**Sport Vlaanderen**

Please note that there are other organizations mentioned in the text, such as "Vink Creations" and "Get Insane", but **Sport Vlaanderen** is the primary organization being discussed.


Processing texts:   2%|▏         | 5/209 [13:10<8:37:58, 152.34s/it]

The organization name mentioned in the annual report is:

"literatuur vlaanderen"

(Note: The original language of this text is Dutch.)


Processing texts:   3%|▎         | 6/209 [16:11<9:07:45, 161.90s/it]

The full name of the organization is:

VLAAMS AUDIOVISUEEL FONDS


Processing texts:   3%|▎         | 7/209 [18:29<8:39:33, 154.33s/it]

The full name of the organization is: VITO (Vlaamse Innovatielab).


Processing texts:   4%|▍         | 8/209 [21:34<9:09:36, 164.06s/it]

The full name of the organization is:

Vlaamse Adviescommissie voor Volksraadplegingen (Flemish Advisory Committee for Local Government)

The commissie members are listed as follows:

* Dr. Professor Wouter Van Dooren
* Mevrouw Karen Deckers, Adjunct to the Director at the Agency for Internal Affairs
* Stef Keunen, Adjunct to the Director at the Agency for Internal Affairs
* Placeholders:
	+ Sofie Mariën, Dr. Professor at KULeuven
	+ Sofie Hennau, Dr. Professor at UHasselt
	+ Edwin Lefebre, Adjunct to the Director at the Agency for Internal Affairs
	+ Thomas Van Langenhove, Advisor at the Agency for Internal Affairs

The Secretary was initially appointed as Mevrouw Kristel Croonen, Head of Department Local Organization and Operations at the Agency for Internal Affairs. However, on 1 December 2022, Ottilia Tothèzan resigned as her placeholder, and Lieven Henckens, Adjunct to the Director at the Agency for Internal Affairs, took over.

On 30 March 2023, Dr. Professor Sofie Mariën resigne

Processing texts:   4%|▍         | 9/209 [23:14<7:59:46, 143.93s/it]

The full name of the organization is:

Vlaamse Gemeenschap (Flemish Community)


Processing texts:   5%|▍         | 10/209 [24:03<6:19:52, 114.53s/it]

The organization name is:

"Plantentuin Meise"


Processing texts:   5%|▌         | 11/209 [26:04<6:24:55, 116.64s/it]

The full name of the organization is:

"Toerisme Vlaanderen"


Processing texts:   6%|▌         | 12/209 [28:13<6:35:08, 120.35s/it]

The full name of the organization mentioned in the annual report is:

"Agentschap Integratie en Inburgering"

Translated to English, this means "Agency for Integration and Naturalization".


Processing texts:   6%|▌         | 13/209 [30:23<6:42:48, 123.31s/it]

De naam van de organisatie is: Overlegsorgaan voor Milieu- en Natuurbeheer (OVAM).


Processing texts:   7%|▋         | 14/209 [32:02<6:16:39, 115.90s/it]

De Stichting Commissie van Toezicht voor Jeugdinstellingen.


Processing texts:   7%|▋         | 15/209 [35:07<7:22:37, 136.90s/it]

The full name of the organization in its original language is:

"inter"


Processing texts:   8%|▊         | 16/209 [37:59<7:54:09, 147.41s/it]

The full name of the organization is:

"Departement Zorg"

(Note: The original language is Dutch.)


Processing texts:   8%|▊         | 17/209 [38:51<6:19:41, 118.65s/it]

The full name of the organization is:

"Vlaamse Ombudsdienst"


Processing texts:   9%|▊         | 18/209 [40:35<6:03:41, 114.25s/it]

The full name of the organization is:

Vlaams Parlement (Flemish Parliament)


Processing texts:   9%|▉         | 19/209 [43:03<6:33:48, 124.36s/it]

The full name of the organization mentioned in the report is:

Vlaamse Ombudsdienst (Flemish Ombudsman)


Processing texts:  10%|▉         | 20/209 [44:28<5:55:04, 112.72s/it]

The organization name is:

"Departement financiën en begroting"

In its original language, the full name of the organization is: "Departement financiën en begroting".


Processing texts:  10%|█         | 21/209 [47:16<6:44:43, 129.17s/it]

The full name of the organization is:

Vlaams Partnerschap Duale Leren ( Flemish Dual Learning Partnership)

And here are some key points to learn from this report:

1. The partnership aims to promote a network of quality learning workplaces for all learners.
2. The focus on time is important for the sectoral partnerschappen.
3. The focus on time is important for the partnershipment.
4. The focusplacement is important for the placement.
5.


Processing texts:  11%|█         | 22/209 [49:14<6:32:22, 125.89s/it]

The full name of the organization mentioned in the report is:

United Nations University – Center for Regional Integration Studies (UNU-CRIS)


Processing texts:  11%|█         | 23/209 [51:28<6:38:04, 128.41s/it]

Based on the Dutch annual report, the full name of the organization is:

"Vlaamse Universiteiten en Hogescholen (VLIR)"


Processing texts:  11%|█▏        | 24/209 [54:16<7:12:09, 140.16s/it]

The full name of the organization is: "Agentschap Onderwijs Vlaanderen" ( Flemish Agency for Education).


Processing texts:  12%|█▏        | 25/209 [56:17<6:52:21, 134.46s/it]

The organization name is:

"Kinderrechtencommissariaat"


Processing texts:  12%|█▏        | 26/209 [59:40<7:52:42, 154.99s/it]

The full name of the organization in its original language is:

" Agentschap voor Hoger Onderwijs, Volwassenen Onderwijs en Beroepsonderwijs" (Agency for Higher Education, Adult Education and Vocational Training)


Processing texts:  13%|█▎        | 27/209 [1:02:46<8:18:28, 164.33s/it]

The full name of the organization is "Pedagogisch Project Groep Onderwijs" (PPGO).


Processing texts:  13%|█▎        | 28/209 [1:05:53<8:35:40, 170.94s/it]

The full name of the organization mentioned in the annual report is:

"Agentschap Vlaamse Sociale Bescherming (AVSB)"


Processing texts:  14%|█▍        | 29/209 [1:09:01<8:48:31, 176.18s/it]

The full name of the organization is:

Vlaams Brussellofficieel Fonds (VBF)

And, as you requested, here's the translation for "de Vlaamse Gemeenschap" which means:

Flemish Community

Let me know if you have any further requests!


Processing texts:  14%|█▍        | 30/209 [1:11:19<8:11:44, 164.83s/it]

The full name of the organization mentioned in the report is:

"Vlaamse Hogeschoolraad" (Flemish Higher Education Council)


Processing texts:  15%|█▍        | 31/209 [1:14:50<8:49:49, 178.59s/it]

The full name of the organization is:

"VZW Sociaal Dienst voor het Vlaams Overheidspersoneel"

(Note: VZW stands for "Vrijwilligers Werkelijke Zakelijkheden" which translates to "Non-Profit Organization".)


Processing texts:  15%|█▌        | 32/209 [1:16:46<7:51:28, 159.82s/it]

The organization name is:

"Departement Werk & Sociale Economie"


Processing texts:  16%|█▌        | 33/209 [1:19:22<7:45:09, 158.58s/it]

The full name of the organization mentioned in the annual report is:

**Vlaamse Dienstverlening BeroepsActief (VDAB)**

Translation: Flemish Employment and Skills Agency.


Processing texts:  16%|█▋        | 34/209 [1:22:02<7:43:40, 158.98s/it]

The full name of the organization is:

"Defensie-Controle Strategische Goederen van het Departement Kanselarij & Buitenlandse Zaken"

Translated to English, this becomes:

"Strategic Goods Control Unit of the Department of Chancellery & Foreign Affairs"


Processing texts:  17%|█▋        | 35/209 [1:24:30<7:31:56, 155.84s/it]

The full name of the organization is:

Waterbouwkundig Laboratorium (WLR)

Note: WLR is a Dutch research institution focused on water-related topics.


Processing texts:  17%|█▋        | 36/209 [1:27:57<8:13:12, 171.06s/it]

The full name of the organization is:

"Departement Landbouw & Visserij Vlaanderen"


Processing texts:  18%|█▊        | 37/209 [1:30:49<8:11:41, 171.52s/it]

Based on the Dutch annual report, I have identified the full name of the organization as:

"Federale Overheid van België - Vlaamse Regering" (Federal Government of Belgium - Flemish Government)

Please note that this is the official name in Dutch. The English translation would be "Federal Government of Belgium - Flemish Government".


Processing texts:  18%|█▊        | 38/209 [1:32:43<7:19:06, 154.07s/it]

The full name of the organization mentioned in the Dutch annual report is:

"Vlaamse Regering" or "Flemish Government".


Processing texts:  19%|█▊        | 39/209 [1:34:09<6:18:46, 133.69s/it]

The organization name is:

Staatsdienst voor Openbaar Vervoer (SOV)


Processing texts:  19%|█▉        | 40/209 [1:36:05<6:01:28, 128.33s/it]

The full name of the organization is:

"Dienst Economische Migratie"


Processing texts:  20%|█▉        | 41/209 [1:38:58<6:36:47, 141.71s/it]

The full name of the organization is:

Agentschap voor Woon- en Zorginfrastructuurbeleid voor Vlaams-Brabant (VLABINVEST)


Processing texts:  20%|██        | 42/209 [1:42:16<7:21:33, 158.64s/it]

The full name of the organization is:

"Departement Welzijn, Volksgezondheid en Gezin"

(Note: I only extracted the organization's name from the provided text and did not read or analyze any other information.)


Processing texts:  21%|██        | 43/209 [1:44:27<6:55:57, 150.34s/it]

The full name of the organization mentioned in the annual report is:

"Agentschap Binnenlands Bestuur, Vlaamse Overheid"

Translated to English, this becomes:

"Flemish Government Agency for Administrative Support"


Processing texts:  21%|██        | 44/209 [1:47:14<7:07:23, 155.42s/it]

The full name of the organization is not explicitly mentioned in the report, but it appears to be an energy management organization or a collective of companies involved in energy efficiency and reduction.

According to the report, the energieverbruik (energy consumption) in 2014 was not specified. However, the report mentions that the cumulative primary energy saving achieved by the EBO-bedrijven (Energy Business Organizations) from 2015-2018 exceeded the planned amount, resulting in a savings of 21.7 pjp (primary energy units).

Please note that the report is written in Dutch, and I have translated the relevant information for you. If you would like me to provide more specific data or clarify any points, please let me know!


Processing texts:  22%|██▏       | 45/209 [1:49:48<7:03:44, 155.03s/it]

The full name of the organization mentioned in the annual report is:

Raad voor beroepenstudievoortgangsbeslissingen (Council for Examination Appeals)

The translation of "bevraging van de raad verder toegenomen" is: "Review of the Council's further development".

Please note that I only translated the full name of the organization and the provided phrase, not the entire text.


Processing texts:  22%|██▏       | 46/209 [1:51:58<6:40:23, 147.39s/it]

The full name of the organization is:

"Departement Financiën en Begroting"

(Note: I did not read or analyze any other information from the annual report, only providing the organization's name)


Processing texts:  22%|██▏       | 47/209 [1:55:11<7:15:16, 161.22s/it]

The organization name is:

"Staatssecretariaat voor Cultuur, Jeugd en Media" (in Dutch)


Processing texts:  23%|██▎       | 48/209 [1:57:58<7:16:51, 162.81s/it]

The organization name is:

"Jeugdhulp" (in Dutch)


Processing texts:  23%|██▎       | 49/209 [2:00:33<7:08:22, 160.64s/it]

The full name of the organization is:

"Euclides Commissie Hoger Onderwijs"


Processing texts:  24%|██▍       | 50/209 [2:03:16<7:07:06, 161.17s/it]

The full name of the organization is:

Centraal Bureau voor de Statistiek (CBS) - Centrum voor Sociaal Beleid (CSB)

Translated to English, it means: "Central Bureau for Statistics" and "Centre for Social Policy".


Processing texts:  24%|██▍       | 51/209 [2:06:12<7:16:05, 165.60s/it]

The full name of the organization is:

"Programma Voor Plattelandsontwikkeling Vlaanderen 2014-2020 (PDPo II)"


Processing texts:  25%|██▍       | 52/209 [2:08:47<7:05:01, 162.43s/it]

The full name of the organization mentioned in the annual report is:

"Hoge Handhavingsraad voor Ruimte en Milieu" (High Authority for Space and Environment)


Processing texts:  25%|██▌       | 53/209 [2:11:34<7:06:24, 164.00s/it]

The full name of the organization mentioned in the annual report is:

"Vlaams Hoogereenschikkelen Raad voor Ruimtelijke Ordening en Milieu (VHRM)"


Processing texts:  26%|██▌       | 54/209 [2:13:43<6:36:03, 153.31s/it]

The full name of the organization is:

"Vlaamse Landmaatschappij"

Translated to English, this means "Flemish Land Agency".


Processing texts:  26%|██▋       | 55/209 [2:16:53<7:02:21, 164.56s/it]

The full name of the organization is: "Topstukkenfonds".
