# Llama Impact Hackathon
This notebook contains the work for the Llama Impact Hackathon 2024.
We will use the public complaints from the website https://ndreqe.com/reports



# Importing Libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import urllib.parse

In [None]:
# Read the config file for Together AI API key
import json
with open('configs.json', 'r') as file:
    config = json.load(file)

print('Configs Read!')

# Scraping the Data

In [None]:
# Scraping code here

# Reading the Data

In [5]:
# Read csv file
df = pd.read_csv('ndreqe.csv')

# Extract fields
df['municipality'] = df['web-scraper-start-url'].iloc[0][36:-10]
df['complaint_number'] = df['complaint-href'].str.rsplit('/', n=1).str[-1].astype(int)
df['image_url'] = df['images-src'].astype(str).apply(lambda x: 'https://ndreqe.com'+x)
df['image_url'] = df['image_url'].str.replace(r'(\.0)(\.jpeg)', r'\1.full\2', regex=True)
df['complaint'] = df['complaint'].apply(lambda x: x.replace('\n',' '))
df.rename(columns={'text':'title', 'tex2':'user_text'}, inplace=True)
df['municipality'] = df['municipality'].apply(lambda x: urllib.parse.unquote(x))
df.head()

Unnamed: 0,web-scraper-order,web-scraper-start-url,complaint,complaint-href,title,user_text,images-src,municipality,complaint_number,image_url
0,1731179571-1,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 18:50, 7 Mar 2017, ...",https://ndreqe.com/report/171,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga ne lokacionin e shenuar nuk eshte shtru...,/photo/171.0.jpeg?876fef51,Prishtinës,171,https://ndreqe.com/photo/171.0.full.jpeg?876fef51
1,1731179574-2,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.0.jpeg?eefb909c,Prishtinës,189,https://ndreqe.com/photo/189.0.full.jpeg?eefb909c
2,1731179574-3,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.1.jpeg?dd7ac08d,Prishtinës,189,https://ndreqe.com/photo/189.1.jpeg?dd7ac08d
3,1731179577-4,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.0.jpeg?e7281615,Prishtinës,365,https://ndreqe.com/photo/365.0.full.jpeg?e7281615
4,1731179577-5,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.1.jpeg?6898da66,Prishtinës,365,https://ndreqe.com/photo/365.1.jpeg?6898da66


In [6]:
df['web-scraper-start-url'].iloc[0]

'https://ndreqe.com/reports/Komuna+e+Prishtin%C3%ABs?t=unfixed'

In [7]:
df['web-scraper-start-url'].iloc[0][36:-10]

'Prishtin%C3%ABs'

In [8]:
df.shape

(1469, 10)

In [9]:
df.columns

Index(['web-scraper-order', 'web-scraper-start-url', 'complaint',
       'complaint-href', 'title', 'user_text', 'images-src', 'municipality',
       'complaint_number', 'image_url'],
      dtype='object')

In [10]:
df['web-scraper-start-url'].iloc[0]

'https://ndreqe.com/reports/Komuna+e+Prishtin%C3%ABs?t=unfixed'

In [11]:
df['web-scraper-start-url'].value_counts()

Unnamed: 0_level_0,count
web-scraper-start-url,Unnamed: 1_level_1
https://ndreqe.com/reports/Komuna+e+Prishtin%C3%ABs?t=unfixed,1469


In [12]:
df.iloc[1].title

'Raportuar në kategorinë Ndriçim publik nga anonim në 17:09, Pre 10 mars 2017'

In [13]:
df.iloc[1].user_text

'Drita nuk punon prej se eshte instaluar.'

# Translation

In [14]:
!pip install together

Collecting together
  Downloading together-1.3.3-py3-none-any.whl.metadata (11 kB)
Downloading together-1.3.3-py3-none-any.whl (68 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/68.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.1/68.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: together
Successfully installed together-1.3.3


In [17]:
# Set up the LLM Client
from together import Together

client = Together(api_key=config['api_key'])
translation_model = "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo"

In [24]:
translation_system_prompt = 'Translate the following text to English. The answer should only contain the translated text, nothing else. The text is: '
text = """*Per hudhjen e mbeturinave ne kete vend kam njoftuar komunen se paku 3 here. Zyrtari i komunes ka vizituar vendin dhe ka premtuar se do te me njoftoje zyrtarisht per veprimet e metutjeshme te komunes. Deri me tani une nuk kam pranuar pergjigjje te tille (nje arsyetim se qenka prone e dikujt tjeter dhe se komuna nuk "paska te drejte" te pastroje token e huaj me eshte dhene permes telefonit. Asnje mirembajtje, sanim i ketij demi apo ndonje hap i metejshem nuk eshte nderrmare. Perndryshe ky vendbanim eshte ne nevoje urgjente edhe te nje rruge (per qasje ne objekt) si dhe per nje vendkalim te ngritur te kembesoreve per shkak te rrezikut te kalimit te rruges ne kete pjese!"""#"Raportuar në kategorinë Ndriçim publik nga anonim në 17:09, Pre 10 mars 2017"

# API call
response = client.chat.completions.create(
    model=translation_model,
    messages=[
        {
                "role": "user",
                "content": translation_system_prompt + text
        }
],
    temperature=0.5
)

translation = response.choices[0].message.content
print(translation)

Due to the accumulation of waste in this place, I have informed the commune at least 3 times. The commune official visited the site and promised to officially notify me of the commune's necessary actions. So far, I have not received such a response (an explanation that it is someone else's property and that the commune does not have the "right" to clean up someone else's land was given to me over the phone. No maintenance, cleaning of this area, or any further action has been taken. Otherwise, this settlement is in urgent need of a road (for access to the object) as well as a pedestrian crossing due to the danger of crossing the road in this part!


In [26]:
# Package the translation code into a function
def translate_text(text_to_translate):
  translation_system_prompt = 'Translate the following text to English. The answer should only contain the translated text, nothing else. The text is: '

  # API call
  response = client.chat.completions.create(
      model=translation_model,
      messages=[
          {
                  "role": "user",
                  "content": translation_system_prompt + text_to_translate
          }
  ],
      temperature=0.5
  )

  translation = response.choices[0].message.content
  time.sleep(1)
  return translation

print('Translation Function defined!')

Translation Function defined!


In [27]:
df.iloc[5].user_text

'*Per hudhjen e mbeturinave ne kete vend kam njoftuar komunen se paku 3 here. Zyrtari i komunes ka vizituar vendin dhe ka premtuar se do te me njoftoje zyrtarisht per veprimet e metutjeshme te komunes. Deri me tani une nuk kam pranuar pergjigjje te tille (nje arsyetim se qenka prone e dikujt tjeter dhe se komuna nuk "paska te drejte" te pastroje token e huaj me eshte dhene permes telefonit. Asnje mirembajtje, sanim i ketij demi apo ndonje hap i metejshem nuk eshte nderrmare. Perndryshe ky vendbanim eshte ne nevoje urgjente edhe te nje rruge (per qasje ne objekt) si dhe per nje vendkalim te ngritur te kembesoreve per shkak te rrezikut te kalimit te rruges ne kete pjese!'

In [29]:
# Obtaining subset of data - sample_df - to be used for testing throughout the notebook
sample_df = df.head(100).copy()
sample_df.tail()

Unnamed: 0,web-scraper-order,web-scraper-start-url,complaint,complaint-href,title,user_text,images-src,municipality,complaint_number,image_url
95,1731179795-96,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:06, 27 Shk 2018",https://ndreqe.com/report/2669,Raportuar në kategorinë Mbeturina nga anonim n...,"*Matiqan, Prishtinë, Rrafsh i Kosovës.",/photo/2669.0.jpeg?2f1a4f38,Prishtinës,2669,https://ndreqe.com/photo/2669.0.full.jpeg?2f1a...
96,1731179798-97,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:11, 27 Shk 2018",https://ndreqe.com/report/2670,Raportuar në kategorinë Mbeturina nga anonim n...,"*Muharrem Fejza, Matiqan, Prishtinë, Rrafsh i ...",/photo/2670.0.jpeg?157f488a,Prishtinës,2670,https://ndreqe.com/photo/2670.0.full.jpeg?157f...
97,1731179801-98,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:13, 27 Shk 2018",https://ndreqe.com/report/2671,Raportuar në kategorinë Mbeturina nga anonim n...,"*Muharrem Fejza, Matiqan, Prishtinë, Rrafsh i ...",/photo/2671.0.jpeg?f286bf09,Prishtinës,2671,https://ndreqe.com/photo/2671.0.full.jpeg?f286...
98,1731179804-99,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:16, 27 Shk 2018",https://ndreqe.com/report/2672,Raportuar në kategorinë Mbeturina nga anonim n...,"*Ymer Elshani, Mati 1, Prishtinë, Rrafsh i Kos...",/photo/2672.0.jpeg?8ce0fd4b,Prishtinës,2672,https://ndreqe.com/photo/2672.0.full.jpeg?8ce0...
99,1731179807-100,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:18, 27 Shk 2018",https://ndreqe.com/report/2673,Raportuar në kategorinë Mbeturina nga anonim n...,"*Holger Petersen, Lagjja e Spitalit, Prishtinë...",/photo/2673.0.jpeg?85dd2582,Prishtinës,2673,https://ndreqe.com/photo/2673.0.full.jpeg?85dd...


In [30]:
sample_df['complaint_en'] = sample_df['complaint'].apply(translate_text)
sample_df['title_en'] = sample_df['title'].apply(translate_text)
sample_df['user_text_en'] = sample_df['user_text'].apply(translate_text)

sample_df

Unnamed: 0,web-scraper-order,web-scraper-start-url,complaint,complaint-href,title,user_text,images-src,municipality,complaint_number,image_url,complaint_en,title_en,user_text_en
0,1731179571-1,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 18:50, 7 Mar 2017, ...",https://ndreqe.com/report/171,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga ne lokacionin e shenuar nuk eshte shtru...,/photo/171.0.jpeg?876fef51,Prishtinës,171,https://ndreqe.com/photo/171.0.full.jpeg?876fef51,"Public Domain 18:50, 7 Mar 2017, ...",Reported in Public Illumination category by an...,The street with the designated location is not...
1,1731179574-2,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.0.jpeg?eefb909c,Prishtinës,189,https://ndreqe.com/photo/189.0.full.jpeg?eefb909c,"Public domain 17:09, 10 Mar 2017, ...",Reported in the Public Lighting category by an...,The light does not work since it was installed.
2,1731179574-3,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.1.jpeg?dd7ac08d,Prishtinës,189,https://ndreqe.com/photo/189.1.jpeg?dd7ac08d,"Public release 17:09, 10 Mar 2017, ...",Reported in the Public Lighting category by an...,The light does not work since it was installed.
3,1731179577-4,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.0.jpeg?e7281615,Prishtinës,365,https://ndreqe.com/photo/365.0.full.jpeg?e7281615,"Page protected 04:57, 2 Apr 2017, ...",Reported in Public Lighting category by anonym...,Ferid Curri Street does not have public lighting!
4,1731179577-5,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.1.jpeg?6898da66,Prishtinës,365,https://ndreqe.com/photo/365.1.jpeg?6898da66,"Public domain 04:57, 2 Apr 2017, ...",Reported in the Public lighting category by an...,Ferid Curri Street has no public lighting!
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1731179795-96,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:06, 27 Shk 2018",https://ndreqe.com/report/2669,Raportuar në kategorinë Mbeturina nga anonim n...,"*Matiqan, Prishtinë, Rrafsh i Kosovës.",/photo/2669.0.jpeg?2f1a4f38,Prishtinës,2669,https://ndreqe.com/photo/2669.0.full.jpeg?2f1a...,"Mbeturina 08:06, 27 Aug 2018",Reported in the Litter category by anonymous a...,"Matiqan, Pristina, Kosovo Plain."
96,1731179798-97,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:11, 27 Shk 2018",https://ndreqe.com/report/2670,Raportuar në kategorinë Mbeturina nga anonim n...,"*Muharrem Fejza, Matiqan, Prishtinë, Rrafsh i ...",/photo/2670.0.jpeg?157f488a,Prishtinës,2670,https://ndreqe.com/photo/2670.0.full.jpeg?157f...,"Mbeturina 08:11, 27 Aug 2018",Reported in category Debris by anonymous at 08...,"Muharrem Fejza, Matiqan, Pristina, Kosovo Plain."
97,1731179801-98,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:13, 27 Shk 2018",https://ndreqe.com/report/2671,Raportuar në kategorinë Mbeturina nga anonim n...,"*Muharrem Fejza, Matiqan, Prishtinë, Rrafsh i ...",/photo/2671.0.jpeg?f286bf09,Prishtinës,2671,https://ndreqe.com/photo/2671.0.full.jpeg?f286...,"Mbeturina 08:13, 27 Sep 2018",Reported in the Litter category by anonymous a...,"Muharrem Fejza, Matiqan, Prishtina, Kosovo Plain."
98,1731179804-99,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Mbeturina 08:16, 27 Shk 2018",https://ndreqe.com/report/2672,Raportuar në kategorinë Mbeturina nga anonim n...,"*Ymer Elshani, Mati 1, Prishtinë, Rrafsh i Kos...",/photo/2672.0.jpeg?8ce0fd4b,Prishtinës,2672,https://ndreqe.com/photo/2672.0.full.jpeg?8ce0...,"Mbeturina 08:16, 27 Sep 2018",Reported in the category Litter by anonymous a...,"Ymer Elshani, March 1, Pristina, Kosovo Plain."


In [None]:
# for loop based code
# Initialize the new column with default values (e.g., None or 0)
sample_df['title_en2'] = None

# Iterate over the DataFrame rows using a for loop
for index, row in sample_df.iterrows():
    # Apply the function on the selected columns and assign the result
    sample_df.at[index, 'title_en2'] = translate_text(row['title'])

print(sample_df)

# works almost the same as apply

In [41]:
# Fetch date_created and date_modified
# Clean up extra spaces and handle case insensitivity
sample_df['complaint_en'] = sample_df['complaint_en'].str.replace(r'\s+', ' ', regex=True)  # Replace multiple spaces with single spaces
sample_df['complaint_en'] = sample_df['complaint_en'].str.strip()  # Remove leading/trailing spaces

# Regular expression to capture the two datetime patterns
sample_df[['date_created', 'date_modified']] = sample_df['complaint_en'].str.extract(
    r'(\d{2}:\d{2}, \d+ \w+ \d{4}).*?(\d{2}:\d{2}, \d+ \w+ \d{4})'
)

# Convert to datetime format
sample_df['date_created'] = pd.to_datetime(sample_df['date_created'], errors='coerce')
sample_df['date_modified'] = pd.to_datetime(sample_df['date_modified'], errors='coerce')

# Display the DataFrame
sample_df[['complaint_en','date_created', 'date_modified']].head()

Unnamed: 0,complaint_en,date_created,date_modified
0,"Public Domain 18:50, 7 Mar 2017, Last Update 1...",2017-03-07 18:50:00,2017-03-09 16:00:00
1,"Public domain 17:09, 10 Mar 2017, Last update ...",2017-03-10 17:09:00,2017-03-17 08:37:00
2,"Public release 17:09, 10 Mar 2017, Last update...",2017-03-10 17:09:00,2017-03-17 08:37:00
3,"Page protected 04:57, 2 Apr 2017, Last updated...",2017-04-02 04:57:00,2017-04-03 09:22:00
4,"Public domain 04:57, 2 Apr 2017, Last updated ...",2017-04-02 04:57:00,2017-04-03 09:22:00


In [46]:
sample_df.to_csv('sample_df-1-translation.csv', index=False)
print('File saved successfully!')

File saved successfully!


In [44]:
sample_df.drop(columns=['title_en2'], inplace=True)
sample_df.head()

Unnamed: 0,web-scraper-order,web-scraper-start-url,complaint,complaint-href,title,user_text,images-src,municipality,complaint_number,image_url,complaint_en,title_en,user_text_en,date_created,date_modified
0,1731179571-1,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 18:50, 7 Mar 2017, ...",https://ndreqe.com/report/171,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga ne lokacionin e shenuar nuk eshte shtru...,/photo/171.0.jpeg?876fef51,Prishtinës,171,https://ndreqe.com/photo/171.0.full.jpeg?876fef51,"Public Domain 18:50, 7 Mar 2017, Last Update 1...",Reported in Public Illumination category by an...,The street with the designated location is not...,2017-03-07 18:50:00,2017-03-09 16:00:00
1,1731179574-2,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.0.jpeg?eefb909c,Prishtinës,189,https://ndreqe.com/photo/189.0.full.jpeg?eefb909c,"Public domain 17:09, 10 Mar 2017, Last update ...",Reported in the Public Lighting category by an...,The light does not work since it was installed.,2017-03-10 17:09:00,2017-03-17 08:37:00
2,1731179574-3,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 17:09, 10 Mar 2017, ...",https://ndreqe.com/report/189,Raportuar në kategorinë Ndriçim publik nga ano...,Drita nuk punon prej se eshte instaluar.,/photo/189.1.jpeg?dd7ac08d,Prishtinës,189,https://ndreqe.com/photo/189.1.jpeg?dd7ac08d,"Public release 17:09, 10 Mar 2017, Last update...",Reported in the Public Lighting category by an...,The light does not work since it was installed.,2017-03-10 17:09:00,2017-03-17 08:37:00
3,1731179577-4,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.0.jpeg?e7281615,Prishtinës,365,https://ndreqe.com/photo/365.0.full.jpeg?e7281615,"Page protected 04:57, 2 Apr 2017, Last updated...",Reported in Public Lighting category by anonym...,Ferid Curri Street does not have public lighting!,2017-04-02 04:57:00,2017-04-03 09:22:00
4,1731179577-5,https://ndreqe.com/reports/Komuna+e+Prishtin%C...,"Ndriçim publik 04:57, 2 Pri 2017, ...",https://ndreqe.com/report/365,Raportuar në kategorinë Ndriçim publik nga ano...,*Rruga Ferid Curri nuk ka fare ndriçim publik!,/photo/365.1.jpeg?6898da66,Prishtinës,365,https://ndreqe.com/photo/365.1.jpeg?6898da66,"Public domain 04:57, 2 Apr 2017, Last updated ...",Reported in the Public lighting category by an...,Ferid Curri Street has no public lighting!,2017-04-02 04:57:00,2017-04-03 09:22:00
