In [1]:
import os
import json

# Folder containing JSON files
folder_path = "scrap"

# List to store all loaded JSON data
all_data = []

# Iterate over all files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".json"):
        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                all_data.append(data)
        except json.JSONDecodeError:
            print(f"⚠️ Error decoding JSON in file: {filename}")
        except Exception as e:
            print(f"⚠️ Error reading {filename}: {e}")

print(f"✅ Loaded {len(all_data)} JSON files.")


✅ Loaded 183 JSON files.


In [7]:
import tqdm

In [None]:
reviews = []

for data in tqdm.tqdm(all_data):
    for r in data['scrap']['reviews']:
        reviews.append(r['positive_text'])
        reviews.append(r['negative_text'])

100%|██████████| 183/183 [00:00<00:00, 3367.68it/s]


In [34]:
import requests

response = requests.post("http://raspberrypi:8000/predict", json={"input": "Lunette des WC mal fixée"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")


Status: 200
Response:
 [{"topic":"salle_de_bain.douche.toilette","score":1},{"topic":"fonctionner.fonctionnel.equipee","score":0.892562985420227},{"topic":"odeur.sale.sol","score":0.8214584589004517},{"topic":"fenetre.rideau.volet","score":0.7584702372550964},{"topic":"lumiere.eclairage.lamper","score":0.7556272745132446},{"topic":"tv.tele.television","score":0.674028754234314},{"topic":"lit.literie.matelas","score":0.6177504062652588},{"topic":"propre.proprete.menage","score":0.6090435981750488},{"topic":"equipement.mobilier.meuble","score":0.5471805334091187},{"topic":"manque.dommage.probleme","score":0.5259737968444824},{"topic":"valise.bagage.affaire","score":0.5226792097091675},{"topic":"climatisation.clim.chauffage","score":0.4878149628639221},{"topic":"mauvais.etoiler.decu","score":0.4725894033908844},{"topic":"bruit.bruyant.entendre","score":0.4689839482307434},{"topic":"couloir.ascenseur.etage","score":0.4394455850124359},{"topic":"entree.telephoner.hall","score":0.41172194480

In [33]:
for r in tqdm.tqdm(reviews):
    requests.post("http://raspberrypi:8000/predict", json={"input": r})
    requests.post("http://raspberrypi:8000/predict", json={"input": r})

  0%|          | 78/443556 [00:06<9:31:12, 12.94it/s] 


KeyboardInterrupt: 

In [39]:
import os
import json
import requests
import tqdm

# === CONFIG ===
INPUT_FOLDER = "scrap"
OUTPUT_FOLDER = "scrap_out"
API_URL = "http://raspberrypi:8000/predict"

# === SETUP ===
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Cache for already predicted texts
prediction_cache = {}

# === PROCESS EACH JSON FILE SEPARATELY ===
json_files = [f for f in os.listdir(INPUT_FOLDER) if f.endswith(".json")]

for filename in tqdm.tqdm(json_files, desc="Processing JSON files"):
    input_path = os.path.join(INPUT_FOLDER, filename)
    output_path = os.path.join(OUTPUT_FOLDER, filename)

    try:
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        print(f"⚠️ Error decoding JSON in file: {filename}")
        continue
    except Exception as e:
        print(f"⚠️ Error reading {filename}: {e}")
        continue

    # Process reviews
    reviews = data.get("scrap", {}).get("reviews", [])
    updated_reviews = []
    for review in reviews:
        review['positive_topics'] = []
        review['negative_topics'] = []
        language = review.get("language", "")
        if language != "fr": continue

        for sent, text in [('positive', review['positive_text']), ('negative', review['negative_text'])]:
            # Check cache
            if type(text) != str: continue 
            if text in prediction_cache:
                predictions = prediction_cache[text]
            else:
                try:
                    response = requests.post(API_URL, json={"input": text}, timeout=10)
                    if response.status_code == 200:
                        predictions = response.json()
                        prediction_cache[text] = predictions
                    else:
                        print(f"⚠️ API error {response.status_code} for review {text} in {filename}")
                        predictions = []
                except Exception as e:
                    print(f"⚠️ Request error for review {text} in {filename}: {e}")
                    predictions = []

            review[f'{sent}_topics'] = [p['topic'] for p in predictions if p['score'] > 0.8]

    # Save updated JSON
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"⚠️ Error saving {filename}: {e}")

print("✅ All JSON files processed and saved in 'scrap_out/' folder.")


Processing JSON files:   1%|          | 2/183 [00:35<53:36, 17.77s/it]


KeyboardInterrupt: 