# CSV to JSON Processor (Structured Data)

This notebook loads a CSV file chosen through a file dialog, cleans its text values, and writes every row to a JSON file.

### Step 1: Import Necessary Libraries

In [None]:
%pip install pandas

In [None]:
import json
import os
import re
import pandas as pd

### Step 2: Load the CSV File

In [None]:
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import os
from tkinter import messagebox

# Shared state written by select_file(): the chosen CSV path and the
# DataFrame loaded from it. Both stay None until a file has been picked.
selected_file_path = df_preview = None

def _reset_selection():
    """Forget any chosen file and put the dialog back in its empty state."""
    global selected_file_path, df_preview

    selected_file_path = None
    df_preview = None
    label.config(text="No file selected")
    text_widget.delete("1.0", tk.END)
    button_confirm.pack_forget()


def select_file():
    """Ask the user for a CSV file, load it, and show a preview.

    On success the module-level ``selected_file_path`` and ``df_preview``
    are set, the first rows are written to the text widget, and the
    confirm button is shown.

    If the dialog is cancelled or the file cannot be loaded, both globals
    are reset to ``None`` and the confirm button is hidden, so the stored
    state always agrees with what the window displays.
    """
    global selected_file_path, df_preview

    file_path = filedialog.askopenfilename(
        title="Select a CSV File",
        filetypes=(("CSV Files", "*.csv"), ("All Files", "*.*"))
    )

    if not file_path:
        # Dialog was cancelled: nothing is selected any more.
        _reset_selection()
        return

    label.config(text=os.path.basename(file_path))

    try:
        # Try utf-8 first, fall back to ISO-8859-1
        try:
            loaded = pd.read_csv(file_path, encoding='utf-8')
        except UnicodeDecodeError:
            loaded = pd.read_csv(file_path, encoding='ISO-8859-1')
        preview_text = loaded.head().to_string()
    except Exception as e:
        # GUI boundary: report any load failure to the user instead of
        # letting it escape the Tk callback, then drop the failed choice.
        messagebox.showerror("Error", f"Failed to load CSV:\n{e}")
        _reset_selection()
        return

    # Commit the selection only once the file has actually been parsed.
    selected_file_path = file_path
    df_preview = loaded

    # Show the first few rows in the text widget
    text_widget.delete("1.0", tk.END)
    text_widget.insert(tk.END, preview_text)
    button_confirm.pack(pady=10)

def confirm_file():
    """Close the window when a file is selected; otherwise warn the user."""
    if not selected_file_path:
        messagebox.showwarning("No Selection", "Please select a file before confirming.")
        return

    # Destroying the window ends the Tk main loop.
    window.destroy()

# --- GUI Setup ---
# Builds the file-picker window. Widgets are packed top to bottom in the
# order below, so the statement order determines the layout.
window = tk.Tk()
window.title("Select a CSV File")
# Geometry string is "<width>x<height>+<x offset>+<y offset>".
window.geometry("800x500+100+100")

# Shows the selected file's name, or a placeholder when nothing is chosen.
label = tk.Label(window, text="No file selected", width=100)
label.pack(pady=20)

button_select = tk.Button(window, text="Select File", command=select_file)
button_select.pack(pady=10)

# Created but deliberately not packed here: select_file() packs it once a
# CSV has loaded and hides it again otherwise.
button_confirm = tk.Button(window, text="Load File", command=confirm_file)

# Displays the preview text that select_file() inserts.
text_widget = tk.Text(window, width=100, height=15)
text_widget.pack(pady=10)

# Runs the Tk event loop; the code after this line executes once the
# window has been destroyed (confirm_file() calls window.destroy()).
window.mainloop()

# --- After GUI closes ---
# Report the outcome of the dialog. A path can be set without a DataFrame
# (for example if loading failed and the window was then closed), so check
# both before calling .head() to avoid an AttributeError on None.
if selected_file_path and df_preview is not None:
    print(f"\nConfirmed CSV file path: {selected_file_path}")
    print("\nCSV File Loaded Successfully:")
    print(df_preview.head())
elif selected_file_path:
    print(f"\nSelected file could not be loaded: {selected_file_path}")
else:
    print("\nNo file selected.")


### Step 3: Data Cleaning

In [None]:
# Matches every character that is NOT a word character, whitespace, or one
# of the permitted punctuation marks.
allowed_pattern = re.compile(r"[^\w\s.,!?;:'\"()\[\]\/-]")


def clean_string(text):
    """Return *text* without disallowed characters or non-breaking spaces.

    Characters matched by ``allowed_pattern`` are dropped, surrounding
    whitespace is trimmed, and every U+00A0 (non-breaking space) is deleted.
    """
    without_disallowed = allowed_pattern.sub('', text)
    trimmed = without_disallowed.strip()
    without_nbsp = trimmed.replace('\u00A0', '')
    return without_nbsp.strip()

# Work on a copy so the DataFrame shown in the preview stays untouched
df = df_preview.copy()

# Replace missing values with None so they serialise as JSON null.
# The cast to object comes first: a numeric column cannot hold None, so
# without it the NaN values survive and json writes the non-standard
# token NaN into the output.
df = df.astype(object).where(df.notna(), None)

# Convert the DataFrame to a list of one dict per row
json_data = df.to_dict(orient="records")

# Clean each string value; a value containing ';' becomes a list of its
# non-empty, trimmed parts. Only existing keys are reassigned, so the
# dict's size does not change while it is being iterated.
for record in json_data:
    for key, value in record.items():
        if not isinstance(value, str):
            continue
        cleaned_value = clean_string(value.strip())
        if ';' in cleaned_value:
            record[key] = [item.strip() for item in cleaned_value.split(';') if item.strip()]
        else:
            record[key] = cleaned_value

# Display the JSON data
print("Converted JSON Data:")
print(json.dumps(json_data, indent=4))

### Step 4: Save the JSON Data to a File

In [None]:
### Step 4: Save JSON File
# Name the output after the CSV: its base name with the extension removed.
source_name = os.path.basename(selected_file_path)
csv_filename_only = os.path.splitext(source_name)[0]
output_file_path = f"../../data/processed/processed_{csv_filename_only}.json"

# Make sure the destination folder exists before writing.
output_dir = os.path.dirname(output_file_path)
os.makedirs(output_dir, exist_ok=True)

# ensure_ascii=False writes non-ASCII characters as-is rather than escaped.
with open(output_file_path, "w", encoding="utf-8") as json_file:
    json.dump(json_data, json_file, indent=4, ensure_ascii=False)

print(f"JSON data saved to {output_file_path}")


### - END