In [1]:
import os
import json
import sqlite3
from tqdm import tqdm

# 1. Folder containing the JSON files to import
#    NOTE(review): hard-coded absolute path on one machine; adjust before
#    running elsewhere.
folder_path = "C:/Users/pierr/OneDrive/Bureau/B3/Projet_python/data"
# 2. Connection to the SQLite database (same remark about the hard-coded path).
#    `conn` and `cursor` are module-level and reused by every step below.
db_path = "C:/Users/pierr/OneDrive/Bureau/test.db"
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# 3. Create the tables.
# One DDL statement per table, run in declaration order: Playlist, Track,
# Artist, Album, then the PlaylistTrack link table keyed by
# (playlist_id, position). Every statement uses IF NOT EXISTS, so running
# this again against an existing database leaves the tables as they are.
schema_statements = (
    '''
CREATE TABLE IF NOT EXISTS Playlist (
    pid INTEGER PRIMARY KEY,
    name TEXT,
    collaborative BOOLEAN,
    modified_at BIGINT,
    num_tracks INTEGER,
    num_albums INTEGER,
    num_followers INTEGER
)
''',
    '''
CREATE TABLE IF NOT EXISTS Track (
    track_uri TEXT PRIMARY KEY,
    track_name TEXT,
    duration_ms INTEGER,
    artist_uri TEXT,
    album_uri TEXT
)
''',
    '''
CREATE TABLE IF NOT EXISTS Artist (
    artist_uri TEXT PRIMARY KEY,
    artist_name TEXT
)
''',
    '''
CREATE TABLE IF NOT EXISTS Album (
    album_uri TEXT PRIMARY KEY,
    album_name TEXT
)
''',
    '''
CREATE TABLE IF NOT EXISTS PlaylistTrack (
    playlist_id INTEGER,
    track_uri TEXT,
    position INTEGER,
    PRIMARY KEY (playlist_id, position),
    FOREIGN KEY (playlist_id) REFERENCES Playlist(pid),
    FOREIGN KEY (track_uri) REFERENCES Track(track_uri)
)
''',
)

for ddl in schema_statements:
    cursor.execute(ddl)

conn.commit()

# 4. Process the JSON files.
# Each file is imported as one transaction: committed once it has been read
# completely, rolled back if anything fails, so a broken file never leaves
# a half-imported playlist behind. All inserts use INSERT OR IGNORE, which
# makes re-running the import on the same files a no-op for existing keys.
json_files = [f for f in os.listdir(folder_path) if f.endswith(".json")]
print(f"{len(json_files)} fichiers trouvés dans {folder_path}")

for filename in tqdm(json_files, desc="Importation JSON → SQL"):
    file_path = os.path.join(folder_path, filename)

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Accepted layouts: {"playlists": [...]}, a bare list of playlists,
        # or a single playlist object. The type is checked before calling
        # .get() so that a top-level list is actually handled instead of
        # raising AttributeError.
        if isinstance(data, dict) and data.get("playlists") is not None:
            playlists = data["playlists"]
        elif isinstance(data, list):
            playlists = data
        elif isinstance(data, dict) and "pid" in data:
            playlists = [data]
        else:
            print(f"Format inattendu dans le fichier : {filename}")
            continue

        for playlist in playlists:
            pid = playlist.get("pid")
            if pid is None:
                # Without an id there is no primary key to store it under.
                continue

            # The flag is compared against the string "true"; a real JSON
            # boolean is accepted as well.
            collaborative = playlist.get("collaborative")

            # Playlist
            cursor.execute("""
                INSERT OR IGNORE INTO Playlist (pid, name, collaborative, modified_at, num_tracks, num_albums, num_followers)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                pid,
                playlist.get("name"),
                collaborative is True or collaborative == "true",
                playlist.get("modified_at"),
                playlist.get("num_tracks"),
                playlist.get("num_albums"),
                playlist.get("num_followers")
            ))

            # `or []` also covers an explicit "tracks": null.
            tracks = playlist.get("tracks") or []

            # One batched statement per table instead of four execute()
            # calls per track. Rows keep their original order within each
            # table, so INSERT OR IGNORE still keeps the first-seen row.

            # Artist
            cursor.executemany("""
                INSERT OR IGNORE INTO Artist (artist_uri, artist_name)
                VALUES (?, ?)
            """, [
                (t.get("artist_uri"), t.get("artist_name"))
                for t in tracks
            ])

            # Album
            cursor.executemany("""
                INSERT OR IGNORE INTO Album (album_uri, album_name)
                VALUES (?, ?)
            """, [
                (t.get("album_uri"), t.get("album_name"))
                for t in tracks
            ])

            # Track
            cursor.executemany("""
                INSERT OR IGNORE INTO Track (track_uri, track_name, duration_ms, artist_uri, album_uri)
                VALUES (?, ?, ?, ?, ?)
            """, [
                (
                    t.get("track_uri"),
                    t.get("track_name"),
                    t.get("duration_ms"),
                    t.get("artist_uri"),
                    t.get("album_uri"),
                )
                for t in tracks
            ])

            # PlaylistTrack
            cursor.executemany("""
                INSERT OR IGNORE INTO PlaylistTrack (playlist_id, track_uri, position)
                VALUES (?, ?, ?)
            """, [
                (pid, t.get("track_uri"), t.get("pos"))
                for t in tracks
            ])

        conn.commit()

    except Exception as e:
        # Discard whatever this file inserted before failing; otherwise the
        # next file's commit would persist the partial rows.
        conn.rollback()
        print(f"Erreur avec {filename} : {e}")

# 5. Statistics: print the row count of every table, then close the database.
# The table names are fixed literals from this tuple (never user input), so
# formatting them into the query text is safe; SQLite cannot bind a table
# name as a `?` parameter.
table_labels = (
    ("Playlists :", "Playlist"),
    ("Tracks :", "Track"),
    ("Artists :", "Artist"),
    ("Albums :", "Album"),
    ("Relations Playlist-Track :", "PlaylistTrack"),
)
for label, table in table_labels:
    cursor.execute(f"SELECT COUNT(*) FROM {table}")
    print(label, cursor.fetchone()[0])

# "\n" (not "/n") so the message is preceded by a real blank line.
print("\n🎉 Import terminé avec succès !")
conn.close()


1000 fichiers trouvés dans C:/Users/pierr/OneDrive/Bureau/B3/Projet_python/data


Importation JSON → SQL: 100%|██████████| 1000/1000 [46:20<00:00,  2.78s/it] 


Playlists : 1000000
Tracks : 2262292
Artists : 295860
Albums : 734684
Relations Playlist-Track : 66346428
/n🎉 Import terminé avec succès !
