In [4]:
!pip install python-dateutil



In [8]:
import uuid

type(str(uuid.uuid4()))

str

In [2]:
import json
import os
import datetime
import uuid
import locale
from dateutil import parser
from utils import database
from model import models, schemas
from functools import lru_cache

# Configurer la locale en français
locale.setlocale(locale.LC_TIME, 'fr_FR.UTF-8')

# Créer les tables uniquement si nécessaire
models.Base.metadata.create_all(bind=database.engine)

# Fonction pour ouvrir la base de données
def get_db():
    db = database.SessionLocal()
    try:
        yield db
    finally:
        db.close()

# Charger un fichier JSON
def read_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Obtenir la liste des fichiers JSON
def get_data_list(data_dir='./data'):
    return [f for f in os.listdir(data_dir) if f.endswith('.json')]

# Préparer les correspondances de mois (optimisé avec cache)
@lru_cache(maxsize=None)
def get_month_mapping():
    return {
        'janvier': 'January', 'février': 'February', 'mars': 'March',
        'avril': 'April', 'mai': 'May', 'juin': 'June', 'juillet': 'July',
        'août': 'August', 'septembre': 'September', 'octobre': 'October',
        'novembre': 'November', 'décembre': 'December'
    }

# Parser les dates en utilisant les correspondances
def parse_date(date_str):
    fr_to_en = get_month_mapping()
    day, month, year = date_str.split(' ')
    month_en = fr_to_en[month.lower()]
    date_en = f"{day} {month_en} {year}"
    return parser.parse(date_en, dayfirst=True)

# Insérer des données en base de données (optimisé pour les batchs)
def insert_data(dict_data):
    db = database.SessionLocal()
    try:
        # Insérer la localisation
        id_location = str(uuid.uuid4())
        dict_location = {
            'id_location': id_location,
            'longitude': dict_data['longitude'],
            'latitude': dict_data['latitude'],
            'adresse': dict_data['adresse']
        }
        location = models.DimLocation(**schemas.DimLocation(**dict_location).model_dump())
        db.add(location)

        # Insérer le restaurant
        id_restaurant = str(uuid.uuid4())
        dict_restaurant = {
            'id_restaurant': id_restaurant,
            'nom': dict_data['nom'],
            'id_location': id_location
        }
        restaurant = models.DimRestaurant(**schemas.DimRestaurant(**dict_restaurant).model_dump())
        db.add(restaurant)

        # Préparer les entrées pour les avis et les dates
        avis_entries = []
        date_entries = []

        for avis in dict_data['avis']:
            # Insérer la date
            id_date = str(uuid.uuid4())
            date_temp = parse_date(avis['date'])
            jour_temp, mois_temp, annee_temp = avis['date'].split(' ')
            dict_time = {
                'id_date': id_date,
                'date': date_temp,
                'mois': str(mois_temp),
                'annee': str(annee_temp),
                'jour': str(jour_temp),
            }
            date_entry = models.DimDate(**schemas.DimDate(**dict_time).model_dump())
            date_entries.append(date_entry)

            # Insérer l'avis
            id_avis = str(uuid.uuid4())
            dict_avis = {
                'id_avis': id_avis,
                'id_restaurant': id_restaurant,
                'id_date': id_date,
                'note': avis['nb_etoiles']
            }
            avis_entry = models.FaitAvis(**schemas.FaitAvis(**dict_avis).model_dump())
            avis_entries.append(avis_entry)

        # Exécuter les insertions groupées
        db.add_all(date_entries)
        db.add_all(avis_entries)
        db.commit()

    except Exception as e:
        print(f"Erreur : {e}")
        db.rollback()
    finally:
        db.close()

# Charger tous les fichiers JSON en mémoire
def load_all_json(data_dir='./data'):
    data_list = []
    for file in get_data_list(data_dir):
        data = read_json_file(f'{data_dir}/{file}')
        data_list.append(data)
    return data_list

# Insérer les données des fichiers JSON
def insert_json_data(data_dir='./data'):
    all_data = load_all_json(data_dir)
    for data in all_data:
        insert_data(data)

# Lancer l'importation
if __name__ == "__main__":
    insert_json_data()


In [None]:
import json
from util import database
from model import models, schemas
import os
import datetime
import uuid
import locale
from dateutil import parser
import datetime

# Configurer la locale en français
locale.setlocale(locale.LC_TIME, 'fr_FR.UTF-8')

def get_db():
    db = database.SessionLocal()
    try:
        yield db
    finally:
        db.close()

models.Base.metadata.create_all(bind=database.engine)

def read_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def get_data_list(data_dir='./data'):
    json_files = [f for f in os.listdir(data_dir) if f.endswith('.json')]
    return json_files

def parse_date(date_str):
    # French to English month mappings
    fr_to_en = {
        'janvier': 'January',
        'février': 'February',
        'mars': 'March',
        'avril': 'April',
        'mai': 'May',
        'juin': 'June',
        'juillet': 'July',
        'août': 'August',
        'septembre': 'September',
        'octobre': 'October',
        'novembre': 'November',
        'décembre': 'December'
    }
    
    day, month, year = date_str.split(' ')
    
    month_en = fr_to_en[month.lower()]
    
    date_en = f"{day} {month_en} {year}"
    
    # Parse the English date string
    return parser.parse(date_en, dayfirst=True)

def insert_data(dict_data):
    db = database.SessionLocal()
    try:
        # Insert location
        id_location = str(uuid.uuid4())
        dict_location = {
            'id_location': id_location,
            'longitude': dict_data['longitude'],
            'latitude': dict_data['latitude'],
            'adresse': dict_data['adresse']
        }
        dim_location = schemas.DimLocation(**dict_location)
        location = models.DimLocation(**dim_location.model_dump())
        db.add(location)
        db.commit()
        db.refresh(location)

        # Insert restaurant
        id_restaurant = str(uuid.uuid4())
        dict_restaurant = {
            'id_restaurant': id_restaurant,
            'nom': dict_data['nom'],
            'id_location': id_location
        }
        dim_restaurant = schemas.DimRestaurant(**dict_restaurant)
        restaurant = models.DimRestaurant(**dim_restaurant.model_dump())
        db.add(restaurant)
        db.commit()
        db.refresh(restaurant)

        # Insert avis
        for avis in dict_data['avis']:
            # Insert date
            id_date = str(uuid.uuid4())
            date_temp = parse_date(avis['date'])

            jour_temp ,mois_temp , annee_temp = avis['date'].split(' ')

            dict_time = {
                'id_date': id_date,
                'date': date_temp,
                'mois': str(mois_temp),
                'annee': str(annee_temp),
                'jour': str(jour_temp),
            }
            dim_date = schemas.DimDate(**dict_time)
            date_entry = models.DimDate(**dim_date.model_dump())
            db.add(date_entry)
            db.commit()
            db.refresh(date_entry)

            # Insert avis
            id_avis = str(uuid.uuid4())
            dict_avis = {
                'id_avis': id_avis,
                'id_restaurant': id_restaurant,
                'id_date': id_date,
                'note': avis['nb_etoiles']
            }
            dim_avis = schemas.FaitAvis(**dict_avis)
            avis_entry = models.FaitAvis(**dim_avis.model_dump())
            db.add(avis_entry)
            db.commit()
            db.refresh(avis_entry)

    except Exception as e:
        print(e)
        db.rollback()
    finally:
        db.close()

def insert_json_data(data_dir='./data'):
    json_files = get_data_list(data_dir)
    for file in json_files:
        data = read_json_file(f'{data_dir}/{file}')
        insert_data(data)

insert_json_data()

In [4]:
import json
from util import database
from model import models, schemas
import os
import datetime
import uuid


def get_db():
    db = database.SessionLocal()
    try:
        yield db
    finally:
        db.close()

models.Base.metadata.create_all(bind= database.engine)


def read_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


def  get_data_list(data_dir = './data'):
    # data_dir = './data'
    json_files = [f for f in os.listdir(data_dir) if f.endswith('.json')]
    return json_files

def insert_data(dict_data):
    db = database.SessionLocal()

    try:
        # Insert location
        id_location = uuid.uuid4()
        print(id_location)
        dict_location = {
            'id_location': id_location,  # Provide a default value if id_location is missing
            'longitude': dict_data['longitude'],
            'latitude': dict_data['latitude'],
            'adresse': dict_data['adresse']
        }
        dim_location = schemas.DimLocation(**dict_location)
        location = models.DimLocation(**dim_location.dict())
        db.add(location)
        db.commit()
        db.refresh(location)

        # Insert restaurant
        id_restaurant = uuid.uuid4()
        dict_restaurant = {
            'id_restaurant': id_restaurant,
            'nom': dict_data['nom'],
            'id_location': id_location
        }
        dim_restaurant = schemas.DimRestaurant(**dict_restaurant)
        restaurant = models.DimRestaurant(**dim_restaurant.dict())
        db.add(restaurant)
        db.commit()
        db.refresh(restaurant)

        # Insert avis
        for avis  in  dict_data['avis']:
            
            #insert date
            id_date = uuid.uuid4()
            date_temp = datetime.datetime.strptime(avis['date'])
            jour_semaine = date_temp.weekday()
            mois_temp = date_temp.strftime('%m')
            annee_temp = date_temp.strftime('%Y')
            dict_time = {
                'id_date': id_date,
                'date': date_temp,
                'mois': mois_temp,
                'annee': annee_temp,
                'jour_semaine': jour_semaine,
            }
            print(jour_semaine)
            #insert avis
            id_avis = uuid.uuid4()
            nombre_etoile = avis['nombre_etoile']
            dict_avis = {
                'id_avis': id_avis,
                'avis': avis
            }
            dim_avis = schemas.DimAvis(**dict_avis)
            avis = models.DimAvis(**dim_avis.dict())
            db.add(avis)
            db.commit()
            db.refresh(avis)





        dict_time = {
            'id_time': id_time,
            'date': dict_data['date'],
            'jour': dict_data['jour'],
            'heure': dict_data['heure']
        }
        dim_time = schemas.DimTime(**dict_time)
        time = models.DimTime(**dim_time.dict())

    except Exception as e:
        print(e)
        # db.rollback()
    finally:
        db.close()


def insert_json_data(data_dir = './data'):
    json_files = get_data_list(data_dir)
    for file in json_files:
        data = read_json_file(f'{data_dir}/{file}')
        insert_data(data)
        
insert_json_data( )

a39d4beb-b19d-47ce-b377-dac26207b498
1 validation error for DimLocation
id_location
  Input should be a valid string [type=string_type, input_value=UUID('a39d4beb-b19d-47ce-b377-dac26207b498'), input_type=UUID]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
b695c8bc-41b0-4ddd-9438-30b3e5ad25af
1 validation error for DimLocation
id_location
  Input should be a valid string [type=string_type, input_value=UUID('b695c8bc-41b0-4ddd-9438-30b3e5ad25af'), input_type=UUID]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
1e435dbc-77a8-4cad-9ee1-1b0065eefcee
1 validation error for DimLocation
id_location
  Input should be a valid string [type=string_type, input_value=UUID('1e435dbc-77a8-4cad-9ee1-1b0065eefcee'), input_type=UUID]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
b0934ba0-70d9-4979-b840-86203c81d762
1 validation error for DimLocation
id_location
  Input should be a valid string [type=s

In [41]:
id = random.randint(1, 1000000000)
id


877473145

In [11]:
#afficher  les  données de  la  table  DimLocation
def get_all_locations():
    db = database.SessionLocal()
    locations = db.query(models.DimLocation).all()
    db.close()
    return locations

data  = get_all_locations()
print(data)
for d in data:
    print(d.adresse)

[<model.models.DimLocation object at 0x0000022483382160>, <model.models.DimLocation object at 0x0000022483382280>, <model.models.DimLocation object at 0x00000224830D42E0>, <model.models.DimLocation object at 0x00000224830D45B0>, <model.models.DimLocation object at 0x00000224830D4130>, <model.models.DimLocation object at 0x00000224830D4370>, <model.models.DimLocation object at 0x00000224830D4610>, <model.models.DimLocation object at 0x00000224830D40A0>, <model.models.DimLocation object at 0x00000224830D4520>, <model.models.DimLocation object at 0x00000224830D4340>, <model.models.DimLocation object at 0x00000224830D4B20>, <model.models.DimLocation object at 0x0000022481F45970>, <model.models.DimLocation object at 0x0000022481F45670>, <model.models.DimLocation object at 0x0000022481F45490>, <model.models.DimLocation object at 0x0000022481F45580>, <model.models.DimLocation object at 0x0000022481F45B80>, <model.models.DimLocation object at 0x0000022481F45880>, <model.models.DimLocation obje

In [7]:
import json
from util import database
# from .database import SessionLocal
# from . import models, schemas, crud
from models import models, schemas, crud
# from sqlalchemy.orm import Session
# Create all tables if they don't exist
# Base.metadata.create_all(bind=engine)

def get_db():
    db = database.SessionLocal()
    try:
        yield db
    finally:
        db.close()

models.Base.metadata.create_all(bind=database.engine)
# def read_json_file(file_path):
#     with open(file_path, 'r', encoding='utf-8') as file:
#         data = json.load(file)
#     return data

# def insert_data_from_json(db: SessionLocal, data: dict):
#     # Insert locations
#     for location in data.get('locations', []):
#         location_schema = schemas.DimLocationCreate(**location)
#         crud.create_location(db, location_schema)

#     # Insert restaurants
#     for restaurant in data.get('restaurants', []):
#         restaurant_schema = schemas.DimRestaurantCreate(**restaurant)
#         crud.create_restaurant(db, restaurant_schema)

#     # Insert dates
#     for date in data.get('dates', []):
#         date_schema = schemas.DimDateCreate(**date)
#         crud.create_date(db, date_schema)

#     # Insert reviews
#     for review in data.get('reviews', []):
#         review_schema = schemas.FaitAvisCreate(**review)
#         crud.create_review(db, review_schema)

# def main():
#     db = SessionLocal()
#     try:
#         data = read_json_file('data.json')
#         insert_data_from_json(db, data)
#     finally:
#         db.close()

# if __name__ == "__main__":
#     main()

ImportError: cannot import name 'create_engine' from 'sqlalchemy' (c:\Users\ediad\Documents\NLP\TripAdvisor-NLP-Analysis\server\app\sqlalchemy\__init__.py)

In [1]:
import os
import json
import re
import string
from schemas import DimDateBase

def read_json_file(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)
    
data_dir = './data'
json_files = [f for f in os.listdir(data_dir) if f.endswith('.json')]



for  file  in  json_files[:1]:
    # dim_date = DimDateBase()

    print(file)
    file_path = os.path.join(data_dir, file)
    data = read_json_file(file_path)
    print(data.keys())

Agastache_Restaurant.json
dict_keys(['nom', 'adresse', 'classement', 'horaires', 'note_globale', 'note_cuisine', 'note_service', 'note_rapportqualiteprix', 'note_ambiance', 'infos_pratiques', 'repas', 'regimes', 'fourchette_prix', 'fonctionnalités', 'type_cuisines', 'latitude', 'longitude', 'nb_avis', 'nbExcellent', 'nbTrèsbon', 'nbMoyen', 'nbMédiocre', 'nbHorrible', 'avis'])


In [None]:
import os
import json
import re
import string
from schemas import DimDateBase

def read_json_file(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)
    
data_dir = '../data'
json_files = [f for f in os.listdir(data_dir) if f.endswith('.json')]



for  file  in  json_files[:1]:
    # dim_date = DimDateBase()

    print(file)
    file_path = os.path.join(data_dir, file)
    data = read_json_file(file_path)
    date = {
        'date': data['nom']

    }
    dim_date = DimDateBase()

    print(date)
    # print(data ['avis'])
    # print(len(data))

Agastache_Restaurant.json


TypeError: __init__() takes 1 positional argument but 2 were given

In [19]:
from schemas import DimDateCreate, DimAuteurCreate, FaitAvisCreate, DimDate, DimAuteur, FaitAvis

# Example function to create a new date entry
def create_date_entry(date_str: str, mois_str: str, annee_str: str):
    date_entry = DimDateCreate(date=date_str, mois=mois_str, annee=annee_str)
    # Perform further operations, such as saving to the database
    return date_entry

# # Example function to create a new author entry
# def create_author_entry(auteur: str, email: Optional[str] = None):
#     author_entry = DimAuteurCreate(auteur=auteur, email=email)
#     # Perform further operations, such as saving to the database
#     return author_entry

# # Example function to create a new review entry
# def create_review_entry(id_restaurant: int, id_date: int, id_auteur: int, note: int, commentaire: Optional[str] = None, nb_commentaire: Optional[int] = None):
#     review_entry = FaitAvisCreate(
#         id_restaurant=id_restaurant,
#         id_date=id_date,
#         id_auteur=id_auteur,
#         note=note,
#         commentaire=commentaire,
#         nb_commentaire=nb_commentaire
#     )
#     # Perform further operations, such as saving to the database
#     return review_entry

# Example usage
if __name__ == "__main__":
    date_entry = create_date_entry("2023-10-01", "10", "2023")
    print(date_entry)

    # author_entry = create_author_entry("John Doe", "john.doe@example.com")
    # print(author_entry)

    # review_entry = create_review_entry(1, 1, 1, 5, "Great restaurant!", 10)
    # print(review_entry)

date=datetime.date(2023, 10, 1) mois=10 annee=2023
