# Préparation

## Importation des packages

In [1]:
import pymongo
from pymongo import MongoClient
import json
import ujson
from bson import json_util
from bson.json_util import loads

## Connexion à la base de données

In [2]:
# Connect to the MongoDB server on localhost:27017 and take a handle on the
# "1_Warehouse" database; every later cell reaches its collections via `db`.
client = MongoClient("localhost", 27017)
db = client.get_database("1_Warehouse")

# Installation des fichiers de normalisation

## Installation de la collection `Customer_1`

In [3]:
# Load ./Normalization/Customer_1.json (one JSON document per line) into the
# `Customer_1` collection.
Customer_1 = db["Customer_1"]

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and skip blank lines (for instance a trailing empty line),
# which json.loads would reject.
with open("./Normalization/Customer_1.json", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

Customer_1.insert_many(data)
print("Fichier Customer_1.json est inséré avec succès.")

Fichier Customer_1.json est inséré avec succès.


## Installation de la collection `Order_1`

In [4]:
# Load ./Normalization/Order_1.json (one JSON document per line) into the
# `Order_1` collection.
Order_1 = db["Order_1"]

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and skip blank lines (for instance a trailing empty line),
# which json.loads would reject.
with open("./Normalization/Order_1.json", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

Order_1.insert_many(data)
print("Fichier Order_1.json est inséré avec succès.")

Fichier Order_1.json est inséré avec succès.


## Installation de la collection `Order_Line_1`

In [5]:
# Load ./Normalization/Order_Line_1.json (one JSON document per line) into the
# `Order_Line_1` collection.
Order_Line_1 = db["Order_Line_1"]

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and skip blank lines (for instance a trailing empty line),
# which json.loads would reject.
with open("./Normalization/Order_Line_1.json", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

Order_Line_1.insert_many(data)
print("Fichier Order_Line_1.json est inséré avec succès.")

Fichier Order_Line_1.json est inséré avec succès.


# Adjonction des index des collections installées

## Adjonction des index de la collection `Customer_1`

In [6]:
# Create one single-field ascending index per listed Customer_1 field, then
# display the names of all indexes now present on the collection.
for field in ('c_id', 'c_d_id', 'c_w_id'):
    result = db.Customer_1.create_index([(field, pymongo.ASCENDING)])
list(db.Customer_1.index_information())

['_id_', 'c_id_1', 'c_d_id_1', 'c_w_id_1']

## Adjonction des index de la collection `Order_1`

In [7]:
# Create one single-field ascending index per listed Order_1 field, then
# display the names of all indexes now present on the collection.
for field in ('o_id', 'o_d_id', 'o_w_id', 'o_c_id'):
    result = db.Order_1.create_index([(field, pymongo.ASCENDING)])
list(db.Order_1.index_information())

['_id_', 'o_id_1', 'o_d_id_1', 'o_w_id_1', 'o_c_id_1']

## Adjonction des index de la collection `Order_Line_1`

In [8]:
# Create one single-field ascending index per listed Order_Line_1 field, then
# display the names of all indexes now present on the collection.
for field in ('ol_id', 'ol_d_id', 'ol_w_id'):
    result = db.Order_Line_1.create_index([(field, pymongo.ASCENDING)])
list(db.Order_Line_1.index_information())

['_id_', 'ol_id_1', 'ol_d_id_1', 'ol_w_id_1']

# Semi-dénormalisation

## Création du fichier au format JSON

In [9]:
# Semi-denormalisation: for every order whose o_id is "1" .. "4000", embed the
# matching Order_Line_1 documents in an `order_lines` array and dump the
# result, one JSON document per line, to ./Denormalization/Semi_1.json.

# Join stage shared by every iteration: an order line is attached to an order
# when (ol_id, ol_d_id, ol_w_id) equals the order's (o_id, o_d_id, o_w_id).
order_lines_lookup = {
    "$lookup": {
        "from": "Order_Line_1",
        "let": {
            "o_id": "$o_id",
            "o_d_id": "$o_d_id",
            "o_w_id": "$o_w_id"
        },
        "pipeline": [{
            "$match": {
                "$expr": {
                    "$and": [
                        {"$eq": ["$ol_id", "$$o_id"]},
                        {"$eq": ["$ol_d_id", "$$o_d_id"]},
                        {"$eq": ["$ol_w_id", "$$o_w_id"]},
                    ]
                }
            }
        }],
        "as": "order_lines"
    }
}

# The file is opened once and closed by the `with` block (previously it was
# reopened on every iteration and never closed). Write mode replaces the old
# append mode so that re-running the cell rebuilds the file instead of
# duplicating every line in it.
with open("./Denormalization/Semi_1.json", "w", encoding="utf-8") as out_file:
    for order_number in range(1, 4001):
        # The ids are matched as strings, hence str(). The pipeline ends with
        # $out, so the joined documents are written to the `tmp` collection
        # rather than returned by aggregate().
        db.Order_1.aggregate([
            {"$match": {"o_id": str(order_number)}},
            order_lines_lookup,
            {"$out": "tmp"},
        ])
        for document in db.tmp.find({}):
            # json_util.default serialises BSON-specific values such as ObjectId.
            out_file.write(json.dumps(document, default=json_util.default))
            out_file.write('\n')

print("Fichier Semi_1.json est créé avec succès.")

Fichier Semi_1.json est créé avec succès.


## Installation de la collection `Semi_1`

In [10]:
# Load ./Denormalization/Semi_1.json (one JSON document per line) into the
# `Semi_1` collection. bson.json_util.loads is used instead of json.loads so
# that values written through json_util.default are decoded back to BSON types.
Semi_1 = db["Semi_1"]

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and skip blank lines, which loads would reject.
with open("./Denormalization/Semi_1.json", "r", encoding="utf-8") as f:
    data = [loads(line) for line in f if line.strip()]

Semi_1.insert_many(data)
print("Fichier Semi_1.json est inséré avec succès.")

Fichier Semi_1.json est inséré avec succès.


## Affichage d’un échantillon

In [11]:
# Display the Semi_1 documents whose o_id, o_c_id, o_d_id and o_w_id are all
# "1". The projection hides the top-level `_id` only; embedded documents keep
# theirs.
semi_sample_filter = {
    "$and": [
        {"o_id": "1"},
        {"o_c_id": "1"},
        {"o_d_id": "1"},
        {"o_w_id": "1"},
    ]
}
semi_sample_projection = {"_id": 0}
list(db.Semi_1.find(semi_sample_filter, semi_sample_projection))

[{'o_id': '1',
  'o_d_id': '1',
  'o_w_id': '1',
  'o_c_id': '1',
  'o_entry_d': '2018-05-31 00:00:00',
  'o_carrier_id': '2',
  'o_ol_cnt': '9',
  'o_all_local': '1',
  'order_lines': [{'_id': ObjectId('5c4ea560e3ecda0813deacd9'),
    'ol_id': '1',
    'ol_d_id': '1',
    'ol_w_id': '1',
    'ol_number': '1',
    'ol_i_id': '93535',
    'ol_supply_w_id': '1',
    'ol_delivery_d': '2018-05-31 00:00:00',
    'ol_quantity': 5,
    'ol_amount': 28.67,
    'ol_dist_info': '6Afa2bZ4imdTo5W5rY1tkdlX'},
   {'_id': ObjectId('5c4ea560e3ecda0813deacda'),
    'ol_id': '1',
    'ol_d_id': '1',
    'ol_w_id': '1',
    'ol_number': '2',
    'ol_i_id': '1578',
    'ol_supply_w_id': '1',
    'ol_delivery_d': '2018-05-31 00:00:00',
    'ol_quantity': 5,
    'ol_amount': 28.45,
    'ol_dist_info': 'urnsRgEluLI99hIPaj0FiNSy'},
   {'_id': ObjectId('5c4ea560e3ecda0813deacdb'),
    'ol_id': '1',
    'ol_d_id': '1',
    'ol_w_id': '1',
    'ol_number': '3',
    'ol_i_id': '62015',
    'ol_supply_w_id': '1',


## Adjonction des index de la collection `Semi_1`

In [12]:
# Create one single-field ascending index per listed Semi_1 field, then
# display the names of all indexes now present on the collection.
for field in ('o_id', 'o_d_id', 'o_w_id', 'o_c_id'):
    result = db.Semi_1.create_index([(field, pymongo.ASCENDING)])
list(db.Semi_1.index_information())

['_id_', 'o_id_1', 'o_d_id_1', 'o_w_id_1', 'o_c_id_1']

# Dénormalisation

## Création du fichier au format JSON

In [13]:
# Denormalisation: for every customer whose c_id is "1" .. "3000", embed the
# matching Semi_1 documents (orders with their order lines) in a
# `customer_order` array and dump the result, one JSON document per line, to
# ./Denormalization/Denormal_1.json.

# Join stage shared by every iteration: an order is attached to a customer
# when (o_c_id, o_d_id, o_w_id) equals the customer's (c_id, c_d_id, c_w_id).
customer_order_lookup = {
    "$lookup": {
        "from": "Semi_1",
        "let": {
            "c_id": "$c_id",
            "c_d_id": "$c_d_id",
            "c_w_id": "$c_w_id"
        },
        "pipeline": [{
            "$match": {
                "$expr": {
                    "$and": [
                        {"$eq": ["$o_c_id", "$$c_id"]},
                        {"$eq": ["$o_d_id", "$$c_d_id"]},
                        {"$eq": ["$o_w_id", "$$c_w_id"]},
                    ]
                }
            }
        }],
        "as": "customer_order"
    }
}

# The file is opened once and closed by the `with` block (previously it was
# reopened on every iteration and never closed). Write mode replaces the old
# append mode so that re-running the cell rebuilds the file instead of
# duplicating every line in it.
with open("./Denormalization/Denormal_1.json", "w", encoding="utf-8") as out_file:
    for customer_number in range(1, 3001):
        # The ids are matched as strings, hence str(). The pipeline ends with
        # $out, so the joined documents are written to the `tmp` collection
        # rather than returned by aggregate().
        db.Customer_1.aggregate([
            {"$match": {"c_id": str(customer_number)}},
            customer_order_lookup,
            {"$out": "tmp"},
        ])
        for document in db.tmp.find({}):
            # json_util.default serialises BSON-specific values such as ObjectId.
            out_file.write(json.dumps(document, default=json_util.default))
            out_file.write('\n')
print("Fichier Denormal_1.json est créé avec succès.")

Fichier Denormal_1.json est créé avec succès.


## Installation de la collection `Denormal_1`

In [14]:
# Load ./Denormalization/Denormal_1.json (one JSON document per line) into the
# `Denormal_1` collection. bson.json_util.loads is used instead of json.loads
# so that values written through json_util.default are decoded back to BSON
# types.
Denormal_1 = db["Denormal_1"]

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding, and skip blank lines, which loads would reject.
with open("./Denormalization/Denormal_1.json", "r", encoding="utf-8") as f:
    data = [loads(line) for line in f if line.strip()]

Denormal_1.insert_many(data)
print("Fichier Denormal_1.json est inséré avec succès.")

Fichier Denormal_1.json est inséré avec succès.


## Affichage d’un échantillon

In [15]:
# Display the Denormal_1 documents whose c_id, c_d_id and c_w_id are all "1".
# The projection hides the top-level `_id` only; embedded documents keep
# theirs.
denormal_sample_filter = {
    "$and": [
        {"c_id": "1"},
        {"c_d_id": "1"},
        {"c_w_id": "1"},
    ]
}
denormal_sample_projection = {"_id": 0}
list(db.Denormal_1.find(denormal_sample_filter, denormal_sample_projection))

[{'c_id': '1',
  'c_d_id': '1',
  'c_w_id': '1',
  'c_first': 'HwGCLGahR',
  'c_middle': 'OE',
  'c_last': 'BARBARBAR',
  'c_street_1': 'Hw1j3srzjTfqh2l',
  'c_street_2': 'I5p6e157nUzuy5',
  'c_city': '7f9XKtCdjY',
  'c_state': 'J3',
  'c_zip': 'oYd1rLsPy',
  'c_phone': '0645826035543990',
  'c_since': '2018-05-31 00:00:00',
  'c_credit': 'GC',
  'c_credit_lim': '50000',
  'c_discount': '0.33',
  'c_balance': -10.0,
  'c_ytd_payment': 10.0,
  'c_payment_cnt': 1,
  'c_delivery_cnt': 0,
  'c_data': 'HJ51z50FOnLHDuME3jaKGCzXP6Fsk6azUKGc0Vjooo8AQ0jW0SJjXMLsBTQvgZ6rtQ80pBnVMU7cHFE7FrF0EJBWAFhm8BqRRr8NUtv1CoxJREMB2hmDuoyWgmbIzRPWp9PXxmMwyndzGH3pVCtOHnZv5HYCIU9UcBv4xMGaxGpuc5woB98WDYVDmkybP3HYzHmOaIrNtceiSvOCiN64xtiwQuMswoegnMnO0EpHFytP4s9GhxW2PpiNdvTJdQsIggCI9JXDxdH5LHke086BbkFCtio38mQyg019D24ObNANpod2lEjv7PzaOsmEOxTW3NKIb2Nis3i1Mab8ZGUq2biR9lRI',
  'customer_order': [{'_id': ObjectId('5c4ea55de3ecda0813de204a'),
    'o_id': '1',
    'o_d_id': '1',
    'o_w_id': '1',
    'o_c_id': '1',
    '

## Adjonction des index de la collection `Denormal_1`

In [16]:
# Create one single-field ascending index per listed Denormal_1 field, then
# display the names of all indexes now present on the collection.
for field in ('c_id', 'c_d_id', 'c_w_id'):
    result = db.Denormal_1.create_index([(field, pymongo.ASCENDING)])
list(db.Denormal_1.index_information())

['_id_', 'c_id_1', 'c_d_id_1', 'c_w_id_1']