# L'opérateur REGEX
---

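> Syntaxe (MySQL / MariaDB) :
> ```
> SELECT nom_colonne
> FROM nom_table
> WHERE nom_colonne REGEXP 'pattern';
> ```
> Sous PostgreSQL (utilisé dans ce notebook), l'opérateur équivalent est `~` (sensible à la casse) ou `~*` (insensible à la casse) :
> ```
> SELECT nom_colonne
> FROM nom_table
> WHERE nom_colonne ~ 'pattern';
> ```
> <br>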

In [2]:
import os

import pandas as pd
from skimpy import skim
from sqlalchemy import create_engine

In [3]:
# Connect to PostgreSQL.
# The connection URL can be overridden through the DATABASE_URL environment
# variable so that credentials do not have to be edited into the notebook;
# when the variable is unset, the original local URL is used.
# NOTE(review): the fallback still embeds a password -- remove it before
# sharing or committing this notebook.
engine = create_engine(
    os.environ.get(
        'DATABASE_URL',
        'postgresql://laurent:test22@localhost:5432/test1'))

## Fichier des produits

In [4]:
# Load every row and column of the products_dim table into a DataFrame.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim;
"""
products_df = pd.read_sql(query, engine)

In [5]:
# Preview the first rows, transposed so that each column reads as a row.
products_df.head().T

Unnamed: 0,0,1,2,3,4
ProduitID,1,2,3,4,5
NomProduit,Samsung Galaxy S21,Samsung TV 55',Nike Air Max,Levi's Jeans,Dyson Vacuum
Description,"Featuring a 6.2-inch dynamic AMOLED display, t...","55-inch 4K UHD Smart TV with HDR, Crystal Disp...","Running shoes with Max Air cushioning, mesh up...","Classic denim jeans with a straight leg fit, f...",Cordless vacuum cleaner with strong suction po...
PrixUnitaire,699,129,85,299,999
FournisseurID,4,51,93,8,58


In [6]:
# Run skimpy's skim on the products DataFrame to get an overview of its columns.
skim(products_df)

## Fichier des clients

In [7]:
# Load every row and column of the customers_dim table into a DataFrame.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim;
"""
customers_df = pd.read_sql(query, engine)

In [8]:
# Preview the first rows, transposed so that each column reads as a row.
customers_df.head().T

Unnamed: 0,0,1,2,3,4
ClientID,1,2,3,4,5
Nom,Thompson,Pham,Rodriguez,Perez,Kramer
Prenom,Shannon,Benjamin,Bonnie,Stephanie,Joyce
Adresse,"384 Howard Parks, New Tony, DE 67522","15250 Anita Pine Apt. 887, New Kristenport, AR...","515 Henderson Coves Apt. 298, Lewismouth, OR 1...","619 Juan Parkways Suite 983, Tracibury, NM 13549","85731 Danny Groves Suite 513, Lake Michaelvill..."
Email,thompson.shannon@gmail.com,pham.benjamin@gmail.com,rodriguez.bonnie@gmail.com,perez.stephanie@gmail.com,kramer.joyce@gmail.com
NumeroTelephone,425163679,544573781,956054323,37851314,705010731


In [9]:
# Run skimpy's skim on the customers DataFrame to get an overview of its columns.
skim(customers_df)

## Requêtes SQL

Liste des produits qui contiennent 'TV' avec LIKE

In [22]:
# Products whose name contains 'TV', using LIKE.
# NOTE(review): '%%' is presumably the escaped form of a single '%' wildcard,
# required by the driver's %-style parameter formatting -- confirm.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim
-- Conditions
WHERE "NomProduit" LIKE '%%TV%%';
"""
pd.read_sql(query, engine)

Unnamed: 0,ProduitID,NomProduit,Description,PrixUnitaire,FournisseurID
0,2,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",129,51
1,7,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",85,72
2,12,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",799,4
3,16,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",699,50
4,29,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",399,80
5,33,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",399,49
6,35,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",799,47
7,38,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",799,22
8,44,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",399,40
9,48,Samsung TV 55',"55-inch 4K UHD Smart TV with HDR, Crystal Disp...",699,24


Liste des produits qui contiennent 'TV' avec REGEXP

In [10]:
# Products whose name contains 'TV', using the regex-match operator `~`.
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim
-- Condition
WHERE "NomProduit" ~ 'TV';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ProduitID,2,7,12,16,29
NomProduit,Samsung TV 55',Samsung TV 55',Samsung TV 55',Samsung TV 55',Samsung TV 55'
Description,"55-inch 4K UHD Smart TV with HDR, Crystal Disp...","55-inch 4K UHD Smart TV with HDR, Crystal Disp...","55-inch 4K UHD Smart TV with HDR, Crystal Disp...","55-inch 4K UHD Smart TV with HDR, Crystal Disp...","55-inch 4K UHD Smart TV with HDR, Crystal Disp..."
PrixUnitaire,129,85,799,699,399
FournisseurID,51,72,4,50,80


Liste des produits dont le nom commence par la lettre D avec LIKE

In [24]:
# Products whose name starts with 'D', using LIKE.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim
-- Condition
WHERE "NomProduit" LIKE 'D%%';
"""
pd.read_sql(query, engine)

Unnamed: 0,ProduitID,NomProduit,Description,PrixUnitaire,FournisseurID
0,5,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,999,58
1,6,Dell XPS 13,13.4-inch FHD+ laptop with 10th Gen Intel Core...,299,28
2,10,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,699,47
3,11,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,999,39
4,14,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,799,82
5,19,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,59,13
6,22,Dell XPS 13,13.4-inch FHD+ laptop with 10th Gen Intel Core...,129,93
7,24,Dell XPS 13,13.4-inch FHD+ laptop with 10th Gen Intel Core...,999,34
8,28,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,129,84
9,31,Dell XPS 13,13.4-inch FHD+ laptop with 10th Gen Intel Core...,999,2


Liste des produits dont le nom commence par la lettre D avec REGEXP

In [11]:
# Products whose name starts with 'D': '^' anchors the regex at the start.
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT * 
-- BD récupérée
FROM products_dim
-- Condition
WHERE "NomProduit" ~ '^D';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ProduitID,5,6,10,11,14
NomProduit,Dyson Vacuum,Dell XPS 13,Dyson Vacuum,Dyson Vacuum,Dyson Vacuum
Description,Cordless vacuum cleaner with strong suction po...,13.4-inch FHD+ laptop with 10th Gen Intel Core...,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...
PrixUnitaire,999,299,699,999,799
FournisseurID,58,28,47,39,82


Liste des produits dont le nom se termine par la lettre 'm' avec LIKE

In [25]:
# Products whose name ends with a lowercase 'm', using LIKE.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim
-- Condition
WHERE "NomProduit" LIKE '%%m';
"""
pd.read_sql(query, engine)

Unnamed: 0,ProduitID,NomProduit,Description,PrixUnitaire,FournisseurID
0,5,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,999,58
1,10,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,699,47
2,11,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,999,39
3,14,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,799,82
4,19,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,59,13
5,28,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,129,84
6,34,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,59,92
7,36,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,85,15
8,40,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,59,95
9,42,Dyson Vacuum,Cordless vacuum cleaner with strong suction po...,59,13


Liste des produits dont le nom se termine par la lettre 'm' avec REGEXP

In [12]:
# Products whose name ends with a lowercase 'm': '$' anchors the regex at the end.
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM products_dim
-- Condition
WHERE "NomProduit" ~ 'm$';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ProduitID,5,10,11,14,19
NomProduit,Dyson Vacuum,Dyson Vacuum,Dyson Vacuum,Dyson Vacuum,Dyson Vacuum
Description,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...,Cordless vacuum cleaner with strong suction po...
PrixUnitaire,999,699,999,799,59
FournisseurID,58,47,39,82,13


Liste des clients dont le numéro de téléphone commence par '4' ou '5' avec LIKE

In [30]:
# Customers whose phone number starts with '4' or '5'; LIKE needs one
# condition per prefix, combined with OR.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "NumeroTelephone" LIKE '4%%' OR "NumeroTelephone" LIKE '5%%';
"""
pd.read_sql(query, engine)

Unnamed: 0,ClientID,Nom,Prenom,Adresse,Email,NumeroTelephone
0,1,Thompson,Shannon,"384 Howard Parks, New Tony, DE 67522",thompson.shannon@gmail.com,425163679
1,2,Pham,Benjamin,"15250 Anita Pine Apt. 887, New Kristenport, AR...",pham.benjamin@gmail.com,544573781
2,6,Scott,Jacob,"474 Rogers Fork Apt. 821, Chelseabury, MO 72167",scott.jacob@gmail.com,461756294
3,7,Rodriguez,Megan,"12744 Daniel Tunnel, Millerside, HI 91891",rodriguez.megan@gmail.com,55426480
4,12,Smith,Jeremy,"USCGC Taylor, FPO AE 79668",smith.jeremy@gmail.com,458994501
5,14,Sylvia,Nicole,"68737 Beasley Fall Apt. 435, Lake Kennethshire...",sylvia.nicole@gmail.com,408607465
6,18,Soto,Holly,"48506 Foster Falls Apt. 055, Lake Cameronton, ...",soto.holly@gmail.com,403071718
7,27,Brown,Danielle,"732 Joshua Ways, Jamesfort, OH 72222",brown.danielle@gmail.com,590619668
8,29,Jimenez,Susan,"8140 Susan Port Suite 208, West Christopher, N...",jimenez.susan@gmail.com,578759133
9,37,Glenn,Monica,"3183 Albert Wall Suite 990, New Timothymouth, ...",glenn.monica@gmail.com,471358315


Liste des clients dont le numéro de téléphone commence par '4' ou '5' avec REGEXP

In [13]:
# Customers whose phone number starts with '4' or '5': a single regex with
# the alternation '|' between two start-anchored branches.
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "NumeroTelephone" ~ '^4|^5';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ClientID,1,2,6,7,12
Nom,Thompson,Pham,Scott,Rodriguez,Smith
Prenom,Shannon,Benjamin,Jacob,Megan,Jeremy
Adresse,"384 Howard Parks, New Tony, DE 67522","15250 Anita Pine Apt. 887, New Kristenport, AR...","474 Rogers Fork Apt. 821, Chelseabury, MO 72167","12744 Daniel Tunnel, Millerside, HI 91891","USCGC Taylor, FPO AE 79668"
Email,thompson.shannon@gmail.com,pham.benjamin@gmail.com,scott.jacob@gmail.com,rodriguez.megan@gmail.com,smith.jeremy@gmail.com
NumeroTelephone,425163679,544573781,461756294,55426480,458994501


Donner la liste des clients dont le numéro de téléphone commence par 5 ou se termine par 94 avec LIKE

In [31]:
# Customers whose phone number starts with '5' or ends with '94', using LIKE.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "NumeroTelephone" LIKE '5%%' OR "NumeroTelephone" LIKE '%%94';
"""
pd.read_sql(query, engine)

Unnamed: 0,ClientID,Nom,Prenom,Adresse,Email,NumeroTelephone
0,2,Pham,Benjamin,"15250 Anita Pine Apt. 887, New Kristenport, AR...",pham.benjamin@gmail.com,544573781
1,6,Scott,Jacob,"474 Rogers Fork Apt. 821, Chelseabury, MO 72167",scott.jacob@gmail.com,461756294
2,7,Rodriguez,Megan,"12744 Daniel Tunnel, Millerside, HI 91891",rodriguez.megan@gmail.com,55426480
3,27,Brown,Danielle,"732 Joshua Ways, Jamesfort, OH 72222",brown.danielle@gmail.com,590619668
4,29,Jimenez,Susan,"8140 Susan Port Suite 208, West Christopher, N...",jimenez.susan@gmail.com,578759133
5,40,Johnson,Stephanie,"386 Jermaine Plain Apt. 929, Williamsville, NM...",johnson.stephanie@gmail.com,557240325
6,42,Coleman,Dennis,"925 Robertson Springs Suite 489, Port Jeffreyb...",coleman.dennis@gmail.com,516922641
7,45,Pitts,Ronald,"26300 Rodriguez Mission Apt. 754, Port Joel, N...",pitts.ronald@gmail.com,589602136
8,61,Floyd,Mark,"885 Garner Canyon, East Christy, CA 20864",floyd.mark@gmail.com,53527748
9,62,Williams,Yesenia,"PSC 6015, Box 6383, APO AA 64305",williams.yesenia@gmail.com,535311132


Donner la liste des clients dont le numéro de téléphone commence par 5 ou se termine par 94 avec REGEXP

In [14]:
# Customers whose phone number starts with '5' ('^5') or ends with '94' ('94$').
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "NumeroTelephone" ~ '^5|94$';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ClientID,2,6,7,27,29
Nom,Pham,Scott,Rodriguez,Brown,Jimenez
Prenom,Benjamin,Jacob,Megan,Danielle,Susan
Adresse,"15250 Anita Pine Apt. 887, New Kristenport, AR...","474 Rogers Fork Apt. 821, Chelseabury, MO 72167","12744 Daniel Tunnel, Millerside, HI 91891","732 Joshua Ways, Jamesfort, OH 72222","8140 Susan Port Suite 208, West Christopher, N..."
Email,pham.benjamin@gmail.com,scott.jacob@gmail.com,rodriguez.megan@gmail.com,brown.danielle@gmail.com,jimenez.susan@gmail.com
NumeroTelephone,544573781,461756294,55426480,590619668,578759133


Donner la liste des clients dont le nom commence par la lettre 'R', 
suivie de n'importe quel caractère, puis de la lettre 'c' avec LIKE

In [37]:
# Customers whose name starts with 'R', followed by exactly one arbitrary
# character, followed by 'c' (the LIKE counterpart of the regex '^R.c').
# '_' matches exactly one character; the previous pattern 'R%%_c%%' also
# allowed any number of characters between 'R' and 'c'.
# NOTE(review): '%%' is presumably the escaped form of a single '%' wildcard,
# required by the driver's %-style parameter formatting -- confirm.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" LIKE 'R_c%%';
"""
pd.read_sql(sql=query, con=engine)

Unnamed: 0,ClientID,Nom,Prenom,Adresse,Email,NumeroTelephone
0,46,Richardson,Cheyenne,"70396 George Trafficway, New Laurie, KS 05583",richardson.cheyenne@gmail.com,639669352


Donner la liste des clients dont le nom commence par la lettre 'R', 
suivie de n'importe quel caractère, puis de la lettre 'c' avec REGEXP

In [15]:
# Customers whose name starts with 'R', then any single character ('.'),
# then 'c'. Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" ~ '^R.c';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0
ClientID,46
Nom,Richardson
Prenom,Cheyenne
Adresse,"70396 George Trafficway, New Laurie, KS 05583"
Email,richardson.cheyenne@gmail.com
NumeroTelephone,639669352


Liste des clients dont le nom commence par 'R' et se termine par 'a' avec LIKE

In [39]:
# Customers whose name starts with 'R' and ends with a lowercase 'a'.
# A single wildcard between the two letters is enough; the previous pattern
# 'R%%%%a' repeated it, which matches the same names.
# NOTE(review): '%%' is presumably the escaped form of a single '%' wildcard,
# required by the driver's %-style parameter formatting -- confirm.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" LIKE 'R%%a';
"""
pd.read_sql(sql=query, con=engine)

Unnamed: 0,ClientID,Nom,Prenom,Adresse,Email,NumeroTelephone
0,85,Rivera,Deborah,"390 Danielle Valley, Martinezberg, SD 64539",rivera.deborah@gmail.com,168181172


Liste des clients dont le nom commence par 'R' et se termine par 'a' avec REGEXP

In [16]:
# Customers whose name starts with 'R' and ends with 'a': '.*' spans any
# run of characters between the two anchors.
# Only the first rows of the result are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" ~ '^R.*a$';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0
ClientID,85
Nom,Rivera
Prenom,Deborah
Adresse,"390 Danielle Valley, Martinezberg, SD 64539"
Email,rivera.deborah@gmail.com
NumeroTelephone,168181172


Liste des clients dont le nom contient 'it' ou 'ie' ou 'il' ou 'is' avec REGEXP

In [17]:
# Customers whose name contains 'i' followed by one of t, e, l or s
# (bracket expression). Only the first rows are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" ~ 'i[tels]';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ClientID,10,12,15,17,20
Nom,Wilkins,Smith,Smith,Carrillo,Smith
Prenom,Patricia,Jeremy,Cynthia,Brianna,Justin
Adresse,"400 Tate Glen, Port Kristenfurt, OH 07250","USCGC Taylor, FPO AE 79668","382 Daniel Groves Suite 995, East Sherryfurt, ...","543 Austin Rest Apt. 109, East Amystad, FL 57767","603 Daniel Lakes Apt. 472, New Sandra, CT 81632"
Email,wilkins.patricia@gmail.com,smith.jeremy@gmail.com,smith.cynthia@gmail.com,carrillo.brianna@gmail.com,smith.justin@gmail.com
NumeroTelephone,888856897,458994501,219759485,670055675,843655697


Liste des clients dont le nom contient la lettre 'i' suivie d'une lettre comprise entre 'f' et 't' avec REGEXP

In [18]:
# Customers whose name contains 'i' followed by a character in the range
# 'f'..'t' (bracket range). Only the first rows are shown, transposed.
query = """
-- Colonnes récupérées
SELECT *
-- BD récupérée
FROM customers_dim
-- Condition
WHERE "Nom" ~ 'i[f-t]';
"""
pd.read_sql(query, engine).head().T

Unnamed: 0,0,1,2,3,4
ClientID,3,7,8,10,12
Nom,Rodriguez,Rodriguez,Collins,Wilkins,Smith
Prenom,Bonnie,Megan,Alexander,Patricia,Jeremy
Adresse,"515 Henderson Coves Apt. 298, Lewismouth, OR 1...","12744 Daniel Tunnel, Millerside, HI 91891","231 Eric Spring, Paulahaven, SD 79669","400 Tate Glen, Port Kristenfurt, OH 07250","USCGC Taylor, FPO AE 79668"
Email,rodriguez.bonnie@gmail.com,rodriguez.megan@gmail.com,collins.alexander@gmail.com,wilkins.patricia@gmail.com,smith.jeremy@gmail.com
NumeroTelephone,956054323,55426480,878299289,888856897,458994501
