In [None]:
import os
from datetime import datetime
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine


# Run date stamped on every control row. '%d-%m-%y' yields text such as
# '14-05-21' (day-month-two-digit-year).
aujourdhui = datetime.now()
date_controle = aujourdhui.strftime('%d-%m-%y')


# Connection settings. Each value can be overridden through the standard
# libpq environment variables so credentials need not live in the notebook;
# the fallbacks are the values that were previously hardcoded here.
user = os.environ.get('PGUSER', 'airflow')
password = os.environ.get('PGPASSWORD', 'airflow')
host = os.environ.get('PGHOST', 'postgres')
port = os.environ.get('PGPORT', '5432')
database_bronze = 'bronze'
database_silver = 'silver'

# URL-encode the credentials: characters such as '@' or ':' in a password
# would otherwise be parsed as URL delimiters. With the default values the
# encoded form is identical to the raw one.
_user_url = quote_plus(user)
_password_url = quote_plus(password)

db_url_bronze = f"postgresql://{_user_url}:{_password_url}@{host}:{port}/{database_bronze}"
engine_bronze = create_engine(db_url_bronze)

db_url_silver = f"postgresql://{_user_url}:{_password_url}@{host}:{port}/{database_silver}"
engine_silver = create_engine(db_url_silver)


# Source rows to check (bronze) and the register of controls already run (silver).
df_clients = pd.read_sql("SELECT * FROM clients", con=engine_bronze)
df_controles = pd.read_sql("SELECT * FROM controles", con=engine_silver)


In [2]:
# Preview the first rows of the clients frame.
# NOTE(review): the saved output below lists upper-case column names
# (ID_CLIENT, CIN, ...) while later cells index lower-case names
# ('id_client', 'cin', ...) - re-run on a fresh kernel to confirm the schema.
df_clients.head()

Unnamed: 0,ID_CLIENT,NOM,PRENOM,CIN,CARTE_SEJOUR,VILLE,NATIONALITE,TELEPHONE,id_intermediaire,DATE_CREATION
0,lu1cldwMLx,Hicks,Dawn,SJ109379,OL118027,,mre,800-778-8075x457,84,2021-05-14
1,k8N4sn4YJb,Wolfe,Kimberly,B136474,O499923,North Alishaport,mre,+1-913-505-2118x425,15,2024-03-27
2,6qzPbzht3H,Burch,Isabel3@,KX010301,HM264290,North Eric`,mre,935-921-5809,13,2021-07-16
3,KZPbHdxc1a,,Matthew,YT977001,OI901480,South Kenneth,etranger,4332103507,2,2023-02-25
4,3KaFE6Zhq9,Roth=#,,EB353024,B240907,,marocain,001-367-758-8791,91,2021-01-03


In [3]:
# Preview the first rows of the controls frame.
# NOTE(review): the saved output below has a header but no rows, i.e. the
# table was empty when this cell last ran - confirm that later cells cope
# with an empty 'id_controle' column.
df_controles.head()

Unnamed: 0,ID_CONTROLE,DATA,DIM_CONTROLE,DATE_CONTROLE


### Table de résultat : cohérence entre les colonnes CIN et NATIONALITE

In [None]:
def tester_coherence_cin_nationalite(row):
    cin = row['cin']
    nationalite = row['nationalite']
    
    if cin and nationalite.lower() == "marocain":
        return 1
    return 0

In [None]:
# One row per client: id_client plus the 0/1 outcome of the CIN / NATIONALITE check.
df_co_cin_natio = df_clients[['id_client']].copy()
df_co_cin_natio['result'] = df_clients.apply(tester_coherence_cin_nationalite, axis=1)

# Tag every result with max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so numbering
# starts at 1 when no control has been recorded yet.
max_id = df_controles['id_controle'].max()
if pd.isna(max_id):
    max_id = 0
df_co_cin_natio['id_controle'] = max_id + 1

In [None]:
# Write the CIN / NATIONALITE results to the silver database.
# NOTE(review): 'replace' drops the table written by a previous run - confirm
# that keeping only the latest results is intended.
df_co_cin_natio.to_sql(
    name='df_co_cin_natio',
    con=engine_silver,
    if_exists='replace',
    index=False,
)

### Remplissage de la table des contrôles

In [None]:
# Next control id = max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so the first
# control ever recorded gets id 1.
dernier_id = df_controles['id_controle'].max()
id_controle = 1 if pd.isna(dernier_id) else dernier_id + 1
data = "cin_nationalite"
dim_controle = "coherence"

# Register the control in the in-memory frame only; nothing is written to the
# database here. Re-running this cell appends one more row each time.
nouvelles_lignes = [[id_controle, data, dim_controle, date_controle]]
nouvelle_ligne_df = pd.DataFrame(nouvelles_lignes, columns=['id_controle', 'data', 'dim_controle', 'date_controle'])
df_controles = pd.concat([df_controles, nouvelle_ligne_df], ignore_index=True)

### Table de résultat : cohérence entre les colonnes CIN et CARTE_SEJOUR

In [None]:
def tester_coherence_cin_carte_sejour(row):
    cin = row['cin']
    carte_sejour = row['carte_sejour']
    if (cin and carte_sejour) or (not cin and not carte_sejour):
        return 0 
    return 1 

In [None]:
# One row per client: id_client plus the 0/1 outcome of the CIN / CARTE_SEJOUR check.
df_co_cin_carteSejour = df_clients[['id_client']].copy()
df_co_cin_carteSejour['result'] = df_clients.apply(tester_coherence_cin_carte_sejour, axis=1)

# Tag every result with max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so numbering
# starts at 1 when no control has been recorded yet.
max_id = df_controles['id_controle'].max()
if pd.isna(max_id):
    max_id = 0
df_co_cin_carteSejour['id_controle'] = max_id + 1

In [None]:
# Write the CIN / CARTE_SEJOUR results to the silver database.
# NOTE(review): 'replace' drops the table written by a previous run - confirm
# that keeping only the latest results is intended. The table name is
# mixed-case, unlike the other result tables.
df_co_cin_carteSejour.to_sql(
    name='df_co_cin_carteSejour',
    con=engine_silver,
    if_exists='replace',
    index=False,
)

### Remplissage de la table des contrôles

In [None]:
# Next control id = max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so the first
# control ever recorded gets id 1.
dernier_id = df_controles['id_controle'].max()
id_controle = 1 if pd.isna(dernier_id) else dernier_id + 1
data = "cin_carte_sejour"
dim_controle = "coherence"

# Register the control in the in-memory frame only; nothing is written to the
# database here. Re-running this cell appends one more row each time.
nouvelles_lignes = [[id_controle, data, dim_controle, date_controle]]
nouvelle_ligne_df = pd.DataFrame(nouvelles_lignes, columns=['id_controle', 'data', 'dim_controle', 'date_controle'])
df_controles = pd.concat([df_controles, nouvelle_ligne_df], ignore_index=True)

### Table de résultat : cohérence entre les colonnes NOM et PRENOM

In [None]:
import pandas as pd

def tester_coherence_nom_prenom(row):
    nom = row['nom']
    prenom = row['prenom']

    if pd.isnull(nom) or pd.isnull(prenom):
        return 0  
    
    if prenom in nom.split() or nom in prenom.split():
        return 0  
    return 1  


In [None]:
# One row per client: id_client plus the 0/1 outcome of the NOM / PRENOM check.
df_co_nom_prenom = df_clients[['id_client']].copy()
df_co_nom_prenom['result'] = df_clients.apply(tester_coherence_nom_prenom, axis=1)

# Tag every result with max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so numbering
# starts at 1 when no control has been recorded yet.
max_id = df_controles['id_controle'].max()
if pd.isna(max_id):
    max_id = 0
df_co_nom_prenom['id_controle'] = max_id + 1

In [None]:
# Write the NOM / PRENOM results to the silver database.
# NOTE(review): 'replace' drops the table written by a previous run - confirm
# that keeping only the latest results is intended.
df_co_nom_prenom.to_sql(
    name='df_co_nom_prenom',
    con=engine_silver,
    if_exists='replace',
    index=False,
)

### Remplissage de la table des contrôles

In [None]:
# Next control id = max(id_controle) + 1.
# max() of an empty column is NaN and NaN + 1 stays NaN, so the first
# control ever recorded gets id 1.
dernier_id = df_controles['id_controle'].max()
id_controle = 1 if pd.isna(dernier_id) else dernier_id + 1
data = "nom_prenom"
dim_controle = "coherence"

# Register the control in the in-memory frame only; nothing is written to the
# database here. Re-running this cell appends one more row each time.
nouvelles_lignes = [[id_controle, data, dim_controle, date_controle]]
nouvelle_ligne_df = pd.DataFrame(nouvelles_lignes, columns=['id_controle', 'data', 'dim_controle', 'date_controle'])
df_controles = pd.concat([df_controles, nouvelle_ligne_df], ignore_index=True)

In [None]:
# Append only the control rows that are not in the table yet.
# df_controles still contains every row read from 'controles' when the
# notebook started, so appending the whole frame would insert those rows a
# second time on each run. Filtering on the ids currently stored also makes
# this cell safe to re-run.
ids_existants = pd.read_sql("SELECT * FROM controles", con=engine_silver)['id_controle']
nouveaux_controles = df_controles[~df_controles['id_controle'].isin(ids_existants)]
nouveaux_controles.to_sql('controles', con=engine_silver, if_exists='append', index=False)