# Projet:  
- Réalisation d'une ApplicationDasboard avec Streamlit  
à partir de base de données open source sur les transactions 
de cartes de crédits.

- Lien web vers la source des données:  
[credits_cards_transactions](https://www.kaggle.com/datasets/priyamchoksi/credit-card-transactions-dataset)

## Importation des librairies

- Installation des librairies:  
Exécuter dans la cellule:  %pip install -r requirements.txt  
ou  
Exécuter dans le terminal (prompt ou powershell):  pip install -r requirements.txt

- Importation des librairies

In [88]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import random
import warnings 
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns",None)
random.seed(123)

## Chargement des données

In [89]:
# Fonction pour charger les données

def load(file:str, size:int = 10**5, sep:str = ','):
    """--Docstring--
    fonction pour charger les données csv
    tout en optimisant la mémoire du système
    avec les fichiers volumineux. L'option
    size permet de charger les données par
    partition.
    Args:
        file: (string, file.csv or path )
        size: taille de la partition (integer, default = 100000)
              si la mémoire est insuffisante pour importer en un
              bloc.
        sep: séparateur (string, default = <,>)
    """

    try:

        data = pd.read_csv(file, sep=sep, index_col=0)
    
    except MemoryError:

        data = pd.read_csv(file, sep=sep, index_col=0,
                            chunksize = size)
    
    data.index.name = 'index'
    
    return data


In [90]:
# Chargement des données
path = "credit_card_transactions.csv"
financial_data = load(path) 

## Exploration des données

### affichage des données

In [91]:
financial_data.head(5)

Unnamed: 0_level_0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,city,state,zip,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,Moravian Falls,NC,28654,36.0788,-81.1781,3495,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0,28705.0
1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,Orient,WA,99160,48.8878,-118.2105,149,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0,
2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,Malad City,ID,83252,42.1808,-112.262,4154,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0,83236.0
3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,Boulder,MT,59632,46.2306,-112.1138,1939,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0,
4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,Doe Hill,VA,24433,38.4207,-79.4629,99,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0,22844.0


### description sur les types de variables

In [92]:
financial_data.columns

Index(['trans_date_trans_time', 'cc_num', 'merchant', 'category', 'amt',
       'first', 'last', 'gender', 'street', 'city', 'state', 'zip', 'lat',
       'long', 'city_pop', 'job', 'dob', 'trans_num', 'unix_time', 'merch_lat',
       'merch_long', 'is_fraud', 'merch_zipcode'],
      dtype='object')

In [93]:
# Fonction pour décrire le dataframe

def info(data):

    Information = pd.DataFrame({
        'Variables': data.columns,
        'Type': data.dtypes,
        'Unique_values': data.nunique(),
        'NA_counts': data.isna().sum(),
        'NA_percent%':data.isna().mean().round(4)*100
        }).reset_index(drop=True)

    return Information

# Application sur financial_data
display(info(financial_data))

Unnamed: 0,Variables,Type,Unique_values,NA_counts,NA_percent%
0,trans_date_trans_time,object,1274791,0,0.0
1,cc_num,int64,983,0,0.0
2,merchant,object,693,0,0.0
3,category,object,14,0,0.0
4,amt,float64,52928,0,0.0
5,first,object,352,0,0.0
6,last,object,481,0,0.0
7,gender,object,2,0,0.0
8,street,object,983,0,0.0
9,city,object,894,0,0.0


### mise en forme du type des variables

In [94]:
# fonction pour mettre en forme

def conversion(var):

    # conversion des variables de type = <date>
    date_vars = ('trans_date_trans_time', 'dob')

    if var.name in date_vars:
        
        return pd.to_datetime(var, infer_datetime_format = True)


    # conversion des variables type = <objet>
    if (var.dtype=='object') and (var.name not in date_vars):

        # conversion en string
        if var.nunique() > 20:
            return var.astype('string')

        # conversion en catégorie 
        else:
            return var.astype('category')
    
    else:
        return var 


In [95]:
# appication de la fonction sur les variables 
financial_data = financial_data.apply(conversion, axis = 0)
info(financial_data)

Unnamed: 0,Variables,Type,Unique_values,NA_counts,NA_percent%
0,trans_date_trans_time,datetime64[ns],1274791,0,0.0
1,cc_num,int64,983,0,0.0
2,merchant,string[python],693,0,0.0
3,category,category,14,0,0.0
4,amt,float64,52928,0,0.0
5,first,string[python],352,0,0.0
6,last,string[python],481,0,0.0
7,gender,category,2,0,0.0
8,street,string[python],983,0,0.0
9,city,string[python],894,0,0.0


### Création de nouvelles variables

- Variables dates

In [96]:
# mettre la variable trans_date_... au format 'jan-01-2020'
# avec une nouvelle variable

financial_data['date_transaction'] = (financial_data['trans_date_trans_time']
                                                   .dt.strftime('%b-%d-%Y'))

# Extraire des informations sur la transaction : jour, mois, heure

financial_data['day_transaction'] = (financial_data['trans_date_trans_time']
                                                   .dt.day_name().astype('category'))

financial_data['month_transaction'] = (financial_data['trans_date_trans_time']
                                                   .dt.month_name().astype('category'))

financial_data['hour_transaction'] = (financial_data['trans_date_trans_time']
                                                   .dt.strftime('%H.%M')).astype(float)

# Créer une variable moment de la journée à partir de heure_transaction

condlist = [financial_data['hour_transaction'].between(6,12.59),
            financial_data['hour_transaction'].between(13,18.59),
            financial_data['hour_transaction'].between(19,23.59),
            financial_data['hour_transaction'].between(0,5.59)
            ]
choicelist = ['Morning: 6h-12h59',
              'Afternoon: 13h-18h59',
              'Evening: 19h-23h59',
              'Night: 00h-5h59'
              ]

financial_data['moment_transaction'] = np.select(condlist, choicelist, default=None)
financial_data['moment_transaction'] = financial_data['moment_transaction'].astype('category')

In [97]:
info(financial_data)

Unnamed: 0,Variables,Type,Unique_values,NA_counts,NA_percent%
0,trans_date_trans_time,datetime64[ns],1274791,0,0.0
1,cc_num,int64,983,0,0.0
2,merchant,string[python],693,0,0.0
3,category,category,14,0,0.0
4,amt,float64,52928,0,0.0
5,first,string[python],352,0,0.0
6,last,string[python],481,0,0.0
7,gender,category,2,0,0.0
8,street,string[python],983,0,0.0
9,city,string[python],894,0,0.0


- Variables Âge, Nom

In [102]:
# Définir la date de référence pour le calcul de l'âge
reference_date = pd.to_datetime('2020-12-31')

# Calcul de l'âge de l'individu à partir de la date de naissance (dob) 
financial_data['age'] = ((reference_date - financial_data['dob']).dt.days/365).astype('int')

# Création de la variable groupe d'âge
financial_data['group_age']=pd.cut(financial_data['age'],
                                   bins=[14,31,46,60,100],
                                   labels=['15-30 ans', '31-45 ans','46-60 ans' ,'+60 ans']
                                  ).astype('category')

# Concaténer first(prénom) et last(nom_de_famille) pour avoir
# nom complet
financial_data['fullname'] = (financial_data['first'] +' '+ financial_data['last']).astype('string')

In [103]:
info(financial_data)

Unnamed: 0,Variables,Type,Unique_values,NA_counts,NA_percent%
0,trans_date_trans_time,datetime64[ns],1274791,0,0.0
1,cc_num,int64,983,0,0.0
2,merchant,string[python],693,0,0.0
3,category,category,14,0,0.0
4,amt,float64,52928,0,0.0
5,first,string[python],352,0,0.0
6,last,string[python],481,0,0.0
7,gender,category,2,0,0.0
8,street,string[python],983,0,0.0
9,city,string[python],894,0,0.0


In [105]:
financial_data.head(5)

Unnamed: 0_level_0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,city,state,zip,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode,date_transaction,day_transaction,month_transaction,hour_transaction,moment_transaction,age,group_age,fullname
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,Moravian Falls,NC,28654,36.0788,-81.1781,3495,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0,28705.0,Jan-01-2019,Tuesday,January,0.0,Night: 00h-5h59,32,31-45 ans,Jennifer Banks
1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,Orient,WA,99160,48.8878,-118.2105,149,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0,,Jan-01-2019,Tuesday,January,0.0,Night: 00h-5h59,42,31-45 ans,Stephanie Gill
2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,Malad City,ID,83252,42.1808,-112.262,4154,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0,83236.0,Jan-01-2019,Tuesday,January,0.0,Night: 00h-5h59,58,46-60 ans,Edward Sanchez
3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,Boulder,MT,59632,46.2306,-112.1138,1939,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0,,Jan-01-2019,Tuesday,January,0.01,Night: 00h-5h59,54,46-60 ans,Jeremy White
4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,Doe Hill,VA,24433,38.4207,-79.4629,99,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0,22844.0,Jan-01-2019,Tuesday,January,0.03,Night: 00h-5h59,34,31-45 ans,Tyler Garcia


### reorganisation des colonnes et exportation des données finales

In [110]:
# Réorganisation des columns

financial_data.rename(columns={'category':'business_sector',
                               'cc_num':'card_number'}, inplace = True)

list_vars = ['card_number', 'date_transaction', 'month_transaction',
             'day_transaction', 'hour_transaction', 'moment_transaction',
             'fullname', 'gender', 'age', 'group_age', 'job', 'business_sector',
             'city', 'city_pop', 'state', 'lat', 'long', 'is_fraud', 'trans_num']

financial_data[list_vars].sort_values(by = 'date_transaction',
                                      ascending = False,
                                      ignore_index=True).head(10)

Unnamed: 0,card_number,date_transaction,month_transaction,day_transaction,hour_transaction,moment_transaction,fullname,gender,age,group_age,job,business_sector,city,city_pop,state,lat,long,is_fraud,trans_num
0,3581130339108561,Sep-30-2019,September,Monday,5.12,Night: 00h-5h59,Gregory Wood,M,56,46-60 ans,Call centre manager,shopping_pos,Akron,272134,OH,41.0695,-81.5488,0,511adc9edb98bf6a4d7d71096551e422
1,4740713119940984,Sep-30-2019,September,Monday,10.08,Morning: 6h-12h59,Heather Hines,F,58,46-60 ans,Pensions consultant,grocery_net,Pomona,9993,NY,41.1901,-74.0436,0,b34d243707b47b905be1ba2f08fe61bc
2,3546897637165774,Sep-30-2019,September,Monday,9.58,Morning: 6h-12h59,Kayla Obrien,F,64,+60 ans,Barrister,grocery_pos,Prairie Hill,263,TX,31.6591,-96.8094,0,71936c790e0fa0e241e2a8c8066da67f
3,4025612008285111,Sep-30-2019,September,Monday,9.54,Morning: 6h-12h59,Krystal Key,F,71,+60 ans,Maintenance engineer,gas_transport,Corsica,1274,PA,41.1762,-79.1976,0,dfbd0429d2aff816111bee82554fcfce
4,3528407217576457,Sep-30-2019,September,Monday,9.53,Morning: 6h-12h59,Patricia Leach,F,33,31-45 ans,Warden/ranger,grocery_pos,Kingsport,87124,TN,36.4715,-82.4834,0,c01c0b2acda84ab0b6daa25ae743ea19
5,4969856774088583,Sep-30-2019,September,Monday,9.52,Morning: 6h-12h59,Jacqueline Prince,F,69,+60 ans,Advertising account planner,gas_transport,Metairie,137067,LA,29.9975,-90.2146,0,087b4bb42cbdddd8c47a208932501886
6,180094419304907,Sep-30-2019,September,Monday,9.51,Morning: 6h-12h59,Sharon Smith,F,48,46-60 ans,"Designer, exhibition/display",misc_net,Sun City,54287,CA,33.7467,-117.1721,0,68d24d1a313b27a00a20d79d343f3fbc
7,4836998673805450,Sep-30-2019,September,Monday,9.51,Morning: 6h-12h59,Susan Hardy,F,41,31-45 ans,Trade mark attorney,grocery_pos,Manistique,6469,MI,46.0062,-86.2555,0,68e8d5e9cb3169926b1d5605892a5fc6
8,180069253480336,Sep-30-2019,September,Monday,9.51,Morning: 6h-12h59,William Hunter,M,51,46-60 ans,Public affairs consultant,entertainment,Emporium,4533,PA,41.5177,-78.2536,0,0abd86561ce8c8abaefbfe26e506ab4d
9,6011603272871604,Sep-30-2019,September,Monday,9.5,Morning: 6h-12h59,Robert Holland,M,51,46-60 ans,Field seismologist,misc_pos,Milner,4138,GA,33.141,-84.1759,0,99e8bcd85da2978dfb01bb782fe06b17
