<a href="https://colab.research.google.com/github/Rafae1040/client-segment/blob/main/Segmentacao_de_Cliente.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Segmentação de Clientes usando Machine Learning

In [1]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the customer CSV into a DataFrame

caminho_dados = '/content/dados_clientes.csv'
df = pd.read_csv(caminho_dados)

In [13]:
# Preview the first 5 rows of the loaded data

df.head(n=5)

Unnamed: 0,id,idade,renda_anual,pontuacao_gastos
0,1,56,94740,90
1,2,69,136352,50
2,3,46,86617,62
3,4,32,114841,97
4,5,60,36896,51


# Análise Exploratória

In [7]:
# Summary statistics for the three customer attributes used in the analysis

colunas_resumo = ['idade', 'renda_anual', 'pontuacao_gastos']
df[colunas_resumo].describe()

Unnamed: 0,idade,renda_anual,pontuacao_gastos
count,500.0,500.0,500.0
mean,44.732,81557.166,48.512
std,15.239707,36764.379945,29.556946
min,18.0,20384.0,0.0
25%,32.0,49172.75,24.0
50%,45.0,79219.0,48.5
75%,57.0,113017.25,73.25
max,70.0,149695.0,100.0


# Pré-Processamento dos Dados

In [8]:
# Create the scaler that standardizes each feature: centre on the mean and
# scale to unit variance (both are the StandardScaler defaults, spelled out)

padronizador = StandardScaler(with_mean=True, with_std=True)

In [10]:
# Fit the scaler on the columns of interest only and standardize them

colunas_interesse = ['idade', 'renda_anual', 'pontuacao_gastos']
dados_padronizados = padronizador.fit_transform(df[colunas_interesse])

In [12]:
# Show the standardized values, one row per customer and one column per
# scaled feature (the printed output is abbreviated with an ellipsis)
print(dados_padronizados)

[[ 0.74012478  0.35893538  1.40506904]
 [ 1.59401387  1.49192537  0.05039391]
 [ 0.08328703  0.13776654  0.45679645]
 ...
 [-0.31081563  0.32661636  0.18586143]
 [-1.23038848 -1.48706069  1.43893592]
 [-1.03333716 -0.96992912 -0.59307677]]


# Construção do Modelo de Machine Learning para Segmentação de Clientes

In [40]:
# Number of clusters (k) passed to K-means as n_clusters

k: int = 3

In [41]:
# Create the K-means model.
# random_state fixes the centroid initialisation so the cluster labels (and
# the CSV saved later) are identical on every run; without it each run can
# assign customers to differently numbered clusters.
# n_init is set explicitly because its default differs between scikit-learn
# versions.

kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)

In [42]:
# Train the model on the standardized data

kmeans.fit(dados_padronizados)



In [43]:
# Attach each customer's cluster label as a new 'cluster' column

df = df.assign(cluster=kmeans.labels_)

In [44]:
# Display the first 10 rows of the labelled data

df.iloc[:10]

Unnamed: 0,id,idade,renda_anual,pontuacao_gastos,cluster
0,1,56,94740,90,0
1,2,69,136352,50,0
2,3,46,86617,62,2
3,4,32,114841,97,1
4,5,60,36896,51,2
5,6,25,145729,37,1
6,7,38,66175,96,2
7,8,56,27805,87,2
8,9,36,25237,78,2
9,10,40,135247,29,0


In [45]:
# Write the segmented customers to disk without the index column

arquivo_saida = 'dados_segmentos.csv'
df.to_csv(arquivo_saida, index=False)

# Gerando Relatório dos Segmentos de Clientes com Power BI

In [30]:
# Instala o pacote
!pip install -q powerbiclient

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m684.9/684.9 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.0/99.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.9/316.9 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m49.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone


In [31]:
# Carrega as funções usadas para autenticar e gerar relatorios

from powerbiclient import QuickVisualize, get_dataset_config, Report
from powerbiclient.authentication import DeviceCodeLoginAuthentication

In [32]:
# Authenticate with the Power BI service using the device-code login flow;
# the resulting object is passed as `auth` when the report is created

device_auth = DeviceCodeLoginAuthentication()

Performing device flow authentication. Please follow the instructions below.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code F4JHYAMJH to authenticate.

Device flow authentication successfully completed.
You are now logged in .

The result should be passed only to trusted code in your notebook.


In [46]:
# Build the Power BI quick-visualization report from the segmented DataFrame

config_dataset = get_dataset_config(df)
relatorio_PBI = QuickVisualize(config_dataset, auth=device_auth)

In [47]:
# Render the report widget.
# Colab displays third-party widgets only after the custom widget manager has
# been enabled, so enable it here instead of depending on a cell that sits
# further down the notebook having been run first.
from google.colab import output
output.enable_custom_widget_manager()

relatorio_PBI

QuickVisualize()

In [38]:
# Enable Colab's custom widget manager so third-party widgets can be displayed
# NOTE(review): this cell's execution counter (38) is lower than the render
# cell's (47), yet it sits after that cell in the file; on a top-to-bottom run
# this call happens after the render cell; confirm the intended order
from google.colab import output
output.enable_custom_widget_manager()

Support for third party widgets will remain active for the duration of the session. To disable support:

In [37]:
# Turn Colab's custom widget manager back off
# NOTE(review): this cell's execution counter (37) is lower than the enable
# cell's (38), yet it comes after that cell in the file; confirm the intended
# order
from google.colab import output
output.disable_custom_widget_manager()