In [34]:
import numpy as np
import pandas as pd

In [35]:
# Build a small example DataFrame: one row per person, with a name and an age.
nomes = ["Tonho", "Nicão", "Jão", "Pedrão"]
idades = [15, 26, 17, 28]
dataframe = pd.DataFrame({"Nome": nomes, "Idade": idades})
# Show the plain-text rendering of the frame.
print(dataframe)

     Nome  Idade
0   Tonho     15
1   Nicão     26
2     Jão     17
3  Pedrão     28


In [36]:
# Renaming the columns - Option I: assign the complete list of new labels to
# the `columns` attribute (one label per existing column, in positional order).
novos_rotulos = ["Nome_Colega", "Idade_Anos"]
dataframe.columns = novos_rotulos
print(dataframe)

  Nome_Colega  Idade_Anos
0       Tonho          15
1       Nicão          26
2         Jão          17
3      Pedrão          28


In [37]:
# Renaming the columns - Option II: give rename() an {old_label: new_label}
# mapping. rename() returns a new DataFrame, which is bound back to `dataframe`.
traducao_colunas = {
    "Nome_Colega": "Friends_Name",
    "Idade_Anos": "Age_Years",
}
dataframe = dataframe.rename(columns=traducao_colunas)
print(dataframe)

  Friends_Name  Age_Years
0        Tonho         15
1        Nicão         26
2          Jão         17
3       Pedrão         28


In [38]:
# Removing the "Age" column from the `people` DataFrame.
# drop() is the usual tool for deleting columns. With inplace=True (note: all
# lowercase) the column is removed from `people` itself, so the result does
# not have to be assigned back to a variable.
colunas_lidas = ["Attrition", "Age", "StockOptionLevel"]
people = pd.read_csv("PeopleAnalytics.txt", sep="\t", usecols=colunas_lidas)
print(people)  # before: "Age" is still present
people.drop(columns=["Age"], inplace=True)
print(people)  # after: "Age" has been removed

     Attrition  Age  StockOptionLevel
0          Yes   41                 0
1           No   49                 1
2          Yes   37                 0
3           No   33                 0
4           No   27                 1
...        ...  ...               ...
1465        No   36                 1
1466        No   39                 1
1467        No   27                 1
1468        No   49                 0
1469        No   34                 0

[1470 rows x 3 columns]
     Attrition  StockOptionLevel
0          Yes                 0
1           No                 1
2          Yes                 0
3           No                 0
4           No                 1
...        ...               ...
1465        No                 1
1466        No                 1
1467        No                 1
1468        No                 0
1469        No                 0

[1470 rows x 2 columns]


In [39]:
# Build a sample dataset of events: date, kind of event and cost.
datas = ["10/2/2021", "11/2/2021", "12/2/2021", "13/2/2021"]
eventos = ["Música", "Poesia", "Teatro", "Comédia"]
custos = [10000, 5000, 15000, 2000]
base = pd.DataFrame({"Data": datas, "Evento": eventos, "Custo": custos})
print(base)

        Data   Evento  Custo
0  10/2/2021   Música  10000
1  11/2/2021   Poesia   5000
2  12/2/2021   Teatro  15000
3  13/2/2021  Comédia   2000


In [40]:
# Price with a 10% discount - Option I: evaluate a function once per row
# with apply(axis=1).
def _custo_com_desconto(linha):
    """Return the row's Custo minus 10% of that same Custo."""
    return linha.Custo - (linha.Custo * 0.1)


base["Custo_com_Desconto"] = base.apply(_custo_com_desconto, axis=1)
print(base)

        Data   Evento  Custo  Custo_com_Desconto
0  10/2/2021   Música  10000              9000.0
1  11/2/2021   Poesia   5000              4500.0
2  12/2/2021   Teatro  15000             13500.0
3  13/2/2021  Comédia   2000              1800.0


In [41]:
# Price with a 10% discount - Option II: arithmetic on the whole column at
# once, with no per-row function call.
custo = base["Custo"]
base["Custo_com_Desconto_I"] = custo - (0.1 * custo)
print(base)

        Data   Evento  Custo  Custo_com_Desconto  Custo_com_Desconto_I
0  10/2/2021   Música  10000              9000.0                9000.0
1  11/2/2021   Poesia   5000              4500.0                4500.0
2  12/2/2021   Teatro  15000             13500.0               13500.0
3  13/2/2021  Comédia   2000              1800.0                1800.0


In [42]:
# Categorising the Custo variable.
# There are many ways to derive a categorical variable from an existing one
# (numeric or categorical). NumPy's np.select() function is handy here: each
# row receives the label paired with the first condition that is True for it,
# and the `default` label when no condition matches.
# NumPy is imported as `np` in the imports cell at the top of the notebook, so
# every dependency is visible in one place.
condicoes = [
    base["Custo"] <= 2500,
    (base["Custo"] > 2500) & (base["Custo"] <= 5000),
    base["Custo"] > 5000,
]
# Labels are positional: classe[i] is assigned where condicoes[i] holds.
classe = ["Baixo", "Medio", "Alto"]
base["Fix_Custo"] = np.select(condicoes, classe, default="Sem Info")
base

Unnamed: 0,Data,Evento,Custo,Custo_com_Desconto,Custo_com_Desconto_I,Fix_Custo
0,10/2/2021,Música,10000,9000.0,9000.0,Alto
1,11/2/2021,Poesia,5000,4500.0,4500.0,Medio
2,12/2/2021,Teatro,15000,13500.0,13500.0,Alto
3,13/2/2021,Comédia,2000,1800.0,1800.0,Baixo


In [43]:
# Sample data for the grouping examples: a key column (coluna_I) and a
# numeric column (coluna_II).
chaves = list("ABCACCBDDA")
valores = list(range(5, 51, 5))  # 5, 10, ..., 50
df = pd.DataFrame({"coluna_I": chaves, "coluna_II": valores})
df

Unnamed: 0,coluna_I,coluna_II
0,A,5
1,B,10
2,C,15
3,A,20
4,C,25
5,C,30
6,B,35
7,D,40
8,D,45
9,A,50


In [44]:
# Group the rows by coluna_I and add up coluna_II for each distinct value of
# coluna_I - Option I: select the column on the grouped object and call sum().
grupos = df.groupby("coluna_I")
df1 = grupos["coluna_II"].sum()
df1

coluna_I
A    75
B    45
C    70
D    85
Name: coluna_II, dtype: int64

In [45]:
# Group the rows by coluna_I and add up coluna_II for each distinct value of
# coluna_I - Option II: agg() with a {column: aggregation} mapping.
# The aggregation is named with the string "sum" instead of passing Python's
# built-in `sum`: handing the built-in to agg() is deprecated in recent pandas
# releases (it emits a FutureWarning), and the string form matches Option III.
df.groupby("coluna_I").agg({"coluna_II": "sum"})

Unnamed: 0_level_0,coluna_II
coluna_I,Unnamed: 1_level_1
A,75
B,45
C,70
D,85


In [46]:
# Group the rows by coluna_I and aggregate coluna_II for each distinct value
# of coluna_I - Option III: several aggregations at once (sum and mean),
# requested as a list of names in the agg() mapping.
agregacoes = {"coluna_II": ["sum", "mean"]}
df.groupby("coluna_I").agg(agregacoes)

Unnamed: 0_level_0,coluna_II,coluna_II
Unnamed: 0_level_1,sum,mean
coluna_I,Unnamed: 1_level_2,Unnamed: 2_level_2
A,75,25.0
B,45,22.5
C,70,23.333333
D,85,42.5


In [47]:
# Build two small tables that share the key column "id" (used by the join
# examples): base_A carries a balance, base_B carries a second attribute for
# only some of the ids.
base_A = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6, 7],
        "saldo": [100, 200, 100, 300, 400, 500, 690],
    }
)
print(base_A)
base_B = pd.DataFrame(
    {
        "id": [1, 2, 5, 6, 7],
        "idade_empresa": [15, 22, 11, 13, 8],
    }
)
print(base_B)

   id  saldo
0   1    100
1   2    200
2   3    100
3   4    300
4   5    400
5   6    500
6   7    690
   id  idade_empresa
0   1             15
1   2             22
2   5             11
3   6             13
4   7              8


In [48]:
# Joining tables A and B - inner join: keep only the ids present in both.
pd.merge(left=base_A, right=base_B, how="inner", on="id")

Unnamed: 0,id,saldo,idade_empresa
0,1,100,15
1,2,200,22
2,5,400,11
3,6,500,13
4,7,690,8


In [49]:
# Joining tables A and B - left join: keep every row of A; columns coming
# from B are left missing where A's id has no match in B.
pd.merge(left=base_A, right=base_B, how="left", on="id")

Unnamed: 0,id,saldo,idade_empresa
0,1,100,15.0
1,2,200,22.0
2,3,100,
3,4,300,
4,5,400,11.0
5,6,500,13.0
6,7,690,8.0


In [50]:
# Joining tables A and B - right join: keep every row of B, matched against A
# on "id".
pd.merge(left=base_A, right=base_B, how="right", on="id")

Unnamed: 0,id,saldo,idade_empresa
0,1,100,15
1,2,200,22
2,5,400,11
3,6,500,13
4,7,690,8


In [51]:
# Joining tables A and B - full (outer) join: keep the ids of both tables,
# leaving values missing where one side has no match.
pd.merge(left=base_A, right=base_B, how="outer", on="id")

Unnamed: 0,id,saldo,idade_empresa
0,1,100,15.0
1,2,200,22.0
2,3,100,
3,4,300,
4,5,400,11.0
5,6,500,13.0
6,7,690,8.0


In [56]:
# Load the "BASE_DADOS" sheet of the Excel workbook into a DataFrame.
indsocdem = pd.read_excel(
    io="Indicadores Socio-Demograficos.xlsx",
    sheet_name="BASE_DADOS",
)
indsocdem

Unnamed: 0,CIDADE,ESTADO,IBGE_RES_POP,IBGE_POP_URB,POP_ABAIXO_1,POP_1_4,POP_5_9,POP_10-14,POP_15-59,POP_60_MAIS,...,IDHM_Educacao,LONG,LAT,TV_CABO,POP_ESTIMADA,PIB,PIB_CAPITA,CARROS,MOTOS,TOT_EMPRESAS
0,Abadia De Goiás,GO,6876.0,5300.0,69.0,318.0,438.0,517.0,3542.0,416.0,...,0.622,-49.440548,-16.758812,360.0,8583.0,166.41,20664.57,2158.0,1246.0,284.0
1,Abadia Dos Dourados,MG,6704.0,4154.0,38.0,207.0,260.0,351.0,2709.0,589.0,...,0.563,-47.396832,-18.487565,77.0,6972.0,180.09,25591.70,2227.0,1142.0,476.0
2,Abadiânia,GO,15757.0,10656.0,139.0,650.0,894.0,1087.0,6896.0,990.0,...,0.579,-48.718812,-16.182672,227.0,19614.0,287984.49,15628.40,2838.0,1426.0,288.0
3,Abaeté,MG,22690.0,18464.0,176.0,856.0,1233.0,1539.0,11979.0,2681.0,...,0.556,-45.446191,-19.155848,1230.0,23223.0,430235.36,18250.42,6928.0,2953.0,621.0
4,Abaetetuba,PA,141100.0,82956.0,1354.0,5567.0,7618.0,8905.0,53516.0,5996.0,...,0.537,-48.884404,-1.723470,3389.0,156292.0,1249255.29,8222.36,5277.0,25661.0,931.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5568,Xique-Xique,BA,45536.0,32497.0,518.0,2318.0,3095.0,3620.0,19371.0,3575.0,...,0.479,-42.725508,-10.824974,169.0,46440.0,342261.06,7089.97,2125.0,5064.0,420.0
5569,Zabelê,PB,2075.0,1469.0,20.0,110.0,140.0,121.0,874.0,204.0,...,0.587,-37.093552,-8.076874,2.0,2225.0,19582.06,8793.02,87.0,162.0,20.0
5570,Zacarias,SP,2335.0,1836.0,21.0,104.0,123.0,127.0,1165.0,296.0,...,0.674,-50.055740,-21.050110,155.0,2684.0,75934.89,29149.67,901.0,346.0,111.0
5571,Zé Doca,MA,50173.0,30803.0,541.0,2366.0,3087.0,3505.0,18136.0,3168.0,...,0.505,-45.657698,-3.275481,6493.0,51471.0,380509.30,7489.46,1725.0,11087.0,470.0


In [58]:
# Draw a random sample of 10 municipalities (rows) from the current table.
# No random_state is given, so each run returns a different sample.
indsocdem.sample(n=10)

Unnamed: 0,CIDADE,ESTADO,IBGE_RES_POP,IBGE_POP_URB,POP_ABAIXO_1,POP_1_4,POP_5_9,POP_10-14,POP_15-59,POP_60_MAIS,...,IDHM_Educacao,LONG,LAT,TV_CABO,POP_ESTIMADA,PIB,PIB_CAPITA,CARROS,MOTOS,TOT_EMPRESAS
5454,Várzea Do Poço,BA,8661.0,5784.0,90.0,319.0,432.0,501.0,3613.0,829.0,...,0.462,-40.318918,-11.52398,638.0,9130.0,79762.02,8481.71,702.0,1033.0,87.0
2427,Itaúna Do Sul,PR,3583.0,2546.0,32.0,147.0,194.0,236.0,1517.0,420.0,...,0.543,-52.885025,-22.726877,92.0,2951.0,71155.24,21640.89,1126.0,616.0,81.0
1965,Grão Mogol,MG,15024.0,5188.0,59.0,314.0,450.0,553.0,3330.0,482.0,...,0.482,-42.888013,-16.566495,224.0,15779.0,278094.11,17523.26,1163.0,1337.0,189.0
1850,Francisco Macedo,PI,2879.0,1137.0,27.0,81.0,92.0,99.0,713.0,125.0,...,0.425,-40.795567,-7.327871,9.0,3166.0,24.12,7748.65,214.0,245.0,11.0
1443,Coxixola,PB,1771.0,782.0,13.0,41.0,58.0,43.0,486.0,141.0,...,0.567,-36.60483,-7.628797,11.0,1907.0,18323.2,9598.32,102.0,174.0,26.0
1369,Constantina,RS,9752.0,6509.0,69.0,316.0,403.0,495.0,4247.0,979.0,...,0.668,-52.987739,-27.730411,423.0,9915.0,266161.98,26350.06,4670.0,1124.0,522.0
4525,Santanópolis,BA,8776.0,1679.0,26.0,109.0,160.0,139.0,1041.0,204.0,...,0.496,-38.867162,-12.028407,51.0,8920.0,56295.3,6097.84,582.0,757.0,35.0
4900,São Sebastião Do Maranhão,MG,10647.0,3237.0,42.0,197.0,338.0,367.0,1850.0,443.0,...,0.427,-42.565746,-18.091372,111.0,10129.0,69593.54,6587.8,774.0,1541.0,81.0
5346,Turvolândia,MG,4658.0,2860.0,45.0,122.0,232.0,239.0,1881.0,341.0,...,0.576,-45.796736,-21.879389,292.0,5008.0,97494.11,19518.34,1318.0,685.0,145.0
4164,Ribeira Do Piauí,PI,4263.0,1018.0,23.0,78.0,96.0,100.0,617.0,104.0,...,0.368,-42.714043,-7.690576,27.0,4464.0,42618.38,9701.43,37.0,100.0,30.0


In [59]:
# Draw a random sample of 0.1% of the municipalities of the original table.
# `frac` is a proportion, not a percentage: 0.001 means 0.1% of the rows.
fracao_amostral = 0.001
indsocdem.sample(frac=fracao_amostral)

Unnamed: 0,CIDADE,ESTADO,IBGE_RES_POP,IBGE_POP_URB,POP_ABAIXO_1,POP_1_4,POP_5_9,POP_10-14,POP_15-59,POP_60_MAIS,...,IDHM_Educacao,LONG,LAT,TV_CABO,POP_ESTIMADA,PIB,PIB_CAPITA,CARROS,MOTOS,TOT_EMPRESAS
5221,Terra Nova,PE,9278.0,5012.0,75.0,372.0,485.0,530.0,3044.0,506.0,...,0.533,-39.374422,-8.230358,26.0,9983.0,62576.12,6067.69,693.0,1439.0,60.0
1562,Dionísio,MG,8739.0,7122.0,75.0,359.0,500.0,614.0,4547.0,1027.0,...,0.647,-42.782368,-19.843997,66.0,7852.0,69043.72,8246.0,1166.0,698.0,204.0
5301,Trindade,PE,26116.0,20550.0,405.0,1587.0,2081.0,2366.0,12245.0,1866.0,...,0.455,-40.265591,-7.761795,160.0,30222.0,291261.08,9760.11,2236.0,6128.0,387.0
2698,Lagoa Do Tocantins,TO,3525.0,2538.0,68.0,254.0,322.0,325.0,1377.0,192.0,...,0.471,-47.561447,-10.296887,28.0,4237.0,34778.82,8451.72,82.0,456.0,26.0
1864,Frei Rogério,SC,2474.0,706.0,5.0,25.0,63.0,86.0,446.0,81.0,...,0.588,-50.806408,-27.174751,74.0,2077.0,60429.66,27505.54,852.0,410.0,36.0
5211,Terenos,MS,17146.0,7240.0,129.0,463.0,602.0,617.0,4677.0,752.0,...,0.521,-54.865557,-20.442308,522.0,21311.0,461938.97,22658.51,2793.0,1962.0,236.0


In [60]:
# Draw a random sample of 0.1% of the municipalities of the original table
# (`frac` is a proportion: 0.001 means 0.1% of the rows), fixing
# random_state so that the same rows are returned on every run.
fracao_amostral = 0.001
indsocdem.sample(frac=fracao_amostral, random_state=123)

Unnamed: 0,CIDADE,ESTADO,IBGE_RES_POP,IBGE_POP_URB,POP_ABAIXO_1,POP_1_4,POP_5_9,POP_10-14,POP_15-59,POP_60_MAIS,...,IDHM_Educacao,LONG,LAT,TV_CABO,POP_ESTIMADA,PIB,PIB_CAPITA,CARROS,MOTOS,TOT_EMPRESAS
3237,Não-Me-Toque,RS,15936.0,13904.0,169.0,643.0,828.0,982.0,9466.0,1816.0,...,0.673,-52.819279,-28.459907,666.0,17484.0,1019762.59,59656.17,7598.0,2365.0,1034.0
843,Cachoeira De Minas,MG,11034.0,7000.0,93.0,377.0,495.0,611.0,4484.0,940.0,...,0.637,-45.782138,-22.353984,574.0,11514.0,330805.98,28500.56,2638.0,1165.0,251.0
3521,Palestina De Goiás,GO,3371.0,2177.0,12.0,98.0,145.0,178.0,1460.0,284.0,...,0.66,-51.530032,-16.741432,28.0,3460.0,99701.83,28429.38,450.0,499.0,35.0
2123,Ibitiúra De Minas,MG,3382.0,2347.0,20.0,91.0,192.0,186.0,1504.0,354.0,...,0.579,-46.437603,-22.057055,135.0,3483.0,51606.97,14627.83,928.0,427.0,53.0
738,Botuverá,SC,4468.0,1308.0,18.0,44.0,75.0,82.0,932.0,157.0,...,0.604,-49.078484,-27.202107,322.0,5169.0,249518.83,49685.15,2094.0,1274.0,211.0
923,Camargo,RS,2592.0,1095.0,10.0,48.0,84.0,57.0,703.0,193.0,...,0.612,-52.203799,-28.589238,34.0,2724.0,174032.32,63841.64,1255.0,251.0,95.0
