# 1) Preparación previa

### Carga de librerías

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from IPython.display import display

### Lectura del dataset

Se decidió utilizar un conjunto de datos de votos legislativos de EE. UU. que describe si cada legislador votó afirmativamente, negativamente o se abstuvo en 16 leyes.

In [None]:
# Source CSV hosted on GitHub; kept in a named constant so the location is easy to spot and change.
URL_VOTOS = 'https://raw.githubusercontent.com/Agustin-Bulzomi/Projects/main/Programming/Digital%20House/Support%20Files/Project%203/House_votes_1984.csv'
df_votes = pd.read_csv(URL_VOTOS)

### Vista general

Se corren varias funciones para obtener un resumen general del dataset

In [None]:
# Column names, non-null counts and dtypes of the loaded frame.
df_votes.info()

In [None]:
# First ten rows, to see what the raw values look like.
df_votes.head(10)

# 2) Análisis exploratorio

Se comienza con el análisis general: ¿cómo se distribuye la variable target? ¿están balanceados los datos?

In [None]:
# Absolute and relative frequency of each class, side by side, to check how balanced the target is.
conteo = df_votes['Class Name'].value_counts()
proporcion = df_votes['Class Name'].value_counts(normalize=True).round(2)
balance = pd.concat([conteo, proporcion], axis=1)
balance.columns = ['Votos', 'Proporción']
balance

In [None]:
# Palette with the usual party colours; it is reused by every chart further down.
# It is keyed by party label instead of being a plain list: a list is assigned by
# order of appearance of the labels, so the colours would silently swap if the
# rows were ordered differently. 'democrat' is pinned to blue and any other label
# to red, mirroring the binary democrat / non-democrat split used for Target.
color_partidos = {
    partido: ('blue' if partido == 'democrat' else 'red')
    for partido in df_votes['Class Name'].unique()
}

# Number of legislators per party.
g = sns.catplot(x='Class Name', 
                    data=df_votes, kind="count", palette=color_partidos,
                    height=5, aspect=.8);

Hay un importante desbalance, considerando que casi dos tercios de los legisladores son demócratas y un tercio, republicanos. Este dato es importante a la hora de contrastar votos. A continuación se procederá con el análisis más específico, ley por ley. Para ello, se crea una nueva columna llamada Target (1 = demócrata, 0 = republicano), ya que se necesita una variable numérica para realizar el análisis.

In [None]:
# Numeric target: 1 for rows labelled 'democrat', 0 for every other row.
es_democrata = df_votes['Class Name'].eq('democrat')
df_votes['Target'] = np.where(es_democrata, 1, 0)

In [None]:
# The bill-by-bill exploration that follows could be done in a single cell with the for loop kept below for reference.
# However, per the author, the loop prints the text for all 16 bills first and the 16 charts only at the end, which makes it hard to read.
# NOTE(review): calling matplotlib.pyplot.show() after each catplot would likely interleave text and charts — confirm before enabling.

#laws = [' handicapped-infants', ' water-project-cost-sharing',
#       ' adoption-of-the-budget-resolution', ' physician-fee-freeze',
#       ' el-salvador-aid', ' religious-groups-in-schools',
#       ' anti-satellite-test-ban', ' aid-to-nicaraguan-contras', ' mx-missile',
#       ' immigration', ' synfuels-corporation-cutback', ' education-spending',
#       ' superfund-right-to-sue', ' crime', ' duty-free-exports',
#       ' export-administration-act-south-africa']

#for law in laws:
#    numero = laws.index(law) + 1
#    print("Ley N°", numero, law)
#    porcentajes = df_votes[law].value_counts()
#    print(porcentajes)
#    table = pd.concat([100*(df_votes.groupby(law)['Target'].mean().round(2)), 100* (1 - (df_votes.groupby(law)['Target'].mean().round(2)))], axis = 1)
#    table.columns = ["Democrat", "Republican"]
#    display(table)
#    grafico = sns.catplot(x = law, hue = "Class Name", data = df_votes, kind="count", palette = color_partidos, height = 8, aspect = .8);
#    grafico

## Ley N° 1: handicapped-infants

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' handicapped-infants'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' handicapped-infants')['Target'].mean().round(2)
handicapped = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
handicapped.columns = ["Democrat", "Republican"]
handicapped

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' handicapped-infants',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 2: water-project-cost-sharing

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' water-project-cost-sharing'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' water-project-cost-sharing')['Target'].mean().round(2)
water = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
water.columns = ["Democrat", "Republican"]
water

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' water-project-cost-sharing',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 3: adoption-of-the-budget-resolution

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' adoption-of-the-budget-resolution'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' adoption-of-the-budget-resolution')['Target'].mean().round(2)
adoption = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
adoption.columns = ["Democrat", "Republican"]
adoption

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' adoption-of-the-budget-resolution',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 4: physician-fee-freeze

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' physician-fee-freeze'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' physician-fee-freeze')['Target'].mean().round(2)
physician = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
physician.columns = ["Democrat", "Republican"]
physician

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' physician-fee-freeze',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 5: el-salvador-aid

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' el-salvador-aid'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' el-salvador-aid')['Target'].mean().round(2)
salvador = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
salvador.columns = ["Democrat", "Republican"]
salvador

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' el-salvador-aid',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 6: religious-groups-in-schools

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' religious-groups-in-schools'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' religious-groups-in-schools')['Target'].mean().round(2)
religious = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
religious.columns = ["Democrat", "Republican"]
religious

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' religious-groups-in-schools',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 7: anti-satellite-test-ban

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' anti-satellite-test-ban'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' anti-satellite-test-ban')['Target'].mean().round(2)
satellite = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
satellite.columns = ["Democrat", "Republican"]
satellite

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' anti-satellite-test-ban',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 8: aid-to-nicaraguan-contras

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' aid-to-nicaraguan-contras'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' aid-to-nicaraguan-contras')['Target'].mean().round(2)
nicaragua = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
nicaragua.columns = ["Democrat", "Republican"]
nicaragua

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' aid-to-nicaraguan-contras',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 9: mx-missile

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' mx-missile'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' mx-missile')['Target'].mean().round(2)
missile = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
missile.columns = ["Democrat", "Republican"]
missile

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' mx-missile',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 10: immigration

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' immigration'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' immigration')['Target'].mean().round(2)
immigration = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
immigration.columns = ["Democrat", "Republican"]
immigration

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' immigration',
    hue="Class Name",
    kind='count',
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 11: synfuels-corporation-cutback

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' synfuels-corporation-cutback'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' synfuels-corporation-cutback')['Target'].mean().round(2)
synfuels = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
synfuels.columns = ["Democrat", "Republican"]
synfuels

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' synfuels-corporation-cutback',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 12: education-spending

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' education-spending'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' education-spending')['Target'].mean().round(2)
education = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
education.columns = ["Democrat", "Republican"]
education

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' education-spending',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 13: superfund-right-to-sue

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' superfund-right-to-sue'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' superfund-right-to-sue')['Target'].mean().round(2)
superfund = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
superfund.columns = ["Democrat", "Republican"]
superfund

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' superfund-right-to-sue',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 14: crime

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' crime'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' crime')['Target'].mean().round(2)
crime = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
crime.columns = ["Democrat", "Republican"]
crime

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' crime',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 15: duty-free-exports

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' duty-free-exports'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' duty-free-exports')['Target'].mean().round(2)
dutyfree = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
dutyfree.columns = ["Democrat", "Republican"]
dutyfree

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
g = sns.catplot(
    data=df_votes,
    x=' duty-free-exports',
    hue="Class Name",
    kind="count",
    palette=color_partidos,
    height=8,
    aspect=.8,
);

## Ley N° 16: export-administration-act-south-africa

#### Totales entre ambos partidos según voto realizado

In [None]:
# Number of rows (both parties together) recorded under each vote value for this bill.
df_votes[' export-administration-act-south-africa'].value_counts()

#### Porcentaje de votos según partido

In [None]:
# Target is 1 for Democrats and 0 otherwise, so its mean within each vote value
# is the Democrat share of the legislators who cast that vote; the rest is the complement.
dem_share = df_votes.groupby(' export-administration-act-south-africa')['Target'].mean().round(2)
export = pd.concat([100 * dem_share, 100 * (1 - dem_share)], axis=1)
export.columns = ["Democrat", "Republican"]
export

#### Gráfico

In [None]:
# Count of each vote value on this bill, with one bar per party.
# Bar colours come from hue + palette only, exactly like the other bills' charts;
# a separate color= argument is redundant once a palette is supplied for the hue levels.
g = sns.catplot(x=' export-administration-act-south-africa', hue="Class Name", 
                    data=df_votes, kind="count", palette=color_partidos,
                    height = 8, aspect=.8);

# 3) Exportación

Se prepara la base de datos antes de ser exportada para aplicar modelos en otra notebook. La variable target ya cuenta con su columna numérica (Target). Ahora se procede a reemplazar los valores string de los votos por otros numéricos (n = -1, ? = 0, y = 1).

In [None]:
# Numeric encoding of the vote strings: "n" -> -1, "?" -> 0, "y" -> 1.
# The mapping is applied to every cell of the frame that equals one of the keys.
codigos_voto = {"n": -1, "?": 0, "y": 1}
votos_replace = df_votes.replace(codigos_voto)
votos_replace.head(10)

Se exporta la base de datos

In [None]:
# Write the numerically encoded frame to votos_final.csv; index=False leaves the row index out of the file.
votos_replace.to_csv('votos_final.csv', index = False)