In [25]:
import requests
import pandas as pd

In [26]:
API_URL = 'https://apis.datos.gob.ar/series/api/series?ids=151.1_AARIADOTAC_2012_M_25,151.1_AARIADOTAC_2012_M_26,151.1_IPENDIETAC_2012_M_34,151.1_IPENDIETAC_2012_M_43,151.1_TL_SIN_TAC_2012_M_15'
response = requests.get(API_URL)
data = response.json()

In [27]:
# Convertir el JSON a un DataFrame
df = pd.DataFrame(data['data'])

In [28]:
# Asignamos nuevos nombres a las columnas
df.columns = ["Fecha", 
              "SectorPublico", 
              "SectorPrivado", 
              "IndependientesAutonomos",
              "IndependientesMonotributo",
              "TotalTrabajadores"]


In [29]:
# Creando columna "RestoDeSectores"
df['RestoDeSectores'] = df['TotalTrabajadores'] - df['SectorPublico'] - df['SectorPrivado'] - df['IndependientesAutonomos'] - df['IndependientesMonotributo']
# Convirtiendo la columna "Fecha" a formato Fecha
df["Fecha"] = pd.to_datetime(df["Fecha"])

In [30]:
df.dtypes

Fecha                        datetime64[ns]
SectorPublico                       float64
SectorPrivado                       float64
IndependientesAutonomos             float64
IndependientesMonotributo           float64
TotalTrabajadores                   float64
RestoDeSectores                     float64
dtype: object

In [31]:
df

Unnamed: 0,Fecha,SectorPublico,SectorPrivado,IndependientesAutonomos,IndependientesMonotributo,TotalTrabajadores,RestoDeSectores
0,2012-01-01,2548.566,6067.902,408.340,167.762,10896.612,1704.042
1,2012-02-01,2607.289,6061.695,408.285,168.913,10950.353,1704.171
2,2012-03-01,2620.076,6072.139,407.632,170.047,10974.797,1704.903
3,2012-04-01,2623.691,6056.708,408.079,172.512,10970.445,1709.455
4,2012-05-01,2596.482,6065.601,408.331,172.926,10958.421,1715.081
...,...,...,...,...,...,...,...
95,2019-12-01,3211.976,6004.908,404.256,365.604,12112.076,2125.332
96,2020-01-01,3208.978,5988.152,402.338,366.082,12095.956,2130.406
97,2020-02-01,3210.726,5990.463,398.154,365.381,12092.024,2127.300
98,2020-03-01,3218.267,5945.303,392.416,362.447,12023.385,2104.952


In [32]:
# Importing neccesary libraries and initiating connection
import psycopg2
import pandas as pd
from sqlalchemy import create_engine

conn_string = "host=data-engineer-cluster.cyhh5bfevlmn.us-east-1.redshift.amazonaws.com port=5439 dbname=data-engineer-database user=leo_carulli_coderhouse password=PT3o7PK1Rh"
conn = psycopg2.connect(conn_string)

In [33]:
# Create the SQLAlchemy engine
engine = create_engine('postgresql+psycopg2://', creator=lambda: conn)

In [34]:
# Create the table if it doesn't exist
create_table_query = """
    CREATE TABLE IF NOT EXISTS Salarios (
        Fecha DATE DISTKEY,
        SectorPublico NUMERIC,
        SectorPrivado NUMERIC,
        IndependientesAutonomos NUMERIC,
        IndependientesMonotributo NUMERIC,
        TotalTrabajadores NUMERIC,
        RestoDeSectores NUMERIC
    )
    SORTKEY (Fecha);
"""
with conn.cursor() as cursor:
    cursor.execute(create_table_query)
    conn.commit()

In [35]:
# Convert the DataFrame to a temporary table
df.to_sql('temp_table', engine, index=False, if_exists='replace')

In [36]:
# Execute an SQL query to insert data into the target table
with conn.cursor() as cursor:
    query = "INSERT INTO Salarios (Fecha, SectorPublico, SectorPrivado, IndependientesAutonomos, IndependientesMonotributo, TotalTrabajadores, RestoDeSectores) SELECT * FROM temp_table;"
    cursor.execute(query)
    conn.commit()

In [37]:
# Execute the query to check results
with conn.cursor() as cursor:
    query = "SELECT * FROM Salarios ORDER BY fecha DESC;"
    cursor.execute(query)
    result = cursor.fetchall()
    # Process the query result as needed
    for row in result:
        print(row)

(datetime.date(2020, 4, 1), Decimal('3206'), Decimal('5837'), Decimal('385'), Decimal('355'), Decimal('11860'), Decimal('2077'))
(datetime.date(2020, 3, 1), Decimal('3218'), Decimal('5945'), Decimal('392'), Decimal('362'), Decimal('12023'), Decimal('2105'))
(datetime.date(2020, 2, 1), Decimal('3211'), Decimal('5990'), Decimal('398'), Decimal('365'), Decimal('12092'), Decimal('2127'))
(datetime.date(2020, 1, 1), Decimal('3209'), Decimal('5988'), Decimal('402'), Decimal('366'), Decimal('12096'), Decimal('2130'))
(datetime.date(2019, 12, 1), Decimal('3212'), Decimal('6005'), Decimal('404'), Decimal('366'), Decimal('12112'), Decimal('2125'))
(datetime.date(2019, 11, 1), Decimal('3204'), Decimal('6024'), Decimal('404'), Decimal('365'), Decimal('12119'), Decimal('2121'))
(datetime.date(2019, 10, 1), Decimal('3207'), Decimal('6044'), Decimal('399'), Decimal('355'), Decimal('12135'), Decimal('2130'))
(datetime.date(2019, 9, 1), Decimal('3201'), Decimal('6067'), Decimal('400'), Decimal('359'), 

In [None]:
# Close the connection
conn.close()