### Imports

In [24]:
import requests
from datetime import date
import pandas as pd
import itertools
import csv
import re
from unicodedata import normalize
from decouple import Config, RepositoryEnv

### Getting the urls from the config file

In [25]:
# Name of the dotenv file the settings are read from.
DOTENV_FILE = '.env'
# Settings reader backed by that file (decouple's RepositoryEnv).
config = Config(RepositoryEnv(DOTENV_FILE))

In [26]:
# Source URL of each dataset, looked up by key in the config file.
url_museos = config('URL_MUSEOS')
url_cines = config('URL_CINES')
url_bibliotecas = config('URL_BIBLIOTECAS')

### Function to create the name of the csv files with the date

In [27]:
def create_name(categoria):
    """Build the relative path of the dated CSV file for one category.

    The path has the shape ``<categoria>/<YYYY-MM>/<categoria>-<DD-MM-YYYY>.csv``
    and is joined with the path separator of the platform running the notebook.

    Parameters
    ----------
    categoria : str
        Category name; used both as the top-level folder and as the file prefix.

    Returns
    -------
    str
        The relative file path. Nothing is created on disk by this function.
    """
    import os  # local import: only this function needs it

    # Read the clock once so the month folder and the file name can never
    # disagree when the call happens right at midnight.
    today = date.today()
    month_folder = today.strftime('%Y-%m')
    file_name = categoria + '-' + today.strftime('%d-%m-%Y') + '.csv'
    # os.path.join instead of a hard-coded '\\', which is a path separator
    # only on Windows.
    return os.path.join(categoria, month_folder, file_name)

### Creating the names of the csv files 

In [28]:
# Dated CSV path for each category, built by create_name.
name_museos = create_name('museos')
name_cines = create_name('cines')
name_bibliotecas = create_name('bibliotecas')

### Normalizing headers: remove accents and convert to lowercase

In [29]:

def normalize_string(s):
    """Return ``s`` lowercased with its accents removed, keeping the letter ñ.

    The text is decomposed (NFD) so accents become separate combining marks,
    the marks are dropped, and the result is recomposed (NFC) and lowercased.
    An ``n``/``N`` followed by a lone combining tilde is left untouched so
    that ``ñ`` survives.

    Parameters
    ----------
    s : str
        Text to normalize (for example a CSV column title).

    Returns
    -------
    str
        The normalized text.
    """
    decomposed = normalize('NFD', s)
    # ``flags`` is passed by keyword: passing count/flags positionally to
    # re.sub is deprecated since Python 3.13.
    without_marks = re.sub(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        r"\1",
        decomposed,
        flags=re.I,
    )
    return normalize('NFC', without_marks).lower()

In [74]:
def change_header_csv(name, encoding='utf-8'):
    """Rewrite the CSV file ``name`` in place with a normalized header row.

    Every column title of the first row is passed through
    ``normalize_string``; all other rows are written back unchanged.

    Parameters
    ----------
    name : str
        Path of the CSV file to rewrite.
    encoding : str, optional
        Text encoding used for both reading and writing. Defaults to UTF-8,
        which is what ``pd.read_csv`` assumes when the files are loaded later,
        instead of the platform-dependent default of ``open``.

    Returns
    -------
    None
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for files passed to reader/writer.
    with open(name, 'r', newline='', encoding=encoding) as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            # Empty file: there is no header to normalize, leave it untouched.
            return
        rows = [[normalize_string(column) for column in header]]
        rows.extend(reader)
    with open(name, 'w', newline='', encoding=encoding) as f:
        csv.writer(f).writerows(rows)

### Downloading the CSV

In [59]:
def get_csv(url, name, timeout=30):
    """Download ``url`` into the file ``name`` and normalize its CSV header.

    Parameters
    ----------
    url : str
        Address of the CSV file to download.
    name : str
        Path the raw response body is written to. Its parent folder is
        created when it does not exist yet.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    import os  # local import: only needed to create the target folder

    r = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of saving the error page as if it
    # were the CSV.
    r.raise_for_status()

    # The target path contains dated sub-folders that may not exist yet.
    folder = os.path.dirname(name)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(name, 'wb') as f:
        f.write(r.content)
    change_header_csv(name)
    

In [70]:
def download_data():
    """Download the museos, cines and bibliotecas CSV files, in that order.

    Each dataset URL is saved under its matching dated file name by
    ``get_csv``.
    """
    downloads = (
        (url_museos, name_museos),
        (url_cines, name_cines),
        (url_bibliotecas, name_bibliotecas),
    )
    for source_url, target_name in downloads:
        get_csv(source_url, target_name)

In [75]:
# Fetch the three datasets; get_csv also normalizes each file's header row.
download_data()

## Processing Data

### Creating DataFrame 1: combined table of museos, cines and bibliotecas

In [182]:
# Load each downloaded CSV (same paths download_data wrote to) into its own DataFrame.
df_museos = pd.read_csv(name_museos)
df_cines = pd.read_csv(name_cines)
df_bibliotecas = pd.read_csv(name_bibliotecas)

In [183]:
def changeHeader(df):
    """Rename the abbreviated source columns of ``df`` to their final names.

    The frame is modified in place and that same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are renamed.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, after renaming.
    """
    new_names = {
        'cod_loc': 'cod_localidad',
        'idprovincia': 'id_provincia',
        'iddepartamento': 'id_departamento',
        'cp': 'cod_postal',
        'telefono': 'numero_telefono',
    }
    df.rename(columns=new_names, inplace=True)
    return df

In [186]:
def normalize_table(museos, cines, bibliotecas):
    """Combine the three datasets into one table with unified column names.

    Parameters
    ----------
    museos, cines, bibliotecas : pandas.DataFrame or str
        Each dataset, given either as an already loaded DataFrame or as the
        path of a CSV file, which is then read with ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The selected columns of the three sources stacked in the order
        museos, cines, bibliotecas, with columns renamed by ``changeHeader``.
        Each source keeps its own row index, so index labels repeat.

    Raises
    ------
    KeyError
        If a source lacks one of the selected columns.
    """
    shared_columns = ['cod_loc', 'idprovincia', 'iddepartamento', 'categoria', 'provincia', 'localidad', 'nombre', 'cp', 'telefono', 'mail', 'web']
    # Only bibliotecas contributes 'domicilio'; the rows coming from the other
    # two sources are left empty (NaN) in that column by pd.concat.
    bibliotecas_columns = shared_columns + ['domicilio']

    def _as_frame(source):
        # Use the argument itself instead of silently reading module-level
        # frames; a non-DataFrame argument is treated as a CSV path.
        return source if isinstance(source, pd.DataFrame) else pd.read_csv(source)

    selected = [
        _as_frame(museos)[shared_columns],
        _as_frame(cines)[shared_columns],
        _as_frame(bibliotecas)[bibliotecas_columns],
    ]

    # Stack the three selections and give the columns their final names.
    return changeHeader(pd.concat(selected))

In [187]:
# Pass the loaded DataFrames (not the CSV file names) to build the combined table.
table1 = normalize_table(df_museos, df_cines, df_bibliotecas)

In [None]:
# Display the combined table.
table1

### Creating DataFrame 2: record counts by category, by source, and by province and category

In [235]:
def register_count_table(museos=None, cines=None, bibliotecas=None, output_path='cantidad_categorias.csv'):
    """Count records per category, per source and per province/category.

    For each dataset three count tables are computed:
    rows per ``categoria`` (column ``registros_categorias``), rows per
    ``fuente`` (column ``registros_fuentes``) and rows per
    ``provincia``/``categoria`` pair (column ``registros_provincia_categoria``).
    The nine tables are stacked in the order bibliotecas, cines, museos and
    the result is also written to ``output_path`` as CSV without the index.

    Parameters
    ----------
    museos, cines, bibliotecas : pandas.DataFrame, optional
        Dataset to count. When omitted, the module-level ``df_museos``,
        ``df_cines`` and ``df_bibliotecas`` frames are used.
    output_path : str, optional
        Destination of the CSV copy (default ``'cantidad_categorias.csv'``).

    Returns
    -------
    pandas.DataFrame
        The stacked count tables; cells that do not apply to a row are NaN.
    """
    def _counts(frame):
        # The three count tables for a single dataset, in output order.
        subset = frame[['categoria', 'provincia', 'fuente']]
        return [
            subset.groupby(['categoria']).size().reset_index(name='registros_categorias'),
            subset.groupby(['fuente']).size().reset_index(name='registros_fuentes'),
            subset.groupby(['provincia', 'categoria']).size().reset_index(name='registros_provincia_categoria'),
        ]

    # Fall back to the notebook's global frames so the no-argument call keeps working.
    sources = [
        df_bibliotecas if bibliotecas is None else bibliotecas,
        df_cines if cines is None else cines,
        df_museos if museos is None else museos,
    ]

    partial_counts = []
    for frame in sources:
        partial_counts.extend(_counts(frame))

    # Stack the count tables and persist a CSV copy.
    df_normalize = pd.concat(partial_counts)
    df_normalize.to_csv(output_path, index=False)
    return df_normalize
    
    

In [236]:
# Build and display the count table; the call also writes it to cantidad_categorias.csv.
register_count_table()

Unnamed: 0,categoria,registros_categorias,fuente,registros_fuentes,provincia,registros_provincia_categoria,id
0,Bibliotecas Populares,2017.0,,,,,1
0,,,CONABIP,2010.0,,,1
1,,,Gob. Pcia.,1.0,,,2
2,,,Secretaria de Cultura Provincial,6.0,,,3
0,Bibliotecas Populares,,,,Buenos Aires,543.0,1
...,...,...,...,...,...,...,...
19,Espacios de Exhibición Patrimonial,,,,Santa Cruz,20.0,20
20,Espacios de Exhibición Patrimonial,,,,Santa Fe,162.0,21
21,Espacios de Exhibición Patrimonial,,,,Santiago del Estero,10.0,22
22,Espacios de Exhibición Patrimonial,,,,"Tierra del Fuego, Antártida e Islas del Atlánt...",14.0,23


In [233]:
def info_cine():
    """Return the cinema-specific columns of the module-level ``df_cines`` frame.

    Returns
    -------
    pandas.DataFrame
        The ``provincia``, ``pantallas``, ``butacas`` and ``espacio_incaa``
        columns, in that order.
    """
    cinema_columns = ['provincia', 'pantallas', 'butacas', 'espacio_incaa']
    return df_cines[cinema_columns]

In [234]:
# Display the columns selected by info_cine.
info_cine()

Unnamed: 0,provincia,pantallas,butacas,espacio_incaa
0,Catamarca,5,743,
1,Catamarca,1,440,0
2,Chaco,5,820,
3,Chubut,1,80,
4,Corrientes,1,240,
...,...,...,...,...
324,Buenos Aires,1,0,
325,Buenos Aires,8,2037,
326,Buenos Aires,1,430,si
327,Catamarca,1,1103,si
