# Join Import and Export Data

Para esta parte, es necesario tener los datos separados por año y mes, y la exportación con sus complementos. Es decir, haber ejecutado previamente **Filter export data**, **Filter import data** y **Complement export data**.

In [1]:
import os
import pandas as pd
import csv
import json

Cargamos desde `csv_columns.json` las listas con todas las columnas de los archivos de importación y exportación a analizar.

In [2]:
# Read the JSON configuration that holds the column-name lists for the
# data files processed by this notebook.
with open("csv_columns.json") as file:
    columns = json.loads(file.read())

# Column names of the import and export files as they are before the join
# step (the configuration keys say "without_join").
columns_import, columns_export = (
    columns["columns_import_without_join"],
    columns["columns_export_without_join"],
)

Creamos las carpetas donde guardaremos los nuevos archivos unidos. Para eso, primero necesitamos el directorio de trabajo (working directory).

In [3]:
# Working directory: every input and output folder is resolved against it.
cwd = os.getcwd()

# Create the output tree Join_Filter_Data/{import,export}.
# os.makedirs() also creates the missing parent folder, and exist_ok=True
# makes re-running the cell a no-op, so no separate os.path.exists() check
# (which could race with another process creating the folder) is needed.
for subfolder in ("import", "export"):
    os.makedirs(os.path.join(cwd, "Join_Filter_Data", subfolder), exist_ok=True)

Unimos la información

In [6]:
def _list_data_entries(path):
    """Return the names of the visible entries in *path*, sorted by name.

    os.listdir() yields entries in arbitrary order, so sorting keeps the row
    order of the joined files reproducible between runs. Entries whose name
    starts with "." (e.g. ".ipynb_checkpoints", ".DS_Store") are skipped
    because they are not data files.
    """
    return sorted(name for name in os.listdir(path) if not name.startswith("."))


def _join_filtered_data(base_dir, kind, column_names):
    """Join the monthly files of one data set per year and as a whole.

    Reads every file under ``<base_dir>/Filter_Data/<kind>/<year>/`` as a
    header-less, ";"-separated, latin1-encoded table with *column_names* as
    its columns, and adds two columns to each table: ``YEAR`` (the year
    folder name) and ``MONTH`` (the first two characters of the file name).

    Writes one ``<year>.txt`` per year folder and one ``all_<kind>.txt`` with
    every row into ``<base_dir>/Join_Filter_Data/<kind>/``; the output
    folder must already exist.

    Parameters:
        base_dir: Folder that contains "Filter_Data" and "Join_Filter_Data".
        kind: Sub-folder name of the data set, "import" or "export".
        column_names: Column names to assign to the header-less input files.

    Returns:
        The DataFrame with the rows of all years that was written to
        ``all_<kind>.txt``.

    Raises:
        ValueError: Raised by pd.concat() when no monthly file was found.
    """
    read_path = os.path.join(base_dir, "Filter_Data", kind)
    write_path = os.path.join(base_dir, "Join_Filter_Data", kind)

    # Same output format for the yearly files and for the global file.
    to_csv_options = {
        "sep": ";",
        "encoding": "latin1",
        "header": True,
        "index": False,
        "quoting": csv.QUOTE_NONE,
    }

    all_frames = []
    for year in _list_data_entries(read_path):
        year_path = os.path.join(read_path, year)

        month_frames = []
        for month in _list_data_entries(year_path):
            month_df = pd.read_csv(
                os.path.join(year_path, month),
                names=column_names,
                header=None,
                sep=";",
                encoding="latin1",
                quoting=csv.QUOTE_NONE,
            )
            # A scalar is broadcast to every row; no per-row list is needed.
            month_df["YEAR"] = year
            month_df["MONTH"] = month[:2]
            month_frames.append(month_df)

        # pd.concat() raises on an empty list, so a year folder without
        # monthly files is skipped instead of aborting the whole join.
        if not month_frames:
            continue

        year_df = pd.concat(month_frames, ignore_index=True)
        year_df.to_csv(os.path.join(write_path, year + ".txt"), **to_csv_options)
        all_frames.extend(month_frames)

    all_df = pd.concat(all_frames, ignore_index=True)
    all_df.to_csv(
        os.path.join(write_path, "all_" + kind + ".txt"), **to_csv_options
    )
    return all_df


# Import data
all_df = _join_filtered_data(cwd, "import", columns_import)
print("Import join")

# Export data
all_df = _join_filtered_data(cwd, "export", columns_export)
print("Export join")

Import join
Export join
