### Merge
Helper code used to merge the output CSVs produced throughout the scraping process for our preliminary dataset.

In [63]:
import csv
import pandas as pd

# Our scraper saved the cases for each particular day and court separately, in
# case of an IP failure. This cell merges the per-court CSVs of a single day
# into one CSV when such a failure happened for that day.
date = "01.31.2020"
courts = ['Central Housing', 'Eastern Housing', 'Metro South Housing',
          'Northeast Housing', 'Southeast Housing', 'Western Housing']
file_names = ["./data/"+court+"_"+date+"-"+date+".csv" for court in courts]

out_name = "./data/"+date+"-"+date+"2.csv"

# Open with "w" rather than "a": the file is fully rewritten by to_csv() below,
# so appending on a re-run would stack a second copy of the data (including a
# header row that ends up as a data row) on top of the previous output.
# newline="" is what the csv module requires for files it reads or writes.
header_written = False
with open(out_name, "w", newline="") as outfile:
    writer = csv.writer(outfile)

    for f_name in file_names:
        try:
            infile = open(f_name, newline="")
        except FileNotFoundError:
            # No CSV for this court on this day: skip it.
            continue
        with infile:
            reader = csv.reader(infile)
            header = next(reader, None)
            if header is None:
                # Completely empty file: nothing to copy.
                continue
            if not header_written:
                # Only the first file with content contributes the header row;
                # every file is visited exactly once, so no rows are copied twice.
                writer.writerow(header)
                header_written = True
            writer.writerows(reader)

if not header_written:
    # Fail with a clear message instead of an opaque pandas EmptyDataError.
    raise FileNotFoundError("No input CSVs with content found for "+date+"; nothing to merge.")

# De-duplicate on the case number and drop the unnamed first column
# (presumably the index written by an earlier to_csv() -- confirm against the
# scraper output). to_csv() then writes the merged frame back with a fresh index.
df = pd.read_csv(out_name)
df = df.drop_duplicates(subset="caseNum", ignore_index=True).drop('Unnamed: 0', axis=1)
df.to_csv(out_name)

In [64]:
"""This code merges the csvs for each day into one."""
# Zero-padded day-of-month strings "01" .. "31", matching the file naming scheme.
days = ["{:02d}".format(day) for day in range(1,32)]
file_names = ["./data/01."+day+".2020-01."+day+".2020.csv" for day in days]

out_name = "./data/01.01.2020-01.31.2020.csv"

# Open with "w" rather than "a": the file is fully rewritten by to_csv() below,
# so appending on a re-run would stack a second copy of the data (including a
# header row that ends up as a data row) on top of the previous output.
# newline="" is what the csv module requires for files it reads or writes.
header_written = False
with open(out_name, "w", newline="") as outfile:
    writer = csv.writer(outfile)

    for f_name in file_names:
        try:
            infile = open(f_name, newline="")
        except FileNotFoundError:
            # Days without a CSV are skipped. This now also covers the first
            # day, which previously aborted the whole merge when missing.
            continue
        with infile:
            reader = csv.reader(infile)
            header = next(reader, None)
            if header is None:
                # Completely empty file: nothing to copy.
                continue
            if not header_written:
                # Only the first file with content contributes the header row.
                writer.writerow(header)
                header_written = True
            writer.writerows(reader)

if not header_written:
    # Fail with a clear message instead of an opaque pandas EmptyDataError.
    raise FileNotFoundError("No daily CSVs with content found under ./data/; nothing to merge.")

# De-duplicate on the case number and drop the unnamed first column
# (presumably the index written by an earlier to_csv() -- confirm against the
# per-day files). to_csv() then writes the merged frame back with a fresh index.
df = pd.read_csv(out_name)
df = df.drop_duplicates(subset="caseNum", ignore_index=True).drop('Unnamed: 0', axis=1)
df.to_csv(out_name)

In [2]:
import pandas as pd

# Finally, we merged the data for the two months together, and then into a
# full dataset.
#
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; with ignore_index=True it gives the same result
# (rows stacked in order, index renumbered from 0).
housing = pd.concat(
    [
        pd.read_csv("01.01.2020-01.31.2020.csv"),
        pd.read_csv("01.01.2021-01.31.2021.csv"),
    ],
    ignore_index=True,
).drop('Unnamed: 0', axis=1)

# NOTE(review): the "D"-prefixed files are presumably the district-court
# counterparts of the housing files -- confirm against the scraper output.
district = pd.concat(
    [
        pd.read_csv("D01.01.2020-01.31.2020.csv"),
        pd.read_csv("D01.01.2021-01.31.2021.csv"),
    ],
    ignore_index=True,
).drop('Unnamed: 0', axis=1)

full = pd.concat([housing, district], ignore_index=True)

# Each frame is written with its default integer index as the first column.
housing.to_csv("housing_dataset.csv")
district.to_csv("district_dataset.csv")
full.to_csv("full_dataset.csv")