# 12-create-flat-file
This notebook takes all the cleaned CIN Census files and merges them into a single flat CSV file in which each row is a dated event (referral, CIN start, CIN end, etc.), together with demographic information and a few details relevant to the event.
This means that there are a LOT of columns.

In [None]:
import glob
import os
import yaml
from wrangling.log.cin_log import build_cinrecord

# Execute the shared configuration notebook before anything else.
# NOTE(review): presumably this is what defines cin_cleaned and flatfile_folder,
# which are used further down but never assigned in this notebook — confirm in 00-config.ipynb.
%run "00-config.ipynb"
# Load IPython's autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Config

In [None]:
# Name of the local authority (LA) to process.
# This must match the name of the LA's folder.
localauthority = "Waltham Forest"

#### Define input and output folder

In [None]:
# Input: the sub-folder of cin_cleaned named after the local authority.
input_folder = os.path.join(cin_cleaned, localauthority)
# Output: the flat-file folder, used as is.
output_folder = flatfile_folder

# Show both locations so they can be checked before running the rest of the notebook.
folder_summary = "Input folder: {} \nOutput folder: {}".format(input_folder, output_folder)
print(folder_summary)

### 1. Run programme (takes a few minutes depending on quantity of data)

In [None]:
# Collect every XML file in the input folder.
# glob returns paths in arbitrary order, so they are sorted to make the order in
# which files are processed the same on every run and every machine.
cin_files = sorted(glob.glob(os.path.join(input_folder, "*.xml")))
print("Found {} CIN files in folder {}".format(len(cin_files), input_folder))

# Stop here with a clear message instead of carrying on with no input data
# (usually a sign that the LA name does not match the folder name).
if not cin_files:
    raise FileNotFoundError(
        "No CIN XML files found in folder {}".format(input_folder)
    )

In [None]:
# Create CIN record
# build_cinrecord (imported from wrangling.log.cin_log) receives the list of XML file paths.
# NOTE(review): the result is used like a pandas DataFrame in the cells below
# (column access, isna, groupby, to_csv) — confirm against build_cinrecord.
cin = build_cinrecord(cin_files)

### 2. Quick data checks - does it look sensible?

In [None]:
# Sanity checks: print headline figures and eyeball them.

# Number of distinct child IDs and the date range covered by the events.
headline_figures = [
    "\n{} unique Child IDs".format(len(cin["LAchildID"].unique())),
    "\n{} first event".format(cin["Date"].min()),
    "\n{} last event".format(cin["Date"].max()),
]
print(" ".join(headline_figures))

# Count of missing values in each of the key columns.
checked_columns = ('Date', 'LAchildID', 'GenderCurrent', 'Ethnicity', 'Disabilities', 'PersonBirthDate')
for column in checked_columns:
    n_missing = cin[column].isna().sum()
    print("\n{} missing values for {}".format(n_missing, column))

In [None]:
# Look at event distribution
# Convert the event dates to datetimes so the year can be extracted with .dt.
# The unit is spelled out because pandas 2.x rejects the unit-less "datetime64"
# dtype in astype; "datetime64[ns]" works on older versions as well.
cin["Date"] = cin["Date"].astype("datetime64[ns]")

# Count the events in each calendar year and show them as a bar chart.
events_per_year = cin.groupby(cin["Date"].dt.year)["Date"].count()
ax = events_per_year.plot(kind="bar", figsize=(10, 7))
# Label the figure so it can be read on its own; the trailing semicolon hides the repr.
ax.set(title="Number of events per year", xlabel="Year", ylabel="Number of events");

### 3. Save to csv

In [None]:
# Add column with LA name
cin['LA'] = localauthority

# Make sure the output folder exists: to_csv does not create missing folders
# and would otherwise fail after the whole record has been built.
os.makedirs(output_folder, exist_ok=True)

# Save as "<LA name>_flatcin.csv" in the output folder, without the index column
output_path = os.path.join(output_folder, "{}_flatcin.csv".format(localauthority))
cin.to_csv(output_path, index=False)