## Script to fetch disconnected accounts data and append to file

In [1]:
# import libraries
import pandas as pd
import os

### Fetch and aggregate data from the raw CSV files

In [92]:
def _daily_disconnection_counts(raw):
    """Collapse one raw export into a per-day disconnection count.

    Args:
        raw: DataFrame read from one raw CSV. It must contain a 'Date'
            column whose string values begin with the calendar date followed
            by a space, and an 'Address' column.

    Returns:
        DataFrame with two columns: 'disconnectionDate' (date strings, in
        ascending order) and 'count' (the number of non-null 'Address'
        values recorded for that date).
    """
    # Keep only the date part of the timestamp, then round-trip it through
    # datetime so every file ends up with the same normalized string form.
    dates = pd.to_datetime(raw['Date'].str.split(' ').str[0]).astype(str)

    return (
        raw.assign(disconnectionDate=dates)
        .groupby('disconnectionDate')['Address']  # groupby sorts the dates
        .count()
        .rename('count')
        .reset_index()
    )


# folder holding the raw disconnection exports
directory = r'../data/raw/disconnected'

# Aggregate every CSV in the folder. Sorting the listing keeps the row order
# reproducible, because os.listdir returns names in arbitrary order.
per_file_counts = [
    _daily_disconnection_counts(pd.read_csv(os.path.join(directory, file_name)))
    for file_name in sorted(os.listdir(directory))
    if file_name.endswith('.csv')
]

# DataFrame.append was removed in pandas 2.0, so the per-file results are
# stacked with a single pd.concat. pd.concat raises on an empty list, so fall
# back to an empty frame with the expected columns when no CSV was found.
if per_file_counts:
    disconnections_df = pd.concat(per_file_counts)
else:
    disconnections_df = pd.DataFrame(columns=['disconnectionDate', 'count'])


### Clean data

In [93]:
# Parse the date strings so the frame can be indexed by calendar day.
parsed_dates = pd.to_datetime(disconnections_df['disconnectionDate'])
by_day = disconnections_df.assign(disconnectionDate=parsed_dates).set_index('disconnectionDate')

# Reindex to a daily frequency; days with no rows come back as NaN and are
# then replaced by a count of 0.
by_day = by_day.asfreq('D')
disconnections_df = by_day.reset_index().fillna(0)

# TODO: ADD 1-1-2021 -> 4-26-2021, 0 COUNT

# Store the dates as strings again for export.
disconnections_df['disconnectionDate'] = disconnections_df['disconnectionDate'].astype(str)

### Export data

In [94]:
# Optional CSV export, kept disabled:
# disconnections_df.to_csv('../data/clean/2021.csv')

# Write one JSON object per row. `date_format` only accepts 'epoch' or 'iso';
# the strftime pattern used before is not one of those values. The
# 'disconnectionDate' column is already a plain string at this point, so the
# dates are written exactly as stored.
disconnections_df.to_json(r'../data/clean/disconnections/2021-draft.json', orient="records", date_format='iso')