In [41]:
import pandas as pd
import numpy as np
import re
import datetime

In [68]:
def create_submission_df(crash_data_df, date_start='2019-07-01', date_end='2020-01-01',
                         n_ambulances=6):
    '''Build a baseline submission frame that parks every ambulance at the crash centroid.

    Parameters
    ----------
    crash_data_df : pandas.DataFrame
        Crash records; must provide `latitude` and `longitude` columns.
    date_start, date_end : str
        Bounds handed to `pd.date_range`; one row is produced per 3-hour step.
    n_ambulances : int, default 6
        Number of ambulances to place. Each gets an `A<i>_Latitude` and an
        `A<i>_Longitude` column.

    Returns
    -------
    pandas.DataFrame
        A `date` column followed by one latitude/longitude column pair per ambulance.
    '''
    # Centroid of the crashes that were passed in. Reading the parameter (rather than
    # a notebook-global `df`) keeps the function usable on a freshly started kernel.
    lat_centroid = crash_data_df['latitude'].mean()
    lon_centroid = crash_data_df['longitude'].mean()

    # One row per 3-hour window across the requested period
    dates = pd.date_range(date_start, date_end, freq='3h')

    submission_df = pd.DataFrame({
        'date': dates
    })
    for ambulance in range(n_ambulances):
        # Every ambulance is placed at the same point for every time window
        submission_df['A' + str(ambulance) + '_Latitude'] = lat_centroid
        submission_df['A' + str(ambulance) + '_Longitude'] = lon_centroid
    return submission_df

In [72]:
def create_submission_csv(submission_df, model_name='baseline', path='../Outputs/'):
    '''Write a submission dataframe to `<path>/<YYYYMMDD>_<model_name>.csv`.

    Parameters
    ----------
    submission_df : pandas.DataFrame
        Frame to write; the index is not written.
    model_name : str, default 'baseline'
        Label placed after the date stamp in the file name.
    path : str, default '../Outputs/'
        Output directory, with or without a trailing separator.

    Returns
    -------
    None
    '''
    import os  # function-scope import so this block does not rely on the imports cell

    # Zero-padded stamp: unpadded year/month/day made 2020-1-11 and 2020-11-1
    # both come out as '2020111'.
    date_stamp = datetime.datetime.now().strftime('%Y%m%d')
    filename = f'{date_stamp}_{model_name}.csv'
    # os.path.join inserts the separator only when `path` does not already end with one
    submission_df.to_csv(os.path.join(path, filename), index=False)


In [70]:
# Read the raw crash records from csv, parsing the 'datetime' column as dates
crash_data_df = pd.read_csv(
    filepath_or_buffer='../Inputs/Train.csv',
    parse_dates=['datetime'],
)

In [74]:
# Build the submission dataframe from the crash data, then write it out as a csv
submission_df = create_submission_df(crash_data_df=crash_data_df)
create_submission_csv(submission_df=submission_df, model_name='weighted_average')

In [66]:
# Preview the first five rows of the submission
submission_df.head(n=5)

Unnamed: 0,date,A0_Latitude,A0_Longitude,A1_Latitude,A1_Longitude,A2_Latitude,A2_Longitude,A3_Latitude,A3_Longitude,A4_Latitude,A4_Longitude,A5_Latitude,A5_Longitude
0,2019-07-01 00:00:00,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459
1,2019-07-01 03:00:00,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459
2,2019-07-01 06:00:00,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459
3,2019-07-01 09:00:00,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459
4,2019-07-01 12:00:00,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459,-1.270338,36.855459


In [10]:
# functions for saving post submission details to a dataframe
def get_submission_details(copypaste):
    '''Parse text copied from a submission listing into one submission row.

    The ID is taken as the first whitespace-separated token, the score as the
    last one, and the file name as the last `<word>.csv` match anywhere in the
    text. Leading and trailing whitespace (e.g. a pasted newline) is ignored.

    Parameters
    ----------
    copypaste : str
        Raw pasted text describing one submission.

    Returns
    -------
    list
        `[id, file_name, score, submitted, comment]`. The score is returned as
        the raw string token (it is not converted to a number); `submitted`
        and `comment` are empty-string placeholders.

    Raises
    ------
    ValueError
        If the text has no tokens or contains no `.csv` file name.
    '''
    # str.split() with no argument collapses whitespace runs and drops empty edge
    # tokens, so stray spaces or newlines cannot turn the ID or score into ''.
    tokens = copypaste.split()
    if not tokens:
        raise ValueError('copypaste is empty: expected an ID, a .csv file name and a score')

    # Raw string with an escaped dot: only a literal ".csv" counts, so a phrase
    # such as "about csv" later in the text is no longer picked up as a file name.
    file_names = re.findall(r'\w*\.csv\w*', copypaste)
    if not file_names:
        raise ValueError(f'no .csv file name found in {copypaste!r}')

    id_str = tokens[0]
    score_str = tokens[-1]
    file_name = file_names[-1]
    comment_str = ''
    submitted_datetime = ''
    return [id_str, file_name, score_str, submitted_datetime, comment_str]
def submission_dataframe(rows,columns=['ID', 'FILE', 'SCORE', 'SUBMITTED', 'COMMENT']):
    '''Wrap a single submission record in a one-row dataframe.

    `rows` holds the values of one record, in the same order as `columns`.
    '''
    single_record = [rows]  # one record -> one dataframe row
    frame = pd.DataFrame(single_record, columns=columns)
    return frame

def create_dataframe(copypaste):
    '''Parse pasted submission text and return it as a one-row dataframe.'''
    submission_row = get_submission_details(copypaste)
    return submission_dataframe(submission_row)

# append one parsed submission row to an existing submissions dataframe
def add_to_submission_dataframe(df, rows, columns=['ID', 'FILE', 'SCORE', 'SUBMITTED', 'COMMENT']):
    '''Return a new dataframe made of `df` plus one extra submission row.

    Parameters
    ----------
    df : pandas.DataFrame
        Existing submissions; it is not modified.
    rows : list
        Values of the single record to add, in the same order as `columns`.
    columns : list of str
        Column labels that the values in `rows` belong to.

    Returns
    -------
    pandas.DataFrame
        `df` followed by the new row, with the index renumbered from 0.
    '''
    # Label the values with their columns first so concat can align them with `df`.
    # (The previous `zip([columns, rows])` zipped a single iterable and never paired
    # labels with values, and DataFrame.append no longer exists in pandas 2.x.)
    new_row_df = pd.DataFrame([rows], columns=columns)
    return pd.concat([df, new_row_df], ignore_index=True)

def update_dataframe(df, copypaste):
    '''Parse pasted submission text and add it to `df` as a new row.

    The text is parsed with `get_submission_details`, and the resulting row is
    handed to `add_to_submission_dataframe` together with `df`; whatever that
    function returns is returned here.
    '''
    rows = get_submission_details(copypaste)
    return add_to_submission_dataframe(df, rows)