# Cyclic Encoding

## Imports

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import FunctionTransformer

## Helper Functions

In [None]:
def is_float(num):
    """Coerce ``num`` to a float, returning NaN when it cannot be converted.

    Despite the name this does not return a boolean: it returns
    ``float(num)`` on success and ``np.nan`` otherwise.

    Args:
        num: Any value; typically a string or number read from a CSV cell.

    Returns:
        The value as a ``float``, or ``np.nan`` if ``float()`` rejects it.
    """
    try:
        return float(num)
    except (TypeError, ValueError):
        # ValueError: unparsable strings such as "abc".
        # TypeError: objects float() cannot accept at all (None, lists, ...).
        return np.nan

def sin_transformer(period):
    """Build a transformer that maps values onto the sine of a cycle.

    The returned ``FunctionTransformer`` computes
    ``sin(x / period * 2 * pi)`` for whatever input ``x`` it is given.
    """
    def _to_sine(x):
        return np.sin(x / period * 2 * np.pi)

    return FunctionTransformer(_to_sine)

def cos_transformer(period):
    """Build a transformer that maps values onto the cosine of a cycle.

    The returned ``FunctionTransformer`` computes
    ``cos(x / period * 2 * pi)`` for whatever input ``x`` it is given.
    """
    def _to_cosine(x):
        return np.cos(x / period * 2 * np.pi)

    return FunctionTransformer(_to_cosine)
    

def transform_columns(df):
    """Add sine/cosine encodings of the calendar columns, then drop null rows.

    For every entry in the table below, a sine column and a cosine column
    are written onto ``df`` itself, using the listed cycle length as the
    period. The value returned is ``df.dropna()`` taken after all new
    columns have been added.
    """
    # (source column, period, sine column, cosine column)
    cyclic_features = (
        ('month_of_year', 12, 'month_sin', 'month_cos'),
        ('day_of_month', 31, 'day_sin', 'day_cos'),
        ('hour_of_day', 24, 'hour_sin', 'hour_cos'),
        ('day_of_week', 7, 'weekday_sin', 'weekday_cos'),
        ('day_of_year', 365, 'yearday_sin', 'yearday_cos'),
    )

    for source, period, sin_name, cos_name in cyclic_features:
        df[sin_name] = sin_transformer(period).fit_transform(df[source])
        df[cos_name] = cos_transformer(period).fit_transform(df[source])

    # Rows with any null value are removed from the returned frame.
    return df.dropna()

def ensure_numeric(df):
    """Run every calendar column through ``is_float`` ahead of cyclic encoding.

    Each listed column is replaced in place on ``df`` with the result of
    applying ``is_float`` to every element; the same ``df`` is returned.
    """
    calendar_columns = (
        'month_of_year',
        'day_of_month',
        'hour_of_day',
        'day_of_week',
        'day_of_year',
    )
    for name in calendar_columns:
        df[name] = df[name].apply(is_float)
    return df

## Clean Data

In [None]:
# Locations of the generated input dataset and the encoded output.
INPUT_CSV = '../data/nyc_grouped_incidents.csv'
OUTPUT_CSV = '../data/encoded_df.csv'

# Load the generated dataset.
df = pd.read_csv(INPUT_CSV)

# Coerce the calendar columns to floats, then add the sin/cos encodings.
df = transform_columns(ensure_numeric(df))

# Write the encoded frame to CSV without the index column.
df.to_csv(OUTPUT_CSV, index=False)