## Copyright 2020 Alexander Monneret

# Sliding Window, Step=1, Flattening Rows

In [106]:
import numpy as np
import pandas as pd
import itertools
from itertools import *

# loading .csv file

In [107]:
# The cells below rely on this layout: the group-name column ("Name") first,
# then the feature columns (f1, f2), and "class" as the last column.
data = pd.read_csv("data.csv")

In [108]:
data

Unnamed: 0,Name,f1,f2,class
0,A,1,2,0
1,A,3,4,1
2,A,5,6,0
3,B,20,21,1
4,B,22,23,0
5,B,24,25,1
6,B,26,27,1


In [109]:
# Row count of the smallest "Name" group. A window longer than this would
# produce no windows at all for that group.
max_window = data["Name"].value_counts().min()
f'Max Window Size = {max_window}'

'Max Window Size = 3'

# defining variables

In [96]:
# Number of consecutive rows that make up one window.
window_size = 2
features_number = 2   # number of feature columns (f1, f2 in data.csv)
# Column that identifies the group each row belongs to.
name_column = "Name"

# defining functions

In [110]:
def flatten_rows(df=None, groupby_name="", features_columns=None, num_features=0, window_size=0):
    """Pivot the stacked rows of each window into a single wide row.

    Parameters
    ----------
    df : pandas.DataFrame
        Stacked windows. The first column must carry the label ``0`` and
        hold the window name. The next ``num_features`` columns hold the
        features. The column immediately after the features is skipped, and
        all remaining columns are carried over to the output; they are
        expected to be constant within a window (duplicates are dropped
        before they are merged back).
    groupby_name : hashable
        Column used to number the rows inside each window.
    features_columns : list of str, optional
        Feature names; output columns are labelled ``<name>_<step>``.
    num_features : int
        Number of feature columns following the window-name column.
    window_size : int
        Number of rows per window. Every window must contain exactly this
        many rows, otherwise assigning the column labels fails.

    Returns
    -------
    pandas.DataFrame
        One row per window: ``name_window``, then the features of step 0,
        step 1, ..., then the carried-over trailing columns.
    """
    if features_columns is None:
        features_columns = []

    # Window-name column plus the feature columns (positional slice).
    features = df.iloc[:, :num_features + 1]
    # Window-name column plus every column after the skipped one.
    trailing = df.iloc[:, np.r_[0, num_features + 2:len(df.columns)]]

    # 1-based position of each row inside its window.
    step = features.groupby([groupby_name]).cumcount() + 1

    # NOTE: the key column label is the literal 0 here and below, which is
    # what the frames built from raw row lists use for their first column.
    # `axis` is passed by keyword: recent pandas versions reject it
    # positionally.
    wide = features.set_index([0, step]).unstack().sort_index(axis=1, level=1)

    # Columns are now ordered step-major, feature-minor; label them to match.
    wide.columns = [
        f"{name}_{i}" for i in range(window_size) for name in features_columns
    ]

    wide = wide.reset_index()
    wide = wide.merge(trailing.drop_duplicates(), on=0)
    return wide.rename(columns={0: "name_window"})

In [111]:
def sliding_window(iterable, n=3):
    """Return an iterator over every run of ``n`` consecutive items.

    Windows advance one item at a time and are produced as tuples. An input
    with fewer than ``n`` items produces no windows.
    """
    copies = itertools.tee(iterable, n)

    for offset, copy in enumerate(copies):
        # Discard the first `offset` items so that copy k starts at item k.
        next(itertools.islice(copy, offset, offset), None)

    # Zipping the staggered copies lines up item i, i+1, ..., i+n-1.
    return zip(*copies)

In [112]:
def rows_to_windows(df=None, groupby_name="", num_features=0, window_size=0):
    """Expand each group of rows into overlapping windows, stacked vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows. The first column must hold the (string) group name and
        the last column must be named ``"class"``.
    groupby_name : hashable
        Column whose values define the groups.
    num_features : int
        Number of feature columns following the first column.
    window_size : int
        Number of consecutive rows per window.

    Returns
    -------
    tuple of (pandas.DataFrame, list) or None
        The stacked window rows and the feature column names taken from
        ``df``. The frame's columns are the original columns relabelled by
        position (0, 1, ...), followed by ``"class"`` (the label of the
        window's last row) and ``"old_name"`` (the group name before the
        window number was appended). If the last column of ``df`` is not
        ``"class"``, a message is printed and ``None`` is returned.
    """
    if df.columns[-1] != "class":
        print("last column must be class")
        return

    features_columns = df.columns[1:num_features+1]

    # Collect every window first and concatenate once at the end; growing a
    # frame with pd.concat on each iteration re-copies all earlier rows.
    frames = []
    for _, group in df.groupby(groupby_name):
        windows = sliding_window(group.values.tolist(), window_size)
        for window_counter, window in enumerate(windows):
            x = pd.DataFrame(list(window))

            # adding label of last item of the window
            x["class"] = x.iloc[:, -1].iloc[-1]

            # adding original name without window number
            x["old_name"] = x[0]
            x[0] = x[0] + str(window_counter)

            frames.append(x)

    # pd.concat rejects an empty list, so fall back to an empty frame when
    # no group was long enough to produce a window.
    conv = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    conv = conv.reset_index(drop=True)
    return conv, list(features_columns)

# building sliding windows

In [100]:
# Yields the stacked per-window rows and the list of feature column names.
data_windows , features_columns = rows_to_windows(data, name_column, features_number, window_size)

In [101]:
data_windows

Unnamed: 0,0,1,2,3,class,old_name
0,A0,1,2,0,1,A
1,A0,3,4,1,1,A
2,A1,3,4,1,0,A
3,A1,5,6,0,0,A
4,B0,20,21,1,0,B
5,B0,22,23,0,0,B
6,B1,22,23,0,1,B
7,B1,24,25,1,1,B
8,B2,24,25,1,1,B
9,B2,26,27,1,1,B


In [113]:
features_columns

['f1', 'f2']

# flattening windows rows

In [114]:
# In data_windows the window name (e.g. "A0") lives in the column labelled 0,
# so that label is passed as the grouping key.
data_flatten = flatten_rows(data_windows, 0, features_columns, features_number, window_size)

In [115]:
data_flatten

Unnamed: 0,name_window,f1_0,f2_0,f1_1,f2_1,class,old_name
0,A0,1,2,3,4,1,A
1,A1,3,4,5,6,0,A
2,B0,20,21,22,23,0,B
3,B1,22,23,24,25,1,B
4,B2,24,25,26,27,1,B
