# In [36]:
from lib2to3.pgen2.pgen import DFAState
import pandas as pd
import numpy as np
import datetime as dt

# Open the source workbook (path is relative to the current working directory)
# and load its 'Grupo' sheet as the raw, uncleaned DataFrame.
xls = pd.ExcelFile(r'../Registro orangutanes Barcelona.xlsx')
df_raw = pd.read_excel(xls,'Grupo')

# English column names that replace the Spanish spreadsheet headers once
# CleanDF.columns_to_english() has run.
# NOTE: 'frecuency' is a misspelling of 'frequency'; it is a runtime column
# name, so it is left as is.
DATE = 'date'
SUBJECT = 'subject'
PERIOD = 'period'
REG='reg'
BEHAVIOR='behavior'
FRECUENCY='frecuency'
RECEPTOR='receptor'
DURATION='duration'

class DataManager:
    """Hold a DataFrame and provide shared persistence/conversion helpers."""

    def __init__(self, df):
        """Store *df* as the working DataFrame (no copy is made)."""
        self.df = df

    def process_df(self):
        """Write ``self.df`` to ``clean_df.csv`` in the current working directory.

        Returns:
            None. The frame itself is available as ``self.df``.
        """
        self.df.to_csv('clean_df.csv')

    def to_seconds(self, t):
        """Convert a duration-like value to a number of seconds.

        Args:
            t: Any value; it is stringified and parsed by ``pd.to_timedelta``.

        Returns:
            The duration in seconds as a float, or NaN when the value cannot
            be parsed as a timedelta.
        """
        try:
            return pd.to_timedelta(str(t)).total_seconds()
        # Only parsing failures are treated as "no duration"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except (ValueError, TypeError, OverflowError):
            return np.nan
        

class CleanDF(DataManager):
    """Cleaning pipeline for the raw observation sheet held in ``self.df``.

    Construction is inherited from ``DataManager``; call ``process_df()`` to
    run every cleaning step and persist the result.
    """

    def process_df(self):
        """Run all cleaning steps in order, then persist via the parent class.

        The order matters: columns are renamed first because every later step
        addresses them by their English names, durations are converted to
        seconds before the frequency contribution is added, and the frequency
        column is dropped last.

        Returns:
            Whatever ``DataManager.process_df`` returns.
        """
        self.columns_to_english()
        self.to_minus()
        self.clean_duration()
        self.freq_to_duration()
        self.receptor_column()
        self.drop_columns()
        return super().process_df()

    def columns_to_english(self):
        """Rename the Spanish spreadsheet headers to the English constants."""
        columns = {
            'Fecha': DATE,
            'Sujeto': SUBJECT,
            'Periodo': PERIOD,
            'Registro diario': REG,
            'Conducta': BEHAVIOR,
            'Frecuencia': FRECUENCY,
            'Receptor': RECEPTOR,
            'Duración': DURATION,
        }
        self.df = self.df.rename(columns=columns)

    def to_minus(self):
        """Lower-case every value in the behavior column."""
        self.df[BEHAVIOR] = self.df[BEHAVIOR].str.lower()

    def receptor_column(self):
        """Normalise receptor names and turn each cell into a list of names.

        Known variants (a trailing ``*`` or a lower-case spelling) are mapped
        to their canonical name by exact match; missing values become an empty
        string and every cell is then split on whitespace.
        """
        aliases = {
            'Gibon*': 'Gibon',
            'Storma*': 'Storma',
            'Locky*': 'Locky',
            'Hadia*': 'Hadia',
            'Popo*': 'Popo',
            'Jawie*': 'Jawie',
            'jawie': 'Jawie',
        }
        for raw_name, canonical in aliases.items():
            self.df.loc[self.df[RECEPTOR] == raw_name, RECEPTOR] = canonical

        # Address the column through the constant rather than attribute access
        # so this keeps working if the column name ever changes.
        filled = self.df[RECEPTOR].fillna('')
        self.df[RECEPTOR] = filled.apply(lambda names: names.split())

    def clean_duration(self):
        """Drop placeholder rows and convert the duration column to seconds.

        Rows whose duration is the literal ``'d'`` are removed. Missing and
        whitespace-only durations are treated as zero, and every remaining
        value goes through ``to_seconds`` (unparseable values become NaN).
        """
        placeholder_rows = self.df[self.df[DURATION] == 'd'].index
        self.df = self.df.drop(index=placeholder_rows)

        zero = dt.time(0, 0)
        self.df[DURATION] = self.df[DURATION].fillna(zero)
        is_blank = self.df[DURATION].astype(str).str.fullmatch(r'\s*')
        self.df[DURATION] = np.where(is_blank, zero, self.df[DURATION])
        self.df[DURATION] = self.df[DURATION].apply(self.to_seconds)

    def freq_to_duration(self):
        """Add the frequency contribution to the duration column.

        Missing frequencies count as 0. The result of ``fillna`` is assigned
        back explicitly: an in-place ``fillna`` on a column selection is a
        chained assignment that pandas does not guarantee to write through
        (and never does under Copy-on-Write), which would leave NaN in the
        column and turn the affected durations into NaN.
        """
        frequency = self.df[FRECUENCY].fillna(0)
        self.df[FRECUENCY] = frequency.apply(self.process_freq)
        self.df[DURATION] = self.df[DURATION] + self.df[FRECUENCY]

    def process_freq(self, x):
        """Return the duration contribution of one frequency cell.

        String cells count as 0; anything else is multiplied by 5
        (presumably 5 seconds per recorded event -- TODO confirm).
        """
        if isinstance(x, str):
            return 0
        return x * 5

    def drop_columns(self):
        """Remove the columns that are not part of the cleaned output.

        Raises:
            KeyError: if any of the columns is absent from the frame.
        """
        self.df = self.df.drop(columns=["Observaciones", "Ubicacion", FRECUENCY])
        

# Run the full cleaning pipeline on the raw sheet; process_df() ends by
# writing the result to 'clean_df.csv'.
croqueta= CleanDF(df_raw)
croqueta.process_df()
