# Hacer matrices e histogramas

**Table of contents**<a id='toc0_'></a>    
- [General](#toc1_1_)    
- [Funciones](#toc1_2_)    
- [_Main_](#toc1_3_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

----
## <a id='toc1_1_'></a>[General](#toc0_)
    

In [1]:
import pickle
import os
import sys
import numpy as np
import pandas as pd
import itertools as it
import multiprocessing
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm as tqdm

# Root folder for every input and output file, always ending with a path
# separator because the rest of the file appends sub-paths by concatenation.
# os.path.join picks the separator of the running platform; on Windows the
# result is the same string the hard-coded '\\Data\\' suffix produced.
general_directory = os.path.join(os.getcwd(), 'Data', '')

# Identifiers of the trip selections that are analysed for every day.
cases = ['allt', 'hw', 'no', 'mo']

----
## <a id='toc1_2_'></a>[Funciones](#toc0_)

In [6]:
def get_days():
    """Return the sorted names of the day files stored in the ``Days`` folder.

    The ``Days`` folder (and its parent data folder) is created when missing,
    so a first run on a clean checkout returns an empty list instead of
    failing.

    Returns:
        list[str]: entries of ``general_directory + 'Days'`` whose name ends
        in ``dat``, in ascending order.
    """
    days_directory = general_directory+'Days'
    # makedirs creates the parent data folder as well and does not fail when
    # the folders already exist, so there is no check-then-create race and no
    # dependency on the current working directory at call time.
    os.makedirs(days_directory, exist_ok=True)
    return sorted(entry for entry in os.listdir(days_directory) if entry.endswith('dat'))

In [3]:
def save_dataP(name,dictionary,location=general_directory):
    """Pickle ``dictionary`` into ``<location>Saves/<name>.pkl``.

    Args:
        name: file name without extension.
        dictionary: object to serialise with ``pickle``.
        location: base folder, expected to end with a path separator.
            Defaults to the value ``general_directory`` had when this
            function was defined.
    """
    target = location+'Saves/'+ name+'.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(dictionary, handle)
    

In [4]:
# Hour boundaries in seconds: 0, 3600, ..., 86400 (25 values).
times = [hour*3600 for hour in range(25)]

# Labels of the 24 one-hour windows delimited by ``times``: '0-1' ... '23-24'.
times_r = [f'{hour}-{hour+1}' for hour in range(len(times)-1)]


def new_times(t):
    """Return the label of the one-hour window that contains ``t``.

    Windows are closed on their upper bound: 3600 belongs to '0-1' and 3601
    to '1-2'. The value 0 belongs to the first window '0-1'.

    Args:
        t: a time in seconds, anything accepted by ``int()``.

    Returns:
        str | None: an entry of ``times_r``, or ``None`` when ``t`` lies
        outside the range covered by ``times``.
    """
    seconds = int(t)
    # Reject negative values explicitly; otherwise they would be reported as
    # belonging to the first window.
    if seconds < 0:
        return None
    # Pair each window label with its upper boundary. Skipping the boundary 0
    # avoids the wrap-around to times_r[-1] that mislabelled t == 0 as '23-24'.
    for label, upper in zip(times_r, times[1:]):
        if seconds <= upper:
            return label
    return None
        
def new_times_df(data_f):
    """Replace the time columns of ``data_f`` by their one-hour window labels.

    The columns ``end_time`` and ``start_time`` are overwritten in place with
    the result of ``new_times`` applied to every value.

    Returns:
        The same ``data_f`` object that was passed in.
    """
    data_f['end_time'] = data_f['end_time'].apply(new_times)
    data_f['start_time'] = data_f['start_time'].apply(new_times)
    return data_f

In [5]:
def load_day(day):
    """Read one day file and return its trips indexed by ``ID``.

    The file is read from the ``Days`` folder as whitespace-separated text.
    Its first line is skipped and the columns are named ``ID``,
    ``start_time``, ``end_time``, ``exit`` and ``goal``. Single quotes are
    removed from ``exit`` and ``goal``, and both time columns are replaced by
    their one-hour window labels.

    Args:
        day: file name inside the ``Days`` folder.

    Returns:
        pandas.DataFrame: the trips, with the ``ID`` values as index; the
        ``ID`` column itself is kept.
    """
    nday = pd.read_csv(
        # os.path.join keeps the path valid on every platform instead of
        # relying on a hard-coded backslash.
        os.path.join(general_directory, 'Days', day),
        # Raw string: '\s' is not a valid escape in a normal string literal.
        sep=r'\s+',
        skiprows=1,
        names=['ID','start_time','end_time','exit','goal'],
        engine='python',
    )
    for column in ('exit', 'goal'):
        nday[column] = nday[column].apply(lambda x : x.replace("'",''))
    nday = new_times_df(nday)
    return nday.set_index(nday['ID'])

In [6]:
def stable_day(t_f):
    """Return ``t_f`` without the rows whose index label is not in ``estables``.

    ``estables`` is read as a module-level name and only its keys are used;
    it has to be defined before this function is called.

    Returns:
        pandas.DataFrame: a new frame; ``t_f`` itself is not modified.
    """
    unknown_labels = set(t_f.index).difference(set(estables.keys()))
    return t_f.drop(list(unknown_labels))

In [7]:
def ch_travels(t_f,case='allt'):
    """Select the trips of ``t_f`` that belong to ``case``.

    Args:
        t_f: trips with the columns ``ID``, ``start_time`` and ``end_time``;
            the time columns hold window labels from ``times_r``.
        case: ``'hw'`` keeps the first row of every ``ID``; ``'mo'`` removes
            trips that start or end in the windows 12-24 or 0-4; ``'no'``
            removes trips that start or end in the windows 0-12 or 21-24;
            any other value returns ``t_f`` unchanged.

    Returns:
        pandas.DataFrame: the selected trips.
    """
    if case == 'hw':
        return t_f.drop_duplicates(subset='ID')

    if case == 'mo':
        excluded_windows = times_r[12:24]+times_r[:4]
    elif case == 'no':
        excluded_windows = times_r [:12]+times_r [21:24]
    else:
        return t_f

    # Rows are removed by index label, one window at a time.
    # NOTE(review): if the index contains repeated labels, dropping a label
    # presumably removes every row that shares it, not only the matching trip
    # - confirm this is the intended selection.
    for window in excluded_windows:
        for column in ('start_time', 'end_time'):
            matching = t_f[t_f[column] == window]
            t_f = t_f.drop(matching.index)
    return t_f

In [8]:
def matrix(travels_f,case,day):
    """Count the trips between every pair of towers and save the matrix as CSV.

    Row ``r`` and column ``c`` of the matrix hold the number of rows of
    ``travels_f`` with ``exit == towers[r]`` and ``goal == towers[c]``.
    Pairs that involve a value not listed in ``towers`` are counted but do
    not end up in the matrix.

    ``towers`` is read as a module-level name. The output file is
    ``<general_directory>/Saves/<day[6:10]>_<case>_<sys.argv[1]>_mat.csv``.
    """
    size = len(towers)
    # Insertion order of the dict is the row-major order of the matrix.
    counts = dict.fromkeys(it.product(towers, repeat=2), 0)

    pairs = travels_f.drop(['ID','start_time','end_time'],axis=1)
    for pair, rows in pairs.groupby(['exit','goal']).groups.items():
        counts[pair] = counts.get(pair, 0)+len(rows)

    flat = list(counts.values())
    matrixT = np.array([flat[row*size:(row+1)*size] for row in range(size)])

    file_name = day[6:10]+'_'+case+'_'+sys.argv[1]+'_mat.csv'
    np.savetxt(general_directory+'/Saves/'+file_name, matrixT, delimiter=',')

In [9]:
def hist(travels_f,case,day):
    """Build, save and return the per-tower arrival and departure counts.

    The result has one row per entry of ``towers`` and, for every window
    label ``w`` in ``times_r``, a column ``'<w> In'`` (rows grouped by
    ``end_time`` and ``goal``) followed, after all ``In`` columns, by a column
    ``'<w> Out'`` (rows grouped by ``start_time`` and ``exit``). Missing
    combinations are filled with 0.

    The table is pickled through ``save_dataP`` under the name
    ``<day[6:10]>_<case>_<sys.argv[1]>_hist``. ``towers`` is read as a
    module-level name.

    Returns:
        pandas.DataFrame: the table that was saved.
    """
    departures = travels_f.groupby(['start_time','exit']).count()['ID']
    arrivals = travels_f.groupby(['end_time','goal']).count()['ID']

    def counts_per_tower(grouped, window):
        # One value per tower, 0 where the window or the tower is absent.
        if window not in grouped:
            return [0 for t in towers]
        return [grouped[window][t] if t in grouped[window] else 0 for t in towers]

    In = {window+' In': counts_per_tower(arrivals, window) for window in times_r}
    Out = {window+' Out': counts_per_tower(departures, window) for window in times_r}

    table = pd.DataFrame(In|Out, index=towers)
    save_dataP(day[6:10]+'_'+case+'_'+sys.argv[1]+'_hist', table)
    return table

In [10]:
def chargin_data(t_f,case,day):
    """Write the matrix of one case and, for ``'allt'``, its histogram too.

    Args:
        t_f: trips of the day.
        case: selection passed to ``ch_travels``.
        day: name of the day file; used for the progress label and forwarded
            to ``matrix`` and ``hist``.

    The previous version looped over ``cases`` without using the loop
    variable, repeating the same work for ``case`` several times, and called
    ``hist`` with a missing argument, which raised ``TypeError`` for
    ``'allt'``. The selection is now processed once and ``hist`` receives
    ``case`` as well.
    """
    with_hist = case=='allt'
    with tqdm(desc="Analizing "+day,total=2 if with_hist else 1) as pbar:
        cht_f=ch_travels(t_f,case)
        matrix(cht_f,case,day)
        pbar.update()
        if with_hist:
            hist(cht_f,case,day)
            pbar.update()

In [11]:
def chargin_data_allv(t_f,day):
    """Write the matrix and the histogram of every entry of ``cases``.

    Args:
        t_f: trips of the day.
        day: name of the day file; used for the progress label and forwarded
            to ``matrix`` and ``hist``.

    The progress bar advances once per written output, i.e. twice per case.
    """
    progress = tqdm(desc="Analizing "+day, total=2*len(cases))
    with progress:
        for current_case in cases:
            selection = ch_travels(t_f, current_case)
            for write_output in (matrix, hist):
                write_output(selection, current_case, day)
                progress.update()

In [12]:
def analize(day):
    """Load one day file and write all of its matrices and histograms.

    When the first command-line argument is ``'st'`` the trips are first
    reduced with ``stable_day``.

    Args:
        day: name of the day file, forwarded to ``load_day``.
    """
    print("Loading ",day)
    trips = load_day(day)

    only_stable = sys.argv[1]=='st'
    if only_stable:
        print('Stabilizing', day)
        trips = stable_day(trips)

    chargin_data_allv(trips, day)
        
   
    
    

----
## <a id='toc1_3_'></a>[_Main_](#toc0_)

In [7]:
def _init_worker(loaded_towers, loaded_estables):
    """Publish the shared lookup data as module globals of a worker process."""
    global towers, estables
    towers = loaded_towers
    estables = loaded_estables


def main():
    """Analyse every available day file in parallel.

    Prints ``No data yet`` and returns when there is no day file. Otherwise
    ``Saves/Towers.pkl`` and ``Saves/Estables.pkl`` are loaded; if either is
    missing a message is printed for it and nothing is analysed. The days are
    then distributed over a process pool that runs ``analize`` on each one.
    """
    days = get_days()
    if not days:
        print('No data yet')
        return

    # NOTE: pickle executes code while loading; only use files you created.
    loaded = {}
    for file_name in ('Towers.pkl', 'Estables.pkl'):
        try:
            # Read-only access is enough, so 'rb' replaces 'r+b'.
            with open(general_directory+'Saves/'+file_name, 'rb') as pk:
                loaded[file_name] = pickle.load(pk)
        except FileNotFoundError:
            print('You need the file '+file_name)
    if len(loaded) < 2:
        # Without both files the analysis cannot run; stop instead of failing
        # later inside the workers.
        return

    # Half of the CPUs, but never fewer than one worker.
    workers = max(1, multiprocessing.cpu_count()//2)
    # The analysis functions read `towers` and `estables` as module globals.
    # Locals of this function are invisible to them, so every worker gets the
    # data through the initializer.
    with ProcessPoolExecutor(max_workers=workers,
                             initializer=_init_worker,
                             initargs=(loaded['Towers.pkl'], loaded['Estables.pkl'])) as ex:
        # list() consumes the results so exceptions raised in workers surface.
        list(ex.map(analize, days))
        

In [8]:
# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

No data yet
