In [1]:
import pandas as pd

In [2]:
from MatrixExamples import m

In [3]:
def duplicates_in_list(lst):
    '''Tells whether lst holds any repeated element.

    Returns True when at least one element occurs more than once and
    False otherwise. Elements are collected into a set, so they must
    be hashable.'''
    distinct = set(lst)
    return len(distinct) < len(lst)

In [4]:
def zoning(zones: list, names=['O', 'D']) -> pd.MultiIndex:
    '''Returns a MultiIndex object with zones for origins and destinations.

    zones - either a flat list of zones, which is reused for every level
            in names, or a list of lists holding one list of zones per level
    names - names given to the MultiIndex levels

    Raises ValueError if a list of zones contains duplicates, if zones is
    neither a list nor a list of lists, or if the index could not be built
    with the requested names.'''

    names = list(names)

    # all() is True for an empty sequence, so an empty list must be singled
    # out: it is a flat list with no zones, not a list of lists.
    if isinstance(zones, list) and not zones:
        nested = False
    else:
        nested = all(isinstance(elem, list) for elem in zones)

    if nested:
        ODs = zones
        # Every per-level list has to be checked (and the helper actually called).
        if any(duplicates_in_list(lst) for lst in zones):
            raise ValueError('There are duplicated zones')

    elif isinstance(zones, list):
        ODs = [zones for _ in names]
        if duplicates_in_list(zones):
            raise ValueError('There are duplicated zones')

    else:
        raise ValueError('"zones" must be a list or a list of lists')

    idx = pd.MultiIndex.from_product(ODs, names=names)

    # idx.names is a pandas list-like; compare as plain lists.
    if list(idx.names) != names:
        raise ValueError('zoning could not be created from {}.'.format(zones) +
                         '\n"zones" must be a list or a list of lists')

    return idx

In [7]:
# Scratch check: list.extend() needs an iterable, so extending with None
# raises TypeError (see the recorded output of this cell).
['old', 'new'].extend(None)

TypeError: 'NoneType' object is not iterable

In [266]:
class Matrix(pd.DataFrame):
    '''A Matrix in Transport Planning is a pandas DataFrame,
    with Origins and Destinations as MultiIndex levels: [O, D]'''

    @property
    def _constructor(self):
        '''Matrix operations returns Matrix objects.'''
        return Matrix
    
    @property
    def Os(self):
        '''Returns origin names without duplicates.'''
        return list(self.index.get_level_values(0).unique())
    
    @property
    def Ds(self):
        '''Returns destination names without duplicates.'''
        return list(self.index.get_level_values(1).unique())
    
    @property
    def TO(self):
        '''Returns trip-ends for origins.'''
        return self.groupby(level=0).sum()
    
    @property
    def TD(self):
        '''Returns trip-ends for destinations.'''
        return self.groupby(level=1).sum()
    
    @property
    def TOTALS(self):
        '''Returns the matrix totals.'''
        return self.sum()
    
    def TransposeOD(self, sort=True):
        '''Transposes a matrix in ODT format: swaps Origins and Destinations.'''
        mat_T = self.swaplevel()
        mat_T.index.names = self.index.names
        if sort:
            mat_T = mat_T.sort_index()
        return mat_T
    
    @property
    def TOp(self):
        '''Returns origin proportions: Pij = Tij / TOi'''
        return self.groupby(level=0).apply(lambda x: (x/x.sum()).fillna(0))
    
    @property
    def TDp(self):
        '''Returns destination proportions: Pij = Tij / TDj'''
        return self.groupby(level=1).apply(lambda x: (x/x.sum()).fillna(0))
    
    @property
    def matrix(self):
        '''Returns matrix as a tradicional 2D matrix.'''
        return self.to_panel()
    
    def rezone(self, mapping: pd.DataFrame, mapping_cols=['old', 'new'],
               weight_cols=None, calculate_proportions=True,
               min_weight=0.00000001, check_output_tolerance=0.01):
        '''Changes the zoning system based on mapping.
        A mapping is a correspondence between old zones and new zones.
        
           weights - ['Owght', 'Dwght'] to use for zone disaggregation
           calculate_proportions - if True, weight proportions will be 
               calculated and applied
           min_weight - value for weights with value zero
           check_output_tolerance - will raise an error if the rezoned
               matrix does not have totals similar to self'''
        
        if weight_cols:
            
            if len(weight_cols)!=2:
                raise ValueError("weight_cols must be as in ['Owght', 'Dwght']")
            
            #cap to min_weight
            mapping = mapping[mapping_cols + weight_cols].copy()
            for col in set(weight_cols):
                mapping.loc[mapping[col] < min_weight, col] = min_weight
            
            Owght, Dwght = weight_cols
            Omap = mapping[mapping_cols + [Owght]].copy()
            Dmap = mapping[mapping_cols + [Dwght]].copy()
            
            if calculate_proportions:
                Omap[Owght] = Omap.groupby(mapping_cols[0])[Owght].apply(lambda x: (x/x.sum()))
                Dmap[Dwght] = Dmap.groupby(mapping_cols[1])[Dwght].apply(lambda x: (x/x.sum()))
        else:
            Omap = mapping.reset_index()[mapping_cols]
            Dmap = Omap
        
        suffixes = ['_' + n for n in self.index.names]
        mat = pd.merge(self.reset_index(), Omap.reset_index(),
                      left_on=self.index.names[-2], right_on=mapping_cols[0])
        mat = pd.merge(mat, Dmap.reset_index(),
                      left_on=self.index.names[-1], right_on=mapping_cols[0],
                      suffixes=suffixes)
        
        if weight_cols:
            if Owght == Dwght:
                Owght, Dwght = ['{}{}'.format(Owght,s) for s in suffixes]
                
            for col in self:
                mat[col] = mat[col] * mat[Owght] * mat[Dwght]
                #TODO: proportions are not applied correctly
        
        NewODnames = ['{}{}'.format(mapping_cols[1],s) for s in suffixes]
        
        aux_cols = list(set(mat.columns) - set(self.columns) - set(NewODnames))
        mat = mat.drop(aux_cols, axis=1)
        
        mat = mat.groupby(NewODnames).sum()
        mat = Matrix(mat)
        
        if check_output_tolerance:
            if not all(abs(1-y/x) < check_output_tolerance
                   for x,y in zip(self.TOTALS, mat.TOTALS)):
                raise ValueError("rezone failed. New matrix does not have the same totals.")
        
        return mat
        
    def complete(self, zones):
        '''Rompletes the matrix index with specified zones. Ignores existing zones.'''
        if isinstance(zones, pd.MultiIndex):
            #zones is a zoning system already (MultiIndex)
            zoning = zones
        elif isinstance(zones, list):
            #zones is just a list that needs to be expanded
            zoning = zoning(zones)
        else:
            raise ValueError('"zones" must be list of zones or zoning system (MultiIndex)')
        zoning_union = self.index.union(zoning)
        return self.reindex(index=zoning_union)
    
    def submatrix(self, zoning: pd.MultiIndex):
        '''Returns a submatrix with the origins and destinations specified in zoning'''
        zoning_intersect = self.index.intersection(zoning)
        return self.reindex(zoning_intersect)

In [267]:
# Wrap the example data `m` (imported from MatrixExamples) in a Matrix and display it.
mat = Matrix(m)
mat

Unnamed: 0_level_0,Unnamed: 1_level_0,T1,T2,T3
O,D,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0,1,0
1,2,1,2,6
1,3,1,3,10
1,4,0,4,12
1,5,1,5,12
1,6,1,6,10
1,7,0,7,6
2,1,1,8,0
2,2,1,9,0
2,3,0,10,10


In [268]:
# Example zoning systems: three integer ranges and one set of letter zones.
zoning1 = zoning([0, 1, 2])
zoning2 = zoning([i for i in range(10)])
zoning3 = zoning(list(range(5, 10)))
zoning4 = zoning(['A', 'B', 'C'])

In [269]:
# Extend mat's index with every pair in zoning2; pairs that mat lacks appear as NaN rows.
mat.complete(zoning2)

Unnamed: 0_level_0,Unnamed: 1_level_0,T1,T2,T3
O,D,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,,,
0,1,,,
0,2,,,
0,3,,,
0,4,,,
0,5,,,
0,6,,,
0,7,,,
0,8,,,
0,9,,,


In [270]:
# 2D view (origins as rows, destinations as columns) of column T3.
mat.matrix.T3

D,1,2,3,4,5,6,7
O,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,6,10,12,12,10,6
2,0,0,10,12,12,10,6
3,0,6,0,12,12,10,6
4,0,6,10,0,12,10,6
5,0,6,10,12,0,10,6
6,0,6,10,12,12,0,6
7,0,6,10,12,12,10,0


In [271]:
# Destination proportions of column T3, shown as a 2D origin x destination table.
mat.TDp.matrix.T3

D,1,2,3,4,5,6,7
O,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.0,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667
2,0.0,0.0,0.166667,0.166667,0.166667,0.166667,0.166667
3,0.0,0.166667,0.0,0.166667,0.166667,0.166667,0.166667
4,0.0,0.166667,0.166667,0.0,0.166667,0.166667,0.166667
5,0.0,0.166667,0.166667,0.166667,0.0,0.166667,0.166667
6,0.0,0.166667,0.166667,0.166667,0.166667,0.0,0.166667
7,0.0,0.166667,0.166667,0.166667,0.166667,0.166667,0.0


In [272]:
# Plain correspondence: each zone is listed under exactly one sector.
basicmapping = pd.DataFrame({
    'sectors': ['A', 'A', 'B', 'B', 'B', 'C', 'C'],
    'zones': list(range(1, 8)),
})

# Correspondence in which zones 2 and 5 are listed under two sectors each,
# with Val1 and Val2 available as weight columns.
mapping = pd.DataFrame({
    'sectors': ['A', 'B', 'A', 'B', 'B', 'B', 'C', 'C', 'C'],
    'zones': [1, 2, 2, 3, 4, 5, 6, 7, 5],
    'Val1': [1, 4, 4, 2, 1, 3, 1, 4, 2],
    'Val2': [3, 0, 1, 2, 1, 3, 3, 1, 0],
})

In [273]:
# Aggregate zones into sectors with the unweighted mapping ('zones' = old, 'sectors' = new).
mat.rezone(basicmapping, ['zones', 'sectors'])

Unnamed: 0_level_0,Unnamed: 1_level_0,T1,T2,T3
sectors_O,sectors_D,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,A,3,20,6
A,B,4,45,68
A,C,2,40,32
B,A,4,135,18
B,B,6,225,68
B,C,4,165,48
C,A,2,160,12
C,B,4,255,68
C,C,3,180,16


In [274]:
# Same unweighted zones -> sectors aggregation as the previous cell (repeated call).
mat.rezone(basicmapping, ['zones', 'sectors'])

Unnamed: 0_level_0,Unnamed: 1_level_0,T1,T2,T3
sectors_O,sectors_D,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,A,3,20,6
A,B,4,45,68
A,C,2,40,32
B,A,4,135,18
B,B,6,225,68
B,C,4,165,48
C,A,2,160,12
C,B,4,255,68
C,C,3,180,16


In [275]:
# Weighted rezone using Val1 as both the origin and the destination weight;
# check_output_tolerance=0 switches the totals check off.
# NOTE: the recorded output of this cell is a ValueError.
rezoned = mat.rezone(mapping, ['zones', 'sectors'], weight_cols=['Val1', 'Val1'], check_output_tolerance=0)
rezoned

ValueError: cannot copy sequence with size 2 to array axis with dimension 9

In [254]:
# Column totals of the rezoned matrix, to compare against mat.TOTALS.
# NOTE(review): this cell's execution counter (254) is lower than that of the
# cell assigning `rezoned` (275), so the shown output may be stale.
rezoned.TOTALS

T1     13.833333
T2    522.666667
T3    131.000000
dtype: float64

In [168]:
# Column totals of the original matrix.
mat.TOTALS

T1      32
T2    1225
T3     336
dtype: int64