In [3]:
import ipaddress

import numpy as np
import pandas as pd

import pandas_ip as ip

## What currently works:

- Creating `IPArrays`
- Storing `IPArrays` in pandas containers

These rely on some changes to pandas.

## Creating arrays of IPAddresses

From strings

In [4]:
ip.to_ipaddress(['0.0.0.0', '192.168.1.1', '2001:0db8:85a3:0000:0000:8a2e:0370:7334'])

IPAddress(['0.0.0.0', '192.168.1.1', '2001:db8:85a3::8a2e:370:7334'])

From integers

In [5]:
ip.to_ipaddress([0, 3232235777, 42540766452641154071740215577757643572])

IPAddress(['0.0.0.0', '192.168.1.1', '2001:db8:85a3::8a2e:370:7334'])

From bytes

In [6]:
# Build the array from raw bytes. Every entry below is a 16-byte value; the
# recorded output shows which address each one decodes to.
ip.to_ipaddress([
    # 0.0.0.0 -- all sixteen bytes are zero.
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
    # 192.168.1.1 -- the four address bytes (c0 a8 01 01) sit at the end.
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\xa8\x01\x01',
    # 2001:db8:85a3::8a2e:370:7334 -- Python prints the printable bytes as
    # ASCII: ' ' = 0x20, '\r' = 0x0d, '.' = 0x2e, 'p' = 0x70, 's' = 0x73,
    # '4' = 0x34.
    b' \x01\r\xb8\x85\xa3\x00\x00\x00\x00\x8a.\x03ps4',
])

IPAddress(['0.0.0.0', '192.168.1.1', '2001:db8:85a3::8a2e:370:7334'])

Those all return instances of `IPAddress`, an array-like container analogous to pandas' `Categorical`.

In [7]:
values = ip.IPAddress.from_pyints(
    [0, 3232235777, 42540766452641154071740215577757643572]
)
values

IPAddress(['0.0.0.0', '192.168.1.1', '2001:db8:85a3::8a2e:370:7334'])

## Pandas Containers

Our `IPAddress` array can be stored in pandas' containers.

In [8]:
s = pd.Series(values)
s

0                         0.0.0.0
1                     192.168.1.1
2    2001:db8:85a3::8a2e:370:7334
dtype: ip

In [9]:
df = pd.DataFrame({
    "A": [np.nan, 2, 3],
    "B": values
})
df

Unnamed: 0,A,B
0,,0.0.0.0
1,2.0,192.168.1.1
2,3.0,2001:db8:85a3::8a2e:370:7334


## IP Accessor

We register the `.ip` accessor with pandas.

In [10]:
s.ip.is_ipv4

0     True
1     True
2    False
dtype: bool

In [11]:
s.ip.is_ipv6

0    False
1    False
2     True
dtype: bool

In [12]:
s.isna()

0     True
1    False
2    False
dtype: bool

## Pandas Methods

Some pandas operations work correctly on IPAddress data.

Indexing:

In [13]:
df.loc[[0, 1], 'B']

0        0.0.0.0
1    192.168.1.1
Name: B, dtype: ip

In [14]:
df.loc[2, 'B']

IPv6Address('2001:db8:85a3::8a2e:370:7334')

In [15]:
df.iloc[1, 1]

IPv4Address('192.168.1.1')

Concatenation:

In [19]:
pd.concat([df, df], ignore_index=True)

Unnamed: 0,A,B
0,,0.0.0.0
1,2.0,192.168.1.1
2,3.0,2001:db8:85a3::8a2e:370:7334
3,,0.0.0.0
4,2.0,192.168.1.1
5,3.0,2001:db8:85a3::8a2e:370:7334


Null checking:

In [20]:
df.isna()

Unnamed: 0,A,B
0,True,True
1,False,False
2,False,False


Many things don't work (yet):

In [26]:
df.B >= df.B

0    False
1     True
2     True
Name: B, dtype: bool

In [27]:
df.B.sort_values()

AttributeError: 'IPAddress' object has no attribute 'argsort'

In [89]:
arr = ip.IPAddress([10, 10, 1, 1, 5])

In [103]:
uniques = pd.unique(arr)

In [90]:
# Factorize the 'lo' and 'hi' fields of the array's backing data separately.
# Only the integer codes (`a`, `b`) are kept; the uniques are discarded.
# The fields are cast to 'u8' first: factorizing the raw field directly fails
# with "Big-endian buffer not supported on little-endian compiler" (see the
# `pd.factorize(arr.data['lo'])` cell further down).
# NOTE(review): presumably 'lo'/'hi' are the low/high 64-bit halves of each
# address -- confirm against the IPAddress implementation.
a, _ = pd.factorize(arr.data['lo'].astype('u8'))
b, _ = pd.factorize(arr.data['hi'].astype('u8'))

In [94]:
# Combine the per-field codes into a single integer label per element.
# XOR-ing the two code arrays is not injective -- e.g. the pairs (1, 2) and
# (2, 1) both give 3, and (0, 0) and (1, 1) both give 0 -- so distinct
# addresses could end up sharing a label. Encode each (a, b) pair as a
# mixed-radix number instead, which is unique per pair.
n_b_codes = b.max() + 1 if len(b) else 1  # guard: max() of an empty array raises
labels = a * n_b_codes + b

In [118]:
# Keep an element when its label differs from the label immediately before it.
# The first element has no predecessor, so it is always kept. Note that this
# only collapses *adjacent* repeats of a label.
inner_mask = np.diff(labels) != 0
mask = np.empty(len(labels), dtype=bool)
mask[0], mask[1:] = True, inner_mask
mask

array([ True, False,  True, False,  True])

In [121]:
arr[mask]

IPAddress(['0.0.0.10', '0.0.0.1', '0.0.0.5'])

In [111]:
# Show the adjacent-difference flags (one fewer entry than `labels`).
# The cell previously referenced `mak`, a name that is never defined in this
# notebook and raises NameError on a fresh kernel.
inner_mask

array([False,  True, False,  True])

In [105]:
labels[labels]

array([0, 0, 0, 0, 1])

In [40]:
pd.factorize(arr.data['lo'])

ValueError: Big-endian buffer not supported on little-endian compiler

In [28]:
df.fillna(method='bfill')  # (0, 'B') should have been filled

Unnamed: 0,A,B
0,2.0,0.0.0.0
1,2.0,192.168.1.1
2,3.0,2001:db8:85a3::8a2e:370:7334


In [29]:
df.groupby("B").A.count()

TypeError: 2001:db8:85a3::8a2e:370:7334 and 192.168.1.1 are not of the same version

## IPAddressIndex

Nothing in this section is actually implemented yet.

In [None]:
pd.Index._engine

In [None]:
df.B.values.value_counts()

In [None]:
idx = pd.Series([10, 5, 0], index=ip.IPAddressIndex(df.B), name='counts')
idx

In [None]:
idx.loc[ipaddress.IPv4Address(0)]