# Atrax

### Import Libraries

In [1]:
from atrax import Atrax

### Create fake data

In [4]:
# Sample person records: one dict per person with name, age and salary keys.
data = [
    dict(name=name, age=age, salary=salary)
    for name, age, salary in (
        ('Alice', 30, 100000),
        ('Bob', 25, 80000),
        ('Charlie', 35, 120000),
        ('David', 28, 85000),
        ('Eva', 40, 130000),
        ('Frank', 38, 125000),
    )
]

### Series tests

#### Printing

In [2]:
# Named Series holding the integers 1 through 11.
s = Atrax.Series(list(range(1, 12)), name="numbers")

In [4]:
# First 7 elements of the Series.
s.head(7)

0,1
0,1
1,2
2,3
3,4
4,5
5,6
6,7
"Name: numbers, dtype: int","Name: numbers, dtype: int"


In [5]:
# Last 3 elements of the Series.
s.tail(3)

0,1
8,9
9,10
10,11
"Name: numbers, dtype: int","Name: numbers, dtype: int"


In [3]:
# Rich display of the Series.
# NOTE(review): the recorded output lists only the first 10 of the 11 values,
# with no truncation marker — presumably the repr truncates; confirm.
s

0,1,2,3
0,1,,
1,2,,
2,3,,
3,4,,
4,5,,
5,6,,
6,7,,
7,8,,
8,9,,
9,10,,


In [4]:
# Plain-text form: the recorded output prints the first 10 items followed by
# "...(11 total)" and the name/dtype line.
print(s)

0: 1
1: 2
2: 3
3: 4
4: 5
5: 6
6: 7
7: 8
8: 9
9: 10
...(11 total)
Name: numbers, dtype: int


In [5]:
type(s)

atrax.core.series.Series

In [6]:
# Unnamed Series of floats; the recorded output reports dtype float.
Atrax.Series([1.5, 2.5, 3.25])

0,1
0,1.5
1,2.5
2,3.25
dtype:,float


In [7]:
# Mixed int/float/bool/str values; the recorded output reports dtype object.
Atrax.Series([1, 2.5, True, 'hello'], name='mixed')

0,1,2,3
0,1,,
1,2.5,,
2,True,,
3,hello,,
Name:,mixed,dtype:,object


#### astype

In [2]:
# Numeric strings "1".."4" converted with astype('int').
s = Atrax.Series([str(n) for n in range(1, 5)])
s.astype('int')

0,1
0,1
1,2
2,3
3,4
dtype:,int


#### apply

In [2]:
# Double every element with an anonymous function.
s = Atrax.Series(list(range(1, 5)))
s.apply(lambda value: value * 2)

0,1
0,2
1,4
2,6
3,8
dtype:,int


In [3]:
def square(x):
    """Return ``x`` multiplied by itself."""
    result = x * x
    return result

In [4]:
# Apply the named function `square` to every element of the Series built in
# this cell, so the cell does not depend on a variable from an earlier cell.
s1 = Atrax.Series([1,2,3,4])
s1.apply(square)

0,1
0,1
1,4
2,9
3,16
dtype:,int


#### arithmetic


In [2]:
# Two equal-length integer Series used as operands in the arithmetic examples.
s1 = Atrax.Series(list(range(1, 5)))
s2 = Atrax.Series(list(range(5, 9)))

##### addition

In [3]:
# Scalar operand: 10 is added to every element.
s1 + 10

0,1
0,11
1,12
2,13
3,14
dtype:,int


In [4]:
# Series + Series adds element by element (1+5, 2+6, 3+7, 4+8).
s1 + s2

0,1
0,6
1,8
2,10
3,12
dtype:,int


##### subtraction

In [5]:
# Subtract 2 from every element.
s1 - 2

0,1
0,-1
1,0
2,1
3,2
dtype:,int


##### multiplication

In [6]:
# Multiply every element by 2.
s1 * 2

0,1
0,2
1,4
2,6
3,8
dtype:,int


##### division

In [7]:
# True division by 2; the recorded output reports dtype float.
s1/2

0,1
0,0.5
1,1.0
2,1.5
3,2.0
dtype:,float


##### modulus

In [8]:
# Remainder of each element divided by 2.
s1%2

0,1
0,1
1,0
2,1
3,0
dtype:,int


##### floor division

In [9]:
# Floor division by 2; the recorded output keeps dtype int.
s1//2

0,1
0,0
1,1
2,1
3,2
dtype:,int


##### pow

In [10]:
# Raise every element to the power of 2.
s1**2

0,1
0,1
1,4
2,9
3,16
dtype:,int


#### Boolean operators

In [3]:
# Comparing a Series with a scalar yields an element-wise True/False mask.
s1 = Atrax.Series([1,2,3,4], name="s1")
mask = s1 > 2
# NOTE(review): the recorded output labels this mask "dtype: int" although the
# values are booleans — confirm whether that is intended.
print(mask)

0: False
1: False
2: True
3: True
Name: (s1 > 2), dtype: int


In [4]:
# Cast the mask to int (False -> 0, True -> 1 per the recorded output), then
# scale every element by 100.
(s1 > 2).astype('int').apply(lambda x: x * 100)

0,1,2,3
0,0,,
1,0,,
2,100,,
3,100,,
Name:,(s1 > 2),dtype:,int


In [2]:
# Quantity and price Series of equal length for the combined-mask examples.
qty = Atrax.Series([5, 15, 20, 8], name="qty")
price = Atrax.Series([4, 6, 3, 5], name="price")

In [3]:
# filter mask: True only where qty > 10 AND price < 5 (element-wise).
# The parentheses are required because & binds tighter than > and <.
mask = (qty > 10) & (price < 5)
print(mask)

0: False
1: False
2: True
3: False
Name: ((qty > 10)) & (price < 5), dtype: int


In [4]:
# ~ inverts the mask element by element.
not_mask = ~mask
print(not_mask)

0: True
1: True
2: False
3: True
Name: (~((qty > 10)) & (price < 5)), dtype: int


#### Locators

In [2]:
# Series with explicit string labels; note the label "b" appears twice.
s = Atrax.Series([10, 20, 30], name="sales", index=["a", "b", "b"])

In [3]:
s

0,1
a,10.0
b,20.0
b,30.0
"Name: sales, dtype: int",


In [5]:
# .loc is indexed with a label, .iloc with an integer position.
# NOTE(review): "b" labels two rows (20 and 30) but the recorded output of
# .loc["b"] is the single value 20 — confirm this is intended.
print(s.loc["b"])
print(s.iloc[1])

20
20


In [6]:
# Select by a list of labels.
# NOTE(review): the recorded output shows a single "b" row with value 30,
# whereas .loc["b"] printed 20 — duplicate-label handling looks inconsistent; confirm.
print(s.loc[["a", "b"]])

a   10
b   30
Name: sales, dtype: int


In [7]:
# Positional slice: the recorded output contains the rows at positions 0 and 1.
print(s.iloc[0:2])

a   10
b   20
Name: sales, dtype: int


In [2]:
s1 = Atrax.Series([1,2,3,4])
s1

0,1
0,1
1,2
2,3
3,4
"Name: , dtype: int","Name: , dtype: int"


### DataSet

In [2]:
# Small product table: one record per item with name, qty, price and dept keys.
ds = Atrax.DataSet([
    dict(name=name, qty=qty, price=price, dept=dept)
    for name, qty, price, dept in (
        ('candy', 2, 3.5, 'grocery'),
        ('food', 5, 4.0, 'grocery'),
        ('drinks', 1, 10.0, 'soft drinks'),
        ('chips', 4, 2.5, 'snacks'),
        ("chocolate", 6, 5.75, 'snacks'),
    )
])

#### helpers

In [3]:
# Print a summary of the columns (non-null counts, dtypes, memory estimate).
# NOTE(review): the recorded output reports 2 columns with 3 non-null values,
# which does not match the 4-column, 5-row ds defined above — likely a stale
# output from an earlier run; re-run to confirm.
ds.info()

<class 'atrax.Atrax'>
Data columns (total 2):
  qty             Non-Null Count: 3     Dtype: int
  price           Non-Null Count: 3     Dtype: float
dtypes: 2
Memory usage: 48 bytes (est.)


In [4]:
# Summary statistics; the recorded output lists mean, std, min, Q1, median,
# Q3, max and count for the qty and price columns.
ds.describe()

stat,qty,price
mean,2.67,5.83
std,2.08,3.62
min,1.0,3.5
Q1,1.0,3.5
median,2.0,4.0
Q3,5.0,10.0
max,5.0,10.0
count,3.0,3.0


In [6]:
ds.head()

qty,price
2,3.5
5,4.0
1,10.0


In [8]:
# shape is invoked as a method here; the recorded output is a 2-tuple.
# NOTE(review): presumably (rows, columns) — confirm against the Atrax API.
ds.shape()

(3, 2)

#### apply

In [3]:
ds

name,qty,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks


In [4]:
# Row-wise apply: multiply qty by price for every row.
total = ds.apply(lambda row: row['qty'] * row['price'])
total


[7.0, 20.0, 10.0]

In [5]:
# create a new dataset: the callback returns one dict per row, and the
# recorded output shows those keys (item, total) as the resulting columns
new_ds = ds.apply(
    lambda row: dict(item=row['name'], total=row['qty'] * row['price'])
)
new_ds

item,total
candy,7.0
food,20.0
drinks,10.0


#### groupby

In [10]:
# Group rows by dept and sum within each group; the recorded output contains
# the summed qty and price plus the dept key.
t = ds.groupby('dept').sum()
t

qty,price,dept
7,7.5,grocery
1,10.0,soft drinks


In [4]:
# Per-dept mean of qty and price.
m = ds.groupby('dept').mean()
m

qty,price,dept
3.5,3.75,grocery
1.0,10.0,soft drinks


In [5]:
# Same grouping with the key passed as a one-element list; the recorded
# output matches the string form.
n = ds.groupby(['dept']).mean()
n

qty,price,dept
3.5,3.75,grocery
1.0,10.0,soft drinks


In [6]:
# Same grouping again, this time via the by= keyword (rebinds n).
n = ds.groupby(by=['dept']).mean()
n

qty,price,dept
3.5,3.75,grocery
1.0,10.0,soft drinks


In [7]:
ds

name,qty,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks


In [8]:
# One aggregation per column; the recorded output names each result
# <column>_<function> (qty_count, price_sum).
g = ds.groupby(by=['dept']).agg(dict(qty='count', price='sum'))
g

qty_count,price_sum,dept
2,7.5,grocery
1,10.0,soft drinks


In [9]:
# Several aggregations per column: each (column, function) pair becomes its
# own output column in the recorded result.
g = ds.groupby(by=['dept']).agg(dict(
    qty=['sum', 'mean', 'count'],
    price=['sum', 'max', 'min'],
))
g

qty_sum,qty_mean,qty_count,price_sum,price_max,price_min,dept
7,3.5,2,7.5,4.0,3.5,grocery
1,1.0,1,10.0,10.0,10.0,soft drinks


#### sort

In [10]:
# Order rows by qty, smallest first.
ds.sort(by='qty', ascending=True)

name,qty,price,dept
drinks,1,10.0,soft drinks
candy,2,3.5,grocery
chips,4,2.5,snacks
food,5,4.0,grocery
chocolate,6,5.75,snacks


#### basics

In [4]:
# Add a derived column: element-wise product of qty and price.
# Note that this assigns into ds itself, so later cells see the extra column.
ds['total'] = ds['qty'] * ds['price']

In [5]:
ds['qty'] > 2

0,1
0,False
1,True
2,False
"Name: (qty > 2), dtype: int","Name: (qty > 2), dtype: int"


In [6]:
# Boolean-mask row filter: keep the rows where qty > 2.
# NOTE(review): the recorded output (a single row, no name/dept columns) does
# not match the 5-row ds defined above — it looks stale; re-run to confirm.
ds[ds['qty'] > 2]

qty,price,total
5,4.0,20.0


In [None]:
# label-based lookup: boolean row mask plus a list of column names
ds.loc[ds['qty'] > 2, ['name', 'qty', 'price']]

name,qty,price
food,5,4.0
chips,4,2.5
chocolate,6,5.75


In [5]:
ds.iloc[0:2, [0,2]] # first 2 rows, first and third columns

name,price
candy,3.5
food,4.0


#### filter

In [None]:
# keep only these columns
r = ds.filter(items=['qty', 'price'])
r

qty,price
2,3.5
5,4.0
1,10.0


In [None]:
# keep only the columns whose name matches like='pri' (here just "price");
# presumably a substring match, as in pandas — confirm
r = ds.filter(like='pri')
r

price
3.5
4.0
10.0


#### isin

In [4]:
# Rows whose dept is one of the listed values, restricted to four columns.
ds.loc[ds['dept'].isin(['grocery', 'snacks']), ['name', 'qty', 'price', 'dept']]

name,qty,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
chips,4,2.5,snacks
chocolate,6,5.75,snacks


#### between

In [5]:
# Rows with price in the range 2..5 (2.5, 3.5 and 4.0 match; 5.75 and 10.0 do not).
# This data does not exercise the bounds themselves, so inclusivity is not shown here.
ds.loc[ds['price'].between(2,5), ['name', 'qty', 'price', 'dept']]

name,qty,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
chips,4,2.5,snacks


#### unique

In [6]:
# Distinct dept values, returned as a Series named "Unique(dept)".
# NOTE(review): the recorded order (snacks, grocery, soft drinks) differs from
# the order of first appearance in ds — ordering may not be stable; confirm.
ds['dept'].unique()

0,1
0,snacks
1,grocery
2,soft drinks
"Name: Unique(dept), dtype: object","Name: Unique(dept), dtype: object"


In [8]:
# Number of distinct dept values.
ds['dept'].nunique()

3

#### rename

In [4]:
# Rename columns via an old-name -> new-name mapping; the result is bound to ds1.
ds1 = ds.rename(columns={'qty': 'quantity', 'price': 'prices'})
ds1

name,quantity,prices,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks
chips,4,2.5,snacks
chocolate,6,5.75,snacks


In [5]:
ds1

name,quantity,prices,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks
chips,4,2.5,snacks
chocolate,6,5.75,snacks


In [6]:
# With inplace=True the rename is applied to ds1 itself (no reassignment),
# as the recorded output of ds1 shows.
ds1.rename(columns={'prices': 'price'}, inplace=True)
ds1

name,quantity,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks
chips,4,2.5,snacks
chocolate,6,5.75,snacks


#### drop

In [3]:
# Drop columns by name; the result is bound to ds2.
ds2 = ds.drop(columns=['price', 'qty'])
ds2

name,dept
candy,grocery
food,grocery
drinks,soft drinks
chips,snacks
chocolate,snacks


In [4]:
# In-place variant: removes the dept column from ds2 directly.
ds2.drop(columns=['dept'], inplace=True)
ds2

name
candy
food
drinks
chips
chocolate


In [5]:
# Rebind ds2 to a copy of ds (replacing the reduced ds2 from the previous cell).
ds2 = ds.copy()
ds2

name,qty,price,dept
candy,2,3.5,grocery
food,5,4.0,grocery
drinks,1,10.0,soft drinks
chips,4,2.5,snacks
chocolate,6,5.75,snacks


In [6]:
# Drop rows 0 and 2 (candy and drinks, per the recorded output); result bound to ds3.
ds3 = ds2.drop(index=[0,2])
ds3

name,qty,price,dept
food,5,4.0,grocery
chips,4,2.5,snacks
chocolate,6,5.75,snacks


In [None]:
# NOTE(review): index=[1] removed "chips", the row at position 1 of ds3, not
# "food" (row 1 of the original ds) — so drop(index=...) appears to be
# positional, or the index was reset by the previous drop; confirm.
ds3.drop(columns=['price'], index=[1], inplace=True) # remove the price column and drop the second row
ds3

name,qty,dept
food,5,grocery
chocolate,6,snacks
