## Pandas

### Create pandas series

In [2]:
import pandas as pd
import numpy as np

a = pd.Series([1,2,3,4,5])
print(a)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [4]:
# information about series

print(a.dtype)
print(a.axes)
print(a.size)
print(a.ndim)

int64
[RangeIndex(start=0, stop=5, step=1)]
5
1


In [7]:
print(a.values)

[1 2 3 4 5]


In [8]:
# access to index with 'head' func

print(a.head(3))

0    1
1    2
2    3
dtype: int64


In [9]:
print(a.tail(3))

2    3
3    4
4    5
dtype: int64


### index naming

In [10]:
a = pd.Series([40, 27, 30, 37, 25], index = [1,3,5,7,9])
print(a)

1    40
3    27
5    30
7    37
9    25
dtype: int64


In [11]:
a = pd.Series([46, 40, 34, 30], index = ["a", "b", "c", "d"])
print(a)

a    46
b    40
c    34
d    30
dtype: int64


In [12]:
print(a["a": "c"])

a    46
b    40
c    34
dtype: int64


### join two series

In [14]:
a_1 = pd.Series([28, 24, 20, 13], index=["e", "f", "g", "h"])
b = pd.concat([a,a_1])
print(b)

a    46
b    40
c    34
d    30
e    28
f    24
g    20
h    13
dtype: int64


### element operations

In [15]:
a = np.array([1,2,3,4,5])
series = pd.Series(a)
print(series[1:3])

1    2
2    3
dtype: int32


In [16]:
# element questioning

a = pd.Series([1,2,3,4], index=["a","b", "c", "d"])
print("b" in a)

True


In [17]:
print("e" in a)

False


### Create dataframe with pandas

In [18]:
list_1 = [1,2,3,4,5]
data = pd.DataFrame(list_1, columns=["variable"])
print(data)

   variable
0         1
1         2
2         3
3         4
4         5


In [19]:
m = np.arange(1,10).reshape((3,3))
data = pd.DataFrame(m, columns=["var1", "var2", "var3"])
print(data)

   var1  var2  var3
0     1     2     3
1     4     5     6
2     7     8     9


In [20]:
print(data["var1"])

0    1
1    4
2    7
Name: var1, dtype: int32


### Variable naming in dataframe

In [22]:
ls = np.arange(1,10).reshape((3,3))
df = pd.DataFrame(ls, columns=["var1", "var2", "var3"])
print(df)

   var1  var2  var3
0     1     2     3
1     4     5     6
2     7     8     9


In [23]:
df.columns = ["valu1", "valu2", "value3"]
print(df)

   valu1  valu2  value3
0      1      2       3
1      4      5       6
2      7      8       9


### Dataframe transactions

In [25]:
df.drop(0, axis=0, inplace=True)
print(df)

   valu1  valu2  value3
1      4      5       6
2      7      8       9


In [27]:
df["var4"] = df["valu1"] * df["valu2"]
print(df)

   valu1  valu2  value3  var4
1      4      5       6    20
2      7      8       9    56


In [34]:
ls_1 = ["valu1", "valu2"]
df.drop(ls_1, axis=1)
print(df)

   valu1  valu2  value3  var4
1      4      5       6    20
2      7      8       9    56


### choose observation and variable: loc and iloc

In [36]:
m = np.random.randint(1,30, size=(10,3))
df = pd.DataFrame(m, columns=["var1", "var2", "var3"])

In [37]:
df.loc[0:5]

Unnamed: 0,var1,var2,var3
0,29,8,16
1,23,12,27
2,24,21,18
3,6,20,29
4,24,9,25
5,26,27,26


In [38]:
df.iloc[0:5]

Unnamed: 0,var1,var2,var3
0,29,8,16
1,23,12,27
2,24,21,18
3,6,20,29
4,24,9,25


In [41]:
df.loc[(df.var1 > 15), ["var1", "var2"]]

Unnamed: 0,var1,var2
0,29,8
1,23,12
2,24,21
4,24,9
5,26,27
6,19,15
8,19,29
9,17,15


### join transactions

In [43]:
df1 = df + 20
jn = pd.concat([df1, df])
print(jn)

   var1  var2  var3
0    49    28    36
1    43    32    47
2    44    41    38
3    26    40    49
4    44    29    45
5    46    47    46
6    39    35    36
7    27    47    40
8    39    49    37
9    37    35    43
0    29     8    16
1    23    12    27
2    24    21    18
3     6    20    29
4    24     9    25
5    26    27    26
6    19    15    16
7     7    27    20
8    19    29    17
9    17    15    23


In [44]:
pd.concat([df1, df],ignore_index=True)

Unnamed: 0,var1,var2,var3
0,49,28,36
1,43,32,47
2,44,41,38
3,26,40,49
4,44,29,45
5,46,47,46
6,39,35,36
7,27,47,40
8,39,49,37
9,37,35,43


In [46]:
pd.concat([df1, df], join="inner") # does not take non-partners > 'join=inner'

Unnamed: 0,var1,var2,var3
0,49,28,36
1,43,32,47
2,44,41,38
3,26,40,49
4,44,29,45
5,46,47,46
6,39,35,36
7,27,47,40
8,39,49,37
9,37,35,43


### advanced join transaction

In [47]:
cs1 = pd.DataFrame({'worker': ['Ali', 'John', 'Maria'],
                   'group': ["accounting", "engineer", "IT"]})
cs2 = pd.DataFrame({'worker': ['Ali', 'John', 'Maria'],
                   'first_entrance': [2012, 2014, 2019]})
cs_whole = pd.merge(cs1, cs2)
print(cs_whole)

  worker       group  first_entrance
0    Ali  accounting            2012
1   John    engineer            2014
2  Maria          IT            2019
