### Pandas data structures

In [26]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [27]:
# Series data structure: a one-dimensional labelled array, built here from a plain list.
# No index is given, so pandas supplies the default integer one.
S = Series(data=[1, 2, 3, 4, 5])
print(S)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [28]:
# Show, one per line: the element labelled 1, the underlying values array,
# the index object, and the index materialised as a plain list.
print(S[1], S.values, S.index, list(S.index), sep="\n")

2
[1 2 3 4 5]
RangeIndex(start=0, stop=5, step=1)
[0, 1, 2, 3, 4]


In [39]:
# Supply explicit string labels instead of relying on the default integer index.
S1 = Series(data=[1, 2, 3, 4, 5], index=list("abcde"))
S1

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [40]:
# Positional access goes through .iloc; plain [] is reserved for label lookups.
# (S1[0] relied on the integer-position fallback of Series.__getitem__, which is
# deprecated for label-indexed Series and fails once that fallback is removed.)
print(S1.iloc[0])
print(S1["a"])  # Both S1.iloc[0] and S1['a'] return the same element
S1.index

1
1


Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [41]:
# Assigning to a label that is not in the index yet appends a new element.
S1.loc["g"] = 7
S1

a    1
b    2
c    3
d    4
e    5
g    7
dtype: int64

In [42]:
# Build a second labelled Series and append it to S1; concat returns a new
# Series, which is rebound to the name S1.
s = Series(data=[8, 9, 10], index=list("hij"))
S1 = pd.concat([S1, s], axis=0)
S1

a     1
b     2
c     3
d     4
e     5
g     7
h     8
i     9
j    10
dtype: int64

**Deletion from series**

In [43]:
# drop defaults to inplace=False: it hands back a new Series, rebound here to S1.
S1 = S1.drop(index="g")
S1

a     1
b     2
c     3
d     4
e     5
h     8
i     9
j    10
dtype: int64

In [44]:
# With inplace=True nothing is returned: the labels are removed from S1 itself.
S1.drop(index=["i", "j"], inplace=True)
S1

a    1
b    2
c    3
d    4
e    5
h    8
dtype: int64

In [45]:
# A Series can be built from a dict: the keys become the index labels.
d = dict(a=3, b=4)
S2 = Series(data=d)
S2

a    3
b    4
dtype: int64

In [46]:
# Label "a" already exists, so this overwrites its value instead of adding an entry.
S2.loc["a"] = 4
S2

a    4
b    4
dtype: int64

In [47]:
# Series arithmetic is vectorized: the scalar is added to every element at once.
S1.add(1)

a    2
b    3
c    4
d    5
e    6
h    9
dtype: int64

In [48]:
# Element-wise square of the Series.
S * S

0     1
1     4
2     9
3    16
4    25
dtype: int64

In [50]:
# Natural logarithm of each element of the Series.
# The NumPy function is applied element-wise and the result keeps S1's index labels.
np.log(S1)

a    0.000000
b    0.693147
c    1.098612
d    1.386294
e    1.609438
h    2.079442
dtype: float64

In [51]:
# Adding two Series aligns them on index labels; a label that is missing
# from either operand yields NaN in the result.
S1.add(S2)

a    5.0
b    6.0
c    NaN
d    NaN
e    NaN
h    NaN
dtype: float64

In [53]:
# Datatype conversion in a Series: start from integers and inspect the inferred dtype.
SC = Series(data=[1, 2, 3, 4, 5])
SC.dtype

dtype('int64')

In [55]:
# Cast the integer Series to floating point; astype returns a new Series, rebound to SC.
SC = SC.astype(float)
SC

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
dtype: float64

In [58]:
# Corner case: a Series mixing a numeric string, integers and missing values.
c = Series(data=["1", 2, 3, np.nan, np.nan])
# c.astype("int64") would raise an error here because of the missing values,
# so the Series is cast to float64 instead, which can hold NaN.
c = c.astype(np.float64)
c

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
dtype: float64

In [59]:
# The dtype cast above still leaves c as a pandas Series object.
type(c)

pandas.core.series.Series