## Getting started with pandas

In [1]:
import numpy as np
import pandas as pd

### Series

In [2]:
# Explicit integer labels (1-4) replace the default 0-based index.
data = ['Monday', 'Tuesday', 'Wednesday', 'Thursday']
index = [1, 2, 3, 4]

s = pd.Series(data=data, index=index)
s

1       Monday
2      Tuesday
3    Wednesday
4     Thursday
dtype: object

In [3]:
# The index holds the integers 1-4, so 1 is matched as a label, not as a position.
s.loc[1]

'Monday'

In [4]:
# Five standard-normal draws labelled 'a'..'e'. The generator is not seeded,
# so the values differ on every run.
s = pd.Series(np.random.randn(5), index=list('abcde'))
s

a   -0.907303
b    0.076719
c   -0.011622
d    0.933775
e   -1.319637
dtype: float64

In [5]:
# The labels attached to the Series, returned as an Index object.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
# The underlying data without its labels, as a NumPy array.
s.values

array([-0.90730338,  0.07671907, -0.01162174,  0.93377515, -1.31963745])

In [7]:
# Dict keys become the index labels; insertion order (b, a, c) is kept.
d = dict(b=1, a=0, c=2)
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [8]:
# Same construction with float values, which gives a float64 Series.
d = dict(zip("abc", (0.0, 1.0, 2.0)))

pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [9]:
# An explicit index selects and reorders the dict entries; a label with no
# matching key ('d') is filled with NaN.
pd.Series(d, index=list('bcda'))

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [10]:
# A scalar is repeated once for every label in the index.
pd.Series(5.0, index=list('abcde'))

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [11]:
# Positional access goes through .iloc. On a string-labelled Series, plain s[0]
# relies on an integer-position fallback that pandas has deprecated.
s.iloc[0]

-0.9073033779874651

In [12]:
# First three elements by position; the labels travel with the values.
s.iloc[:3]

a   -0.907303
b    0.076719
c   -0.011622
dtype: float64

In [13]:
# Boolean mask: keep only the entries strictly greater than the median.
s.loc[s.gt(s.median())]

b    0.076719
d    0.933775
dtype: float64

In [14]:
# Select by a list of positions, in the order given. .iloc is required here:
# the labels are strings, and treating integer keys in s[...] as positions is
# deprecated in pandas.
s.iloc[[4, 3, 1]]

e   -1.319637
d    0.933775
b    0.076719
dtype: float64

In [15]:
# NumPy functions apply element-wise; the result is a Series with the same labels.
np.exp(s)

a    0.403611
b    1.079739
c    0.988446
d    2.544095
e    0.267232
dtype: float64

In [16]:
# Element type of the Series.
s.dtype

dtype('float64')

In [17]:
# The array object backing the Series (displayed here as a PandasArray).
s.array

<PandasArray>
[  -0.9073033779874651,   0.07671906615283365, -0.011621741593320165,
    0.9337751529041202,   -1.3196374546811962]
Length: 5, dtype: float64

In [18]:
# The data as a NumPy array, shown for comparison with .array and .to_numpy().
s.values

array([-0.90730338,  0.07671907, -0.01162174,  0.93377515, -1.31963745])

In [19]:
# Convert the Series to a NumPy ndarray.
s.to_numpy()

array([-0.90730338,  0.07671907, -0.01162174,  0.93377515, -1.31963745])

In [20]:
# Check that .to_numpy() and .values hold the same data, element by element.
# Comparing `a.all() == b.all()` only compares two booleans ("is every element
# non-zero?"), so it can report True for arrays whose contents differ.
if np.array_equal(s.to_numpy(), s.values):
    print(True)

True


In [21]:
# Dict-style lookup of a single value by its label.
s.loc['a']

-0.9073033779874651

In [22]:
# `in` tests the index labels of the Series, not its values; 'f' is not a label.
"f" in s

False

In [23]:
# 'e' is one of the index labels, so the membership test succeeds.
"e" in s

True

In [24]:
# Series.get returns the supplied default for a missing label instead of raising
# KeyError. Without a default it returns None, which a notebook cell does not
# display, so only the call with an explicit default is kept.
s.get('f', np.nan)

nan

In [25]:
# Arithmetic with a scalar is applied to every element; the labels are kept.
s*2

a   -1.814607
b    0.153438
c   -0.023243
d    1.867550
e   -2.639275
dtype: float64

In [26]:
# Addition aligns the operands on their labels. 'a' and 'e' each appear in only
# one of the two slices, so their sums come out as NaN.
s.iloc[1:] + s.iloc[:-1]

a         NaN
b    0.153438
c   -0.023243
d    1.867550
e         NaN
dtype: float64

In [27]:
# A Series can carry a name; with no index given, the labels default to 0..4.
# The values are unseeded random draws and change on every run.
s = pd.Series(np.random.randn(5)).rename("something")
s

0   -0.840882
1    0.194929
2   -1.038651
3    0.347427
4   -0.512446
Name: something, dtype: float64

In [28]:
# The name assigned to the Series when it was created.
s.name

'something'

### DataFrame

In [29]:
# Two Series of different lengths: "one" has no entry for label 'd'.
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
}

In [30]:
# Each dict entry becomes a column; the row index is the union of the Series
# labels, and "one" gets a missing value at 'd'.
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [31]:
# An explicit index selects and orders the rows taken from the Series.
pd.DataFrame(d, index=list('dba'))

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [32]:
# `columns` chooses which dict keys to use; a name with no matching key
# ("three") produces a column of missing values.
pd.DataFrame(d, index=list('dba'), columns=['one', 'three'])

Unnamed: 0,one,three
d,,
b,2.0,
a,1.0,


In [33]:
# Row labels of the DataFrame.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [34]:
# Column labels of the DataFrame.
df.columns

Index(['one', 'two'], dtype='object')

In [35]:
# Equal-length lists become columns; with no index given, rows are numbered from 0.
d = dict(one=[1.0, 2.0, 3.0, 4.0], two=[4.0, 3.0, 2.0, 1.0])
pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [36]:
# Same data with explicit row labels in place of the default 0..3.
pd.DataFrame(data=d, index=list('abcd'))

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [38]:
# Plain (unstructured) array of two zeros; with no dtype argument np.zeros uses
# float64. The previous `dtype=[()]` is not a valid field specification and
# raises. The trailing `data` displays the array.
data = np.zeros((2,))
data

array([0., 0.])

In [40]:
# Structured array with three named fields per record: 4-byte int "A",
# 4-byte float "B" and a 10-byte string "C". "S10" is the supported spelling of
# the byte-string type; the legacy "a10" alias was removed in NumPy 2.0.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [42]:
# Display the structured array again.
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])