# pandas sample

[reference](https://pandas.pydata.org/docs/user_guide/10min.html)

In [1]:
!pip install pandas



## import pandas

In [5]:
import pandas as pd
import numpy as np

## creating your first series and data frame

In [6]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [7]:
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [8]:
dates = pd.date_range("20130101", periods=6)

In [9]:
print(dates)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')


In [10]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

In [11]:
print(df)

                   A         B         C         D
2013-01-01  1.280726  0.147904  2.012513  0.121011
2013-01-02 -1.553230  0.511425 -0.054808 -1.831615
2013-01-03 -1.120092 -0.367089 -0.472553 -1.529445
2013-01-04  1.495501  0.103318 -1.475724  0.841698
2013-01-05 -1.188019  1.256653  1.106258 -1.268120
2013-01-06  0.239293 -0.559810 -1.224088  0.824984


In [12]:
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [13]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,1.280726,0.147904,2.012513,0.121011
2013-01-02,-1.55323,0.511425,-0.054808,-1.831615
2013-01-03,-1.120092,-0.367089,-0.472553,-1.529445
2013-01-04,1.495501,0.103318,-1.475724,0.841698
2013-01-05,-1.188019,1.256653,1.106258,-1.26812


In [14]:
df.tail()

Unnamed: 0,A,B,C,D
2013-01-02,-1.55323,0.511425,-0.054808,-1.831615
2013-01-03,-1.120092,-0.367089,-0.472553,-1.529445
2013-01-04,1.495501,0.103318,-1.475724,0.841698
2013-01-05,-1.188019,1.256653,1.106258,-1.26812
2013-01-06,0.239293,-0.55981,-1.224088,0.824984


In [15]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [16]:
df.to_numpy()

array([[ 1.28072609,  0.14790386,  2.01251326,  0.1210115 ],
       [-1.55323009,  0.51142468, -0.05480809, -1.83161512],
       [-1.12009239, -0.36708892, -0.47255322, -1.52944516],
       [ 1.49550104,  0.1033181 , -1.47572403,  0.84169831],
       [-1.18801945,  1.25665283,  1.1062582 , -1.26811989],
       [ 0.23929273, -0.55981045, -1.22408828,  0.82498417]])

In [17]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.14097,0.182067,-0.018067,-0.473581
std,1.333666,0.651542,1.354586,1.213274
min,-1.55323,-0.55981,-1.475724,-1.831615
25%,-1.171038,-0.249487,-1.036205,-1.464114
50%,-0.4404,0.125611,-0.263681,-0.573554
75%,1.020368,0.420544,0.815992,0.648991
max,1.495501,1.256653,2.012513,0.841698


## Selecting data

In [18]:
df["A"]

2013-01-01    1.280726
2013-01-02   -1.553230
2013-01-03   -1.120092
2013-01-04    1.495501
2013-01-05   -1.188019
2013-01-06    0.239293
Freq: D, Name: A, dtype: float64

In [19]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,1.280726,0.147904,2.012513,0.121011
2013-01-02,-1.55323,0.511425,-0.054808,-1.831615
2013-01-03,-1.120092,-0.367089,-0.472553,-1.529445


In [20]:
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,1.280726,0.147904
2013-01-02,-1.55323,0.511425
2013-01-03,-1.120092,-0.367089
2013-01-04,1.495501,0.103318
2013-01-05,-1.188019,1.256653
2013-01-06,0.239293,-0.55981


In [21]:
df.at[dates[0], "A"]

1.2807260857155118

In [22]:
df.iloc[3]

A    1.495501
B    0.103318
C   -1.475724
D    0.841698
Name: 2013-01-04 00:00:00, dtype: float64

In [23]:
df.iat[1, 1]

0.5114246750173752

In [24]:
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.280726,0.147904,2.012513,0.121011
2013-01-04,1.495501,0.103318,-1.475724,0.841698
2013-01-06,0.239293,-0.55981,-1.224088,0.824984


In [25]:
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.280726,0.147904,2.012513,0.121011
2013-01-02,,0.511425,,
2013-01-03,,,,
2013-01-04,1.495501,0.103318,,0.841698
2013-01-05,,1.256653,1.106258,
2013-01-06,0.239293,,,0.824984


## Basic operations

In [26]:
df.mean()

A   -0.140970
B    0.182067
C   -0.018067
D   -0.473581
dtype: float64

In [27]:
s = pd.Series(np.random.randint(0, 7, size=10))

In [28]:
s.value_counts()

4    3
5    3
3    2
2    2
Name: count, dtype: int64

In [30]:
s

0    4
1    5
2    3
3    2
4    4
5    4
6    5
7    2
8    5
9    3
dtype: int32

### saving and loading CSV

In [31]:
df.to_csv("data.csv")

In [34]:
loaded_df = pd.read_csv("data.csv", index_col=0)

In [35]:
print(loaded_df)

                   A         B         C         D
2013-01-01  1.280726  0.147904  2.012513  0.121011
2013-01-02 -1.553230  0.511425 -0.054808 -1.831615
2013-01-03 -1.120092 -0.367089 -0.472553 -1.529445
2013-01-04  1.495501  0.103318 -1.475724  0.841698
2013-01-05 -1.188019  1.256653  1.106258 -1.268120
2013-01-06  0.239293 -0.559810 -1.224088  0.824984
