# Pandas

In [1]:
import pandas as pd
import numpy as np
import openpyxl

In [2]:
# A Series built from mixed ints, a string and NaN; because the element types
# differ, pandas falls back to the generic `object` dtype.
pd.Series(data=[1, 2, 'jadi', 3, np.nan])

0       1
1       2
2    jadi
3       3
4     NaN
dtype: object

In [3]:
# Six consecutive calendar days starting on 2022-01-01; used below as the
# DataFrame's row index.
dates = pd.date_range(start="20220101", periods=6, freq="D")
dates

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal samples, one row per date and columns a-d.
# NOTE: no random seed is set in the visible cells, so re-running this cell
# produces different numbers than the output stored below.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
df

Unnamed: 0,a,b,c,d
2022-01-01,-1.307739,0.235503,1.185031,-1.049822
2022-01-02,-1.146558,0.474369,-0.639223,1.960478
2022-01-03,-0.315788,-0.134179,0.835472,0.787683
2022-01-04,-1.343009,0.89755,0.402764,-0.157162
2022-01-05,0.36898,-0.338918,1.530287,0.744675
2022-01-06,0.605581,-0.793444,1.456261,0.64907


In [5]:
# Select a single column by label; the result is a Series that keeps the
# date index and is named after the column.
df["a"]

2022-01-01   -1.307739
2022-01-02   -1.146558
2022-01-03   -0.315788
2022-01-04   -1.343009
2022-01-05    0.368980
2022-01-06    0.605581
Freq: D, Name: a, dtype: float64

In [6]:
# Peek at the first two rows of the frame.
df.head(n=2)

Unnamed: 0,a,b,c,d
2022-01-01,-1.307739,0.235503,1.185031,-1.049822
2022-01-02,-1.146558,0.474369,-0.639223,1.960478


In [7]:
# Peek at the last two rows of the frame.
df.tail(n=2)

Unnamed: 0,a,b,c,d
2022-01-05,0.36898,-0.338918,1.530287,0.744675
2022-01-06,0.605581,-0.793444,1.456261,0.64907


In [8]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,a,b,c,d
count,6.0,6.0,6.0,6.0
mean,-0.523089,0.056813,0.795099,0.489154
std,0.870553,0.604837,0.817621,1.01301
min,-1.343009,-0.793444,-0.639223,-1.049822
25%,-1.267443,-0.287734,0.510941,0.044396
50%,-0.731173,0.050662,1.010252,0.696872
75%,0.197788,0.414653,1.388454,0.776931
max,0.605581,0.89755,1.530287,1.960478


In [9]:
# Mean of every column; returns a Series indexed by column name.
df.mean()

a   -0.523089
b    0.056813
c    0.795099
d    0.489154
dtype: float64

In [10]:
# Mean of one column only; returns a single scalar rather than a Series.
df["a"].mean()

-0.5230887599037596

In [11]:
# Swap rows and columns: the dates become column labels and a-d become rows.
# (`df.T` is the shorthand accessor for the same operation.)
df.transpose()

Unnamed: 0,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06
a,-1.307739,-1.146558,-0.315788,-1.343009,0.36898,0.605581
b,0.235503,0.474369,-0.134179,0.89755,-0.338918,-0.793444
c,1.185031,-0.639223,0.835472,0.402764,1.530287,1.456261
d,-1.049822,1.960478,0.787683,-0.157162,0.744675,0.64907


In [12]:
# Reorder the rows by the values in column "b", smallest first.
# Returns a new frame; `df` itself is left unchanged.
df.sort_values(by="b", ascending=True)

Unnamed: 0,a,b,c,d
2022-01-06,0.605581,-0.793444,1.456261,0.64907
2022-01-05,0.36898,-0.338918,1.530287,0.744675
2022-01-03,-0.315788,-0.134179,0.835472,0.787683
2022-01-01,-1.307739,0.235503,1.185031,-1.049822
2022-01-02,-1.146558,0.474369,-0.639223,1.960478
2022-01-04,-1.343009,0.89755,0.402764,-0.157162


In [13]:
# Boolean-mask filtering: keep only the rows whose value in column "a" is
# strictly positive.
df.loc[df["a"] > 0]

Unnamed: 0,a,b,c,d
2022-01-05,0.36898,-0.338918,1.530287,0.744675
2022-01-06,0.605581,-0.793444,1.456261,0.64907


## read & write operations
- pd.read_csv("myfile.csv") (for reading a CSV file; `read_csv` is a top-level pandas function, not a DataFrame method)

In [14]:
# Write the frame (including its date index) to a CSV file in the current
# working directory.
df.to_csv(path_or_buf="intro-pandas.csv")

#### csv = comma-separated values

In [15]:
# Write the frame to an Excel workbook in the current working directory.
# NOTE(review): writing .xlsx presumably relies on the openpyxl package
# imported at the top of the notebook as pandas' writer engine — confirm.
df.to_excel(excel_writer='intro-pandas.xlsx')