# Pandas tutorial (Day-11)


- How to install libraries:
  - pip install pandas
  - pip install numpy

- Importing libraries:

In [51]:
import pandas as pd
import numpy as np

In [52]:
# Object creation: a Series built from a plain list.
# The np.nan entry marks a missing value, which makes the whole Series float64.
s = pd.Series([1, 2, 3, np.nan, 4, 5, 6, 7])
s

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
6    6.0
7    7.0
dtype: float64

In [53]:
# Date creation: six consecutive calendar days starting on 2013-01-01,
# returned as a DatetimeIndex that is used as the row index further down.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [54]:
# Build a frame of random normal draws with 6 rows (one per entry in `dates`)
# and 4 columns labelled A, B, C and D.
# No seed is set in this cell, so re-running it produces different numbers
# from the ones in the saved output.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.921015,2.036817,-0.976086,1.752388
2013-01-02,-1.245982,1.372515,0.297614,-1.299776
2013-01-03,-0.427879,0.795067,0.658226,-0.928907
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519


In [55]:
 df2 = pd.DataFrame(
    {
        "A":1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] *4, dtype="int32"),
        "E": pd.Categorical(["female", "girl","women","girl"]), # non-numerical
        "F": "gender",
    }
 )
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,female,gender
1,1.0,2013-01-02,1.0,3,girl,gender
2,1.0,2013-01-02,1.0,3,women,gender
3,1.0,2013-01-02,1.0,3,girl,gender


In [56]:
# Inspect the data type of every column; each column of df2 has its own dtype
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [57]:
# Show only the first 2 rows of df
df.head(2)

Unnamed: 0,A,B,C,D
2013-01-01,-1.921015,2.036817,-0.976086,1.752388
2013-01-02,-1.245982,1.372515,0.297614,-1.299776


In [58]:
# Show only the last 2 rows of df
df.tail(2)

Unnamed: 0,A,B,C,D
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519


In [59]:
# The row labels of df2 (here the integers 0 through 3)
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [60]:
# Convert the values of df to a NumPy array; the index and column labels are not part of the result
df.to_numpy()

array([[-1.92101459,  2.03681671, -0.97608648,  1.75238843],
       [-1.24598177,  1.37251537,  0.29761391, -1.29977613],
       [-0.42787941,  0.79506668,  0.65822595, -0.92890657],
       [ 0.09746294, -0.30285462, -0.07671642, -0.39696579],
       [-1.52716745,  0.19849635,  0.74559322,  1.13576269],
       [-2.45053766,  0.12723379, -1.17958773, -1.73519006]])

In [61]:
# df2 mixes several column dtypes, so the resulting array falls back to dtype=object
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'female',
        'gender'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'girl', 'gender'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'women', 'gender'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'girl', 'gender']],
      dtype=object)

In [62]:
# Quick statistical summary of each column: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-1.245853,0.704546,-0.088493,-0.245448
std,0.944439,0.87562,0.822291,1.394282
min,-2.450538,-0.302855,-1.179588,-1.73519
25%,-1.822553,0.145049,-0.751244,-1.207059
50%,-1.386575,0.496782,0.110449,-0.662936
75%,-0.632405,1.228153,0.568073,0.752581
max,0.097463,2.036817,0.745593,1.752388


In [63]:
# For df2 only the numeric columns (A, C and D) appear in the summary
df2.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


In [64]:
# Transpose the data
df2.T  # rows become columns and columns become rows

Unnamed: 0,0,1,2,3
A,1.0,1.0,1.0,1.0
B,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00
C,1.0,1.0,1.0,1.0
D,3,3,3,3
E,female,girl,women,girl
F,gender,gender,gender,gender


In [65]:
# Sort by the row labels (axis=0). With ascending=False the result is in
# descending order, so the latest date comes first.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966
2013-01-03,-0.427879,0.795067,0.658226,-0.928907
2013-01-02,-1.245982,1.372515,0.297614,-1.299776
2013-01-01,-1.921015,2.036817,-0.976086,1.752388


In [66]:
# Sort the rows by the values in column B (ascending by default)
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-03,-0.427879,0.795067,0.658226,-0.928907
2013-01-02,-1.245982,1.372515,0.297614,-1.299776
2013-01-01,-1.921015,2.036817,-0.976086,1.752388


In [67]:
 # This is how we can get columns one by one
 df["B"]

2013-01-01    2.036817
2013-01-02    1.372515
2013-01-03    0.795067
2013-01-04   -0.302855
2013-01-05    0.198496
2013-01-06    0.127234
Freq: D, Name: B, dtype: float64

In [68]:
# Row-wise selection with a positional slice.
# df has only 6 rows, so the end value 7 is past the last row and every row is returned.
df[0:7]

Unnamed: 0,A,B,C,D
2013-01-01,-1.921015,2.036817,-0.976086,1.752388
2013-01-02,-1.245982,1.372515,0.297614,-1.299776
2013-01-03,-0.427879,0.795067,0.658226,-0.928907
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519


In [69]:
# Row-wise selection by label: pick the row for one date.
# dates[2] is the third date (2013-01-03); the row comes back as a Series.
df.loc[dates[2]]

A   -0.427879
B    0.795067
C    0.658226
D   -0.928907
Name: 2013-01-03 00:00:00, dtype: float64

In [70]:
# Column-wise selection by label: ":" keeps every row,
# and the list names the columns to keep.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-1.921015,2.036817
2013-01-02,-1.245982,1.372515
2013-01-03,-0.427879,0.795067
2013-01-04,0.097463,-0.302855
2013-01-05,-1.527167,0.198496
2013-01-06,-2.450538,0.127234


In [71]:
# Select a range of dates by label; both end dates are included in the result
df.loc["20130101":"20130102", ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-1.921015,2.036817
2013-01-02,-1.245982,1.372515


In [72]:
# Select the chosen columns for one exact date; the result is a Series
df.loc["20130102", ["A", "B", "C"]]

A   -1.245982
B    1.372515
C    0.297614
Name: 2013-01-02 00:00:00, dtype: float64

In [73]:
# Get a single scalar value by row label (the last date) and column label
df.at[dates[5], "A"]

-2.450537657831196

In [80]:
# Selection by integer position: rows 0-3 and columns 0-4 (the end of each slice is excluded)
df2.iloc[0:4, 0:5]

Unnamed: 0,A,B,C,D,E
0,1.0,2013-01-02,1.0,3,female
1,1.0,2013-01-02,1.0,3,girl
2,1.0,2013-01-02,1.0,3,women
3,1.0,2013-01-02,1.0,3,girl


In [81]:
# Selection by integer position: all rows, first two columns
df.iloc[:, 0:2]

Unnamed: 0,A,B
2013-01-01,-1.921015,2.036817
2013-01-02,-1.245982,1.372515
2013-01-03,-0.427879,0.795067
2013-01-04,0.097463,-0.302855
2013-01-05,-1.527167,0.198496
2013-01-06,-2.450538,0.127234


In [82]:
# Boolean indexing: keep only the rows where column A is greater than 0
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966


In [83]:
# Element-wise condition on the whole frame: the shape is kept, and values
# that are not greater than 0 show up as missing (empty) in the output
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,,2.036817,,1.752388
2013-01-02,,1.372515,0.297614,
2013-01-03,,0.795067,0.658226,
2013-01-04,0.097463,,,
2013-01-05,,0.198496,0.745593,1.135763
2013-01-06,,0.127234,,


In [84]:
# Work on a copy of df so the column assignments in the following cells do not alter df.
# NOTE: this rebinds df2, which earlier in the notebook held a different DataFrame.
df2 = df.copy()

In [86]:
# Add a new column from a list. The list must have exactly one value per row
# (df2 has 6 rows, so 6 values are given).
df2["BabaGKaColumn"] = ["One", "One", "Two", "Three", "Four", "Three"]

In [94]:
# Add another column named "mean" holding one float per row.
# NOTE(review): the saved output below shows a column called "new" rather than
# "BabaGKaColumn", so it does not match the cells above -- it looks like it
# came from an earlier version of the cells; re-run the notebook from a fresh
# kernel to refresh it.
df2["mean"]=[1.2, 2.2, 3.3, 4.5, 5.5,6.6]
df2

Unnamed: 0,A,B,C,D,new,mean
2013-01-01,-1.921015,2.036817,-0.976086,1.752388,1.2,1.2
2013-01-02,-1.245982,1.372515,0.297614,-1.299776,2.2,2.2
2013-01-03,-0.427879,0.795067,0.658226,-0.928907,3.3,3.3
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966,4.5,4.5
2013-01-05,-1.527167,0.198496,0.745593,1.135763,5.5,5.5
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519,6.6,6.6


In [95]:
# Keep all rows but only the first four columns by position (A-D),
# which drops the columns that were added above.
df2 = df2.iloc[:,0:4]
df2

Unnamed: 0,A,B,C,D
2013-01-01,-1.921015,2.036817,-0.976086,1.752388
2013-01-02,-1.245982,1.372515,0.297614,-1.299776
2013-01-03,-0.427879,0.795067,0.658226,-0.928907
2013-01-04,0.097463,-0.302855,-0.076716,-0.396966
2013-01-05,-1.527167,0.198496,0.745593,1.135763
2013-01-06,-2.450538,0.127234,-1.179588,-1.73519
