# 10 Minutes to Pandas

## Author: Sheikh Irfan Ullah Khan

### Contact Me: shirfan.math@gmail.com

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# A Series is a labelled 1-D array. The np.nan entry marks a missing value
# and makes pandas store the whole column as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2013-01-01 (daily frequency is the
# date_range default; spelled out here for readability).
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Build a 6x4 frame of standard-normal draws: one row per entry in `dates`,
# columns labelled A-D.
# A seeded Generator makes the numbers identical on every Restart & Run All;
# the unseeded np.random.randn call produced different values on each run.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [5]:
# A plain Python dictionary mapping a name to a number.
# Named `sample_dict` rather than `dict` so the built-in `dict` type is not
# overwritten for the rest of the kernel session (a later `dict(...)` call
# would otherwise fail with "'dict' object is not callable").
# NOTE(review): 5.10 is stored as the float 5.1 - confirm that is intended.
sample_dict = {"Irfan": 5.10, "Ahmad": 4.5}

In [6]:
# Build a 4-row frame from a dict of columns. The scalar entries (A, B, F)
# are repeated on every row; C, D and E each supply four values.
df2 = pd.DataFrame({
    "A": 1.0,                          # float scalar
    "B": pd.Timestamp("20130102"),     # single timestamp
    "C": pd.Series(1, index=list(range(4)), dtype="float32"),
    "D": np.array([3, 3, 3, 3], dtype="int32"),
    "E": pd.Categorical(["test", "train"] * 2),
    "F": "foo",                        # string scalar
})

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# Per-column dtypes of df2: each column keeps the dtype it was built with
# (e.g. float32 for C, int32 for D, category for E).
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [8]:
# Print a summary of df2: index range, non-null count and dtype per column,
# and approximate memory usage.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 3
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype        
---  ------  --------------  -----        
 0   A       4 non-null      float64      
 1   B       4 non-null      datetime64[s]
 2   C       4 non-null      float32      
 3   D       4 non-null      int32        
 4   E       4 non-null      category     
 5   F       4 non-null      object       
dtypes: category(1), datetime64[s](1), float32(1), float64(1), int32(1), object(1)
memory usage: 288.0+ bytes


In [9]:
# Preview the first five rows (n=5 is the default for head()).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023


In [10]:
# Preview the last five rows (n=5 is the default for tail()).
df.tail(n=5)

Unnamed: 0,A,B,C,D
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [11]:
# Row labels of df: the six daily timestamps created with pd.date_range.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Column labels of df (A, B, C, D).
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# Display the whole frame (6 rows x 4 columns).
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [14]:
# Convert df to a plain NumPy array: only the values remain, the date index
# and the column labels are not part of the result.
df.to_numpy()

array([[-2.04261803,  0.14968627,  2.94012582,  1.05570279],
       [ 0.19767837, -0.51604574, -1.1740925 , -0.98173897],
       [ 2.34626817,  1.5667294 ,  0.85317017,  1.370745  ],
       [-0.44018668, -0.650983  ,  1.4100109 , -2.37983789],
       [-0.57561951, -1.41172565,  0.07642068,  0.0690229 ],
       [-0.49973533, -1.33600627, -1.53998066,  0.82506764]])

In [15]:
# Summary statistics for each column: count, mean, std, min,
# the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.169036,-0.366391,0.427609,-0.00684
std,1.436645,1.108112,1.674464,1.435696
min,-2.042618,-1.411726,-1.539981,-2.379838
25%,-0.556648,-1.16475,-0.861464,-0.719048
50%,-0.469961,-0.583514,0.464795,0.447045
75%,0.038212,-0.016747,1.270801,0.998044
max,2.346268,1.566729,2.940126,1.370745


In [16]:
# describe() returned a separate summary table; df still holds the original values.
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [17]:
# Transpose: the columns A-D become the rows and the dates become the columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-2.042618,0.197678,2.346268,-0.440187,-0.57562,-0.499735
B,0.149686,-0.516046,1.566729,-0.650983,-1.411726,-1.336006
C,2.940126,-1.174092,0.85317,1.410011,0.076421,-1.539981
D,1.055703,-0.981739,1.370745,-2.379838,0.069023,0.825068


In [18]:
# Transposing did not modify df; it is still dates (rows) by A-D (columns).
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [19]:
# Reorder the columns by label, descending (D, C, B, A); row order is untouched.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,1.055703,2.940126,0.149686,-2.042618
2013-01-02,-0.981739,-1.174092,-0.516046,0.197678
2013-01-03,1.370745,0.85317,1.566729,2.346268
2013-01-04,-2.379838,1.410011,-0.650983,-0.440187
2013-01-05,0.069023,0.076421,-1.411726,-0.57562
2013-01-06,0.825068,-1.539981,-1.336006,-0.499735


In [20]:
# Reorder the rows by index label, descending (latest date first).
df.sort_index(axis="index", ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-01,-2.042618,0.149686,2.940126,1.055703


In [21]:
# Sort the rows by the values in column A, smallest first; the result is a
# new frame and each row keeps its original date label.
df.sort_values(by="A")

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745


In [22]:
# sort_values returned a sorted copy; df itself is still in date order.
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [23]:
# Ascending sort on column A again, this time passing `by` positionally.
df.sort_values("A")

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745


In [24]:
# df is unchanged by the sort above: rows are still in date order.
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [25]:
# Select columns A and B by label for every row (":" means all rows).
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,-2.042618,0.149686
2013-01-02,0.197678,-0.516046
2013-01-03,2.346268,1.566729
2013-01-04,-0.440187,-0.650983
2013-01-05,-0.57562,-1.411726
2013-01-06,-0.499735,-1.336006


In [26]:
# Selecting columns A and B returned a new frame; df still has all four columns.
df

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [27]:
# Dimensions of df as a (rows, columns) tuple.
df.shape

(6, 4)

In [28]:
# iloc selects by integer position (not by label): all rows, and the
# columns at positions 0-3 (the end of a slice is excluded).
df.iloc[:, :4]

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [29]:
# loc selects by label: all rows (":"), columns picked by name.
df.loc[:, ["A", "B", "C", "D"]]

Unnamed: 0,A,B,C,D
2013-01-01,-2.042618,0.149686,2.940126,1.055703
2013-01-02,0.197678,-0.516046,-1.174092,-0.981739
2013-01-03,2.346268,1.566729,0.85317,1.370745
2013-01-04,-0.440187,-0.650983,1.410011,-2.379838
2013-01-05,-0.57562,-1.411726,0.076421,0.069023
2013-01-06,-0.499735,-1.336006,-1.539981,0.825068


In [30]:
df.iloc[3] # Positions start at 0, so position 3 is the fourth row; it is returned as a Series

A   -0.440187
B   -0.650983
C    1.410011
D   -2.379838
Name: 2013-01-04 00:00:00, dtype: float64

In [31]:
# Positional slice with iloc: rows at positions 3-4 and columns at
# positions 0-1 (the end of each slice is excluded).
df.iloc[3:5, :2]

Unnamed: 0,A,B
2013-01-04,-0.440187,-0.650983
2013-01-05,-0.57562,-1.411726


In [32]:
# Load seaborn's example Titanic passenger dataset into a DataFrame.
# `sns` comes from the import cell at the top of the notebook, so all
# dependencies are declared in one place.
# load_dataset fetches the file from seaborn's online data repository on
# first use, so this cell needs network access on a fresh machine.
df1 = sns.load_dataset('titanic')
df1

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [33]:
# Draw 100 passengers at random. Fixing random_state returns the same 100
# rows on every run; without it each execution shows a different sample.
df1.sample(100, random_state=0)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
494,0,3,male,21.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
773,0,3,male,,0,0,7.2250,C,Third,man,True,,Cherbourg,no,True
132,0,3,female,47.0,1,0,14.5000,S,Third,woman,False,,Southampton,no,False
859,0,3,male,,0,0,7.2292,C,Third,man,True,,Cherbourg,no,True
56,1,2,female,21.0,0,0,10.5000,S,Second,woman,False,,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,1,3,female,,1,0,15.5000,Q,Third,woman,False,,Queenstown,yes,False
638,0,3,female,41.0,0,5,39.6875,S,Third,woman,False,,Southampton,no,False
245,0,1,male,44.0,2,0,90.0000,Q,First,man,True,C,Queenstown,no,False
402,0,3,female,21.0,1,0,9.8250,S,Third,woman,False,,Southampton,no,False


In [34]:
# Boolean-mask filter: keep only the passengers whose fare is below 5.
df1.loc[df1["fare"] < 5]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
179,0,3,male,36.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
263,0,1,male,40.0,0,0,0.0,S,First,man,True,B,Southampton,no,True
271,1,3,male,25.0,0,0,0.0,S,Third,man,True,,Southampton,yes,True
277,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
302,0,3,male,19.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
378,0,3,male,20.0,0,0,4.0125,C,Third,man,True,,Cherbourg,no,True
413,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
466,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
481,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
597,0,3,male,49.0,0,0,0.0,S,Third,man,True,,Southampton,no,True


In [35]:
# Assignment: load the iris dataset and repeat every step of this "10 Minutes to Pandas" walkthrough on it.