# 10 Minutes to Pandas

In [2]:
import pandas as pd
import numpy as np

In [3]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
#np here is numpy whereas nan is not a number. It is a special value in numpy that represents an invalid or unreliable result. 
# It is used to indicate that a value is missing or unreliable in a dataset.

s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
dates = pd.date_range("20250412", periods=15)

#dates are always in the format of year-month-day
dates

DatetimeIndex(['2025-04-12', '2025-04-13', '2025-04-14', '2025-04-15',
               '2025-04-16', '2025-04-17', '2025-04-18', '2025-04-19',
               '2025-04-20', '2025-04-21', '2025-04-22', '2025-04-23',
               '2025-04-24', '2025-04-25', '2025-04-26'],
              dtype='datetime64[ns]', freq='D')

In [5]:
df = pd.DataFrame(np.random.randn(15, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2025-04-12,-1.424472,0.349569,0.706149,0.068881
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015
2025-04-14,-0.588822,-0.712983,0.752802,-0.692384
2025-04-15,-0.233184,0.319955,1.538491,1.75019
2025-04-16,0.369683,1.157398,0.987999,-1.747668
2025-04-17,-0.830751,-0.716609,1.188963,0.923156
2025-04-18,0.516381,0.170187,-1.06148,1.10284
2025-04-19,-0.021682,1.214568,0.461782,0.120435
2025-04-20,-1.16388,-0.824062,-2.035692,0.030006
2025-04-21,1.293945,-0.947916,-1.579765,1.196646


# Dictionary

In [6]:
#dictionary

dict={"Hamza" : 6.3, "Shadja" : 5.2}

In [7]:

df2 = pd.DataFrame(

    {

        "A": 1.0,

        "B": pd.Timestamp("20130102"),

        "C": pd.Series(1, index=list(range(4)), dtype="float32"),

        "D": np.array([3] * 4, dtype="int32"),

        "E": pd.Categorical(["test", "train", "test", "train"]),

        "F": "foo",

    }

)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [8]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [9]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 3
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype        
---  ------  --------------  -----        
 0   A       4 non-null      float64      
 1   B       4 non-null      datetime64[s]
 2   C       4 non-null      float32      
 3   D       4 non-null      int32        
 4   E       4 non-null      category     
 5   F       4 non-null      object       
dtypes: category(1), datetime64[s](1), float32(1), float64(1), int32(1), object(1)
memory usage: 288.0+ bytes


In [10]:
df.head()

Unnamed: 0,A,B,C,D
2025-04-12,-1.424472,0.349569,0.706149,0.068881
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015
2025-04-14,-0.588822,-0.712983,0.752802,-0.692384
2025-04-15,-0.233184,0.319955,1.538491,1.75019
2025-04-16,0.369683,1.157398,0.987999,-1.747668


In [11]:
df.tail()

Unnamed: 0,A,B,C,D
2025-04-22,-1.560366,-0.473861,-0.699014,-0.44446
2025-04-23,-0.644451,1.126689,0.318808,-0.073075
2025-04-24,0.037099,0.347983,1.502865,-0.429667
2025-04-25,-0.986925,0.383005,0.215083,2.011957
2025-04-26,0.892909,-0.853073,-0.834381,-1.267181


In [12]:
df.index


DatetimeIndex(['2025-04-12', '2025-04-13', '2025-04-14', '2025-04-15',
               '2025-04-16', '2025-04-17', '2025-04-18', '2025-04-19',
               '2025-04-20', '2025-04-21', '2025-04-22', '2025-04-23',
               '2025-04-24', '2025-04-25', '2025-04-26'],
              dtype='datetime64[ns]', freq='D')

In [13]:
df2.index

Index([0, 1, 2, 3], dtype='int64')

In [14]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [16]:
#if we want to convert our data into numpy array we use this function. The two brackets represents the number of rows and columns in the data. 
# The data is stored in the variable “data” which is a list of lists. The data is then converted into a numpy array using the numpy function “array()”.
df.to_numpy()

array([[-1.42447224,  0.34956944,  0.70614895,  0.06888122],
       [-0.09127547,  2.54211457, -1.57229407, -1.52701494],
       [-0.58882191, -0.7129833 ,  0.75280185, -0.69238394],
       [-0.23318427,  0.3199553 ,  1.53849092,  1.75018963],
       [ 0.36968303,  1.1573984 ,  0.98799887, -1.74766814],
       [-0.83075104, -0.71660908,  1.18896266,  0.92315615],
       [ 0.51638134,  0.17018665, -1.06148028,  1.1028401 ],
       [-0.02168241,  1.21456842,  0.46178172,  0.12043523],
       [-1.16388025, -0.82406189, -2.03569161,  0.03000648],
       [ 1.29394519, -0.94791573, -1.57976485,  1.1966464 ],
       [-1.56036587, -0.47386143, -0.69901391, -0.44446001],
       [-0.64445147,  1.1266894 ,  0.3188084 , -0.07307526],
       [ 0.03709883,  0.34798318,  1.50286549, -0.4296673 ],
       [-0.98692487,  0.38300548,  0.21508326,  2.01195721],
       [ 0.89290898, -0.8530731 , -0.83438098, -1.26718108]])

In [17]:
df.describe()

Unnamed: 0,A,B,C,D
count,15.0,15.0,15.0,15.0
mean,-0.295719,0.205531,-0.007312,0.068177
std,0.839744,0.999499,1.19117,1.14707
min,-1.560366,-0.947916,-2.035692,-1.747668
25%,-0.908838,-0.714796,-0.947931,-0.568422
50%,-0.233184,0.319955,0.318808,0.030006
75%,0.203391,0.754847,0.8704,1.012998
max,1.293945,2.542115,1.538491,2.011957


In [18]:
df.T

Unnamed: 0,2025-04-12,2025-04-13,2025-04-14,2025-04-15,2025-04-16,2025-04-17,2025-04-18,2025-04-19,2025-04-20,2025-04-21,2025-04-22,2025-04-23,2025-04-24,2025-04-25,2025-04-26
A,-1.424472,-0.091275,-0.588822,-0.233184,0.369683,-0.830751,0.516381,-0.021682,-1.16388,1.293945,-1.560366,-0.644451,0.037099,-0.986925,0.892909
B,0.349569,2.542115,-0.712983,0.319955,1.157398,-0.716609,0.170187,1.214568,-0.824062,-0.947916,-0.473861,1.126689,0.347983,0.383005,-0.853073
C,0.706149,-1.572294,0.752802,1.538491,0.987999,1.188963,-1.06148,0.461782,-2.035692,-1.579765,-0.699014,0.318808,1.502865,0.215083,-0.834381
D,0.068881,-1.527015,-0.692384,1.75019,-1.747668,0.923156,1.10284,0.120435,0.030006,1.196646,-0.44446,-0.073075,-0.429667,2.011957,-1.267181


In [19]:
df.T

Unnamed: 0,2025-04-12,2025-04-13,2025-04-14,2025-04-15,2025-04-16,2025-04-17,2025-04-18,2025-04-19,2025-04-20,2025-04-21,2025-04-22,2025-04-23,2025-04-24,2025-04-25,2025-04-26
A,-1.424472,-0.091275,-0.588822,-0.233184,0.369683,-0.830751,0.516381,-0.021682,-1.16388,1.293945,-1.560366,-0.644451,0.037099,-0.986925,0.892909
B,0.349569,2.542115,-0.712983,0.319955,1.157398,-0.716609,0.170187,1.214568,-0.824062,-0.947916,-0.473861,1.126689,0.347983,0.383005,-0.853073
C,0.706149,-1.572294,0.752802,1.538491,0.987999,1.188963,-1.06148,0.461782,-2.035692,-1.579765,-0.699014,0.318808,1.502865,0.215083,-0.834381
D,0.068881,-1.527015,-0.692384,1.75019,-1.747668,0.923156,1.10284,0.120435,0.030006,1.196646,-0.44446,-0.073075,-0.429667,2.011957,-1.267181


In [20]:
df.sort_index(axis=1, ascending=False)
#axis one is the columns, axis zero is the rows

Unnamed: 0,D,C,B,A
2025-04-12,0.068881,0.706149,0.349569,-1.424472
2025-04-13,-1.527015,-1.572294,2.542115,-0.091275
2025-04-14,-0.692384,0.752802,-0.712983,-0.588822
2025-04-15,1.75019,1.538491,0.319955,-0.233184
2025-04-16,-1.747668,0.987999,1.157398,0.369683
2025-04-17,0.923156,1.188963,-0.716609,-0.830751
2025-04-18,1.10284,-1.06148,0.170187,0.516381
2025-04-19,0.120435,0.461782,1.214568,-0.021682
2025-04-20,0.030006,-2.035692,-0.824062,-1.16388
2025-04-21,1.196646,-1.579765,-0.947916,1.293945


In [21]:
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2025-04-26,0.892909,-0.853073,-0.834381,-1.267181
2025-04-25,-0.986925,0.383005,0.215083,2.011957
2025-04-24,0.037099,0.347983,1.502865,-0.429667
2025-04-23,-0.644451,1.126689,0.318808,-0.073075
2025-04-22,-1.560366,-0.473861,-0.699014,-0.44446
2025-04-21,1.293945,-0.947916,-1.579765,1.196646
2025-04-20,-1.16388,-0.824062,-2.035692,0.030006
2025-04-19,-0.021682,1.214568,0.461782,0.120435
2025-04-18,0.516381,0.170187,-1.06148,1.10284
2025-04-17,-0.830751,-0.716609,1.188963,0.923156


In [28]:
df.sort_values(by=['A' and 'C'])

Unnamed: 0,A,B,C,D
2025-04-20,-1.16388,-0.824062,-2.035692,0.030006
2025-04-21,1.293945,-0.947916,-1.579765,1.196646
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015
2025-04-18,0.516381,0.170187,-1.06148,1.10284
2025-04-26,0.892909,-0.853073,-0.834381,-1.267181
2025-04-22,-1.560366,-0.473861,-0.699014,-0.44446
2025-04-25,-0.986925,0.383005,0.215083,2.011957
2025-04-23,-0.644451,1.126689,0.318808,-0.073075
2025-04-19,-0.021682,1.214568,0.461782,0.120435
2025-04-12,-1.424472,0.349569,0.706149,0.068881


In [29]:
df

Unnamed: 0,A,B,C,D
2025-04-12,-1.424472,0.349569,0.706149,0.068881
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015
2025-04-14,-0.588822,-0.712983,0.752802,-0.692384
2025-04-15,-0.233184,0.319955,1.538491,1.75019
2025-04-16,0.369683,1.157398,0.987999,-1.747668
2025-04-17,-0.830751,-0.716609,1.188963,0.923156
2025-04-18,0.516381,0.170187,-1.06148,1.10284
2025-04-19,-0.021682,1.214568,0.461782,0.120435
2025-04-20,-1.16388,-0.824062,-2.035692,0.030006
2025-04-21,1.293945,-0.947916,-1.579765,1.196646


In [30]:
df[['A', 'B']]

Unnamed: 0,A,B
2025-04-12,-1.424472,0.349569
2025-04-13,-0.091275,2.542115
2025-04-14,-0.588822,-0.712983
2025-04-15,-0.233184,0.319955
2025-04-16,0.369683,1.157398
2025-04-17,-0.830751,-0.716609
2025-04-18,0.516381,0.170187
2025-04-19,-0.021682,1.214568
2025-04-20,-1.16388,-0.824062
2025-04-21,1.293945,-0.947916


In [31]:
df

Unnamed: 0,A,B,C,D
2025-04-12,-1.424472,0.349569,0.706149,0.068881
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015
2025-04-14,-0.588822,-0.712983,0.752802,-0.692384
2025-04-15,-0.233184,0.319955,1.538491,1.75019
2025-04-16,0.369683,1.157398,0.987999,-1.747668
2025-04-17,-0.830751,-0.716609,1.188963,0.923156
2025-04-18,0.516381,0.170187,-1.06148,1.10284
2025-04-19,-0.021682,1.214568,0.461782,0.120435
2025-04-20,-1.16388,-0.824062,-2.035692,0.030006
2025-04-21,1.293945,-0.947916,-1.579765,1.196646


In [34]:
df.shape


(15, 4)

In [36]:
df.iloc[0:2, 0:4]

Unnamed: 0,A,B,C,D
2025-04-12,-1.424472,0.349569,0.706149,0.068881
2025-04-13,-0.091275,2.542115,-1.572294,-1.527015


In [43]:
df.loc[:, ['A', 'C']]

Unnamed: 0,A,C
2025-04-12,-1.424472,0.706149
2025-04-13,-0.091275,-1.572294
2025-04-14,-0.588822,0.752802
2025-04-15,-0.233184,1.538491
2025-04-16,0.369683,0.987999
2025-04-17,-0.830751,1.188963
2025-04-18,0.516381,-1.06148
2025-04-19,-0.021682,0.461782
2025-04-20,-1.16388,-2.035692
2025-04-21,1.293945,-1.579765


# Boolean Indexing

In [46]:
import seaborn as sns
df3=sns.load_dataset('titanic')   

In [47]:
df3

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [48]:
df3.sample(100)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
746,0,3,male,16.0,1,1,20.2500,S,Third,man,True,,Southampton,no,False
507,1,1,male,,0,0,26.5500,S,First,man,True,,Southampton,yes,True
419,0,3,female,10.0,0,2,24.1500,S,Third,child,False,,Southampton,no,False
47,1,3,female,,0,0,7.7500,Q,Third,woman,False,,Queenstown,yes,True
190,1,2,female,32.0,0,0,13.0000,S,Second,woman,False,,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,1,1,female,49.0,0,0,25.9292,S,First,woman,False,D,Southampton,yes,True
721,0,3,male,17.0,1,0,7.0542,S,Third,man,True,,Southampton,no,False
629,0,3,male,,0,0,7.7333,Q,Third,man,True,,Queenstown,no,True
575,0,3,male,19.0,0,0,14.5000,S,Third,man,True,,Southampton,no,True


In [58]:
df3[df3['fare']<5]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
179,0,3,male,36.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
263,0,1,male,40.0,0,0,0.0,S,First,man,True,B,Southampton,no,True
271,1,3,male,25.0,0,0,0.0,S,Third,man,True,,Southampton,yes,True
277,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
302,0,3,male,19.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
378,0,3,male,20.0,0,0,4.0125,C,Third,man,True,,Cherbourg,no,True
413,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
466,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
481,0,2,male,,0,0,0.0,S,Second,man,True,,Southampton,no,True
597,0,3,male,49.0,0,0,0.0,S,Third,man,True,,Southampton,no,True
