# PANDAS WITH PYTHON

In [1]:
import pandas as pd
import numpy as np

In [2]:
#object creation
#series have one col of index and other column of our values we can also add null value in it
s=pd.Series([1,3,5,np.nan,7,9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    7.0
5    9.0
dtype: float64

In [3]:
#daterange is a function to get dates its structure is YYYYMMDD
dates=pd.date_range("20130101",periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
dates=pd.date_range("20250723",periods=10)
dates

DatetimeIndex(['2025-07-23', '2025-07-24', '2025-07-25', '2025-07-26',
               '2025-07-27', '2025-07-28', '2025-07-29', '2025-07-30',
               '2025-07-31', '2025-08-01'],
              dtype='datetime64[ns]', freq='D')

In [5]:
#converting dates into dataframe
dates=pd.date_range("20230101",periods=6)
#first use library,then numpy random array with 6 rows and 4 col
df=pd.DataFrame(np.random.randn(6,4),index=dates,columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2023-01-01,0.698788,-1.445356,0.582254,1.02509
2023-01-02,-0.792309,-0.774628,-0.264254,0.090766
2023-01-03,1.198011,1.190848,-0.683475,-0.622418
2023-01-04,1.200031,0.421776,1.354653,0.026962
2023-01-05,0.594303,-0.99812,-0.492832,-1.679045
2023-01-06,0.960836,-0.714966,-1.069652,-0.588049


In [6]:
#now upper dataframe using dict
df1=pd.DataFrame(
    {
        #ABCDEF are keys of dict
        "A":1.0,
        "B":pd.Timestamp("20250724"),
        "C":pd.Series(1,index=list(range(8)),dtype="float32"),
        #2*8 means[2,2,2,2,2,2,2,2] according to col outside numbers change
        "D":np.array([2]*8,dtype="int32"),
        "E":pd.Categorical(["Biryani","Kabab","Spagetti","pizza","Afghanipulao","Haleem","Karahi","Kofty"]),
        "F":"food",
    }
)
df1

Unnamed: 0,A,B,C,D,E,F
0,1.0,2025-07-24,1.0,2,Biryani,food
1,1.0,2025-07-24,1.0,2,Kabab,food
2,1.0,2025-07-24,1.0,2,Spagetti,food
3,1.0,2025-07-24,1.0,2,pizza,food
4,1.0,2025-07-24,1.0,2,Afghanipulao,food
5,1.0,2025-07-24,1.0,2,Haleem,food
6,1.0,2025-07-24,1.0,2,Karahi,food
7,1.0,2025-07-24,1.0,2,Kofty,food


In [7]:
df1.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [8]:
#for viewing data starting col
df1.head(2)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2025-07-24,1.0,2,Biryani,food
1,1.0,2025-07-24,1.0,2,Kabab,food


In [9]:
#for viewing data last 5 rows if wanr particular number write in col
df1.tail(2)

Unnamed: 0,A,B,C,D,E,F
6,1.0,2025-07-24,1.0,2,Karahi,food
7,1.0,2025-07-24,1.0,2,Kofty,food


In [10]:
#index is rows head
df1.index

Index([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64')

In [11]:
#convert our dataframe into numpy array
df1.to_numpy()

array([[1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Biryani', 'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Kabab', 'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Spagetti',
        'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'pizza', 'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Afghanipulao',
        'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Haleem', 'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Karahi', 'food'],
       [1.0, Timestamp('2025-07-24 00:00:00'), 1.0, 2, 'Kofty', 'food']],
      dtype=object)

In [12]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.643277,-0.386741,-0.095551,-0.291116
std,0.746315,0.989457,0.899259,0.905945
min,-0.792309,-1.445356,-1.069652,-1.679045
25%,0.620424,-0.942247,-0.635814,-0.613825
50%,0.829812,-0.744797,-0.378543,-0.280543
75%,1.138717,0.13759,0.370627,0.074815
max,1.200031,1.190848,1.354653,1.02509


In [13]:
#transpose our dataframe(rows into col,col into rows)
df1.T

Unnamed: 0,0,1,2,3,4,5,6,7
A,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
B,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00,2025-07-24 00:00:00
C,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
D,2,2,2,2,2,2,2,2
E,Biryani,Kabab,Spagetti,pizza,Afghanipulao,Haleem,Karahi,Kofty
F,food,food,food,food,food,food,food,food


In [14]:
#sorting(asc=false tw all outputs from below to above (7 to 1),if true(0to7)
df1.sort_index(axis=1,ascending=True)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2025-07-24,1.0,2,Biryani,food
1,1.0,2025-07-24,1.0,2,Kabab,food
2,1.0,2025-07-24,1.0,2,Spagetti,food
3,1.0,2025-07-24,1.0,2,pizza,food
4,1.0,2025-07-24,1.0,2,Afghanipulao,food
5,1.0,2025-07-24,1.0,2,Haleem,food
6,1.0,2025-07-24,1.0,2,Karahi,food
7,1.0,2025-07-24,1.0,2,Kofty,food


In [15]:
#according to condiion means all sorting acc to B col
df1.sort_values(by="B",ascending=True)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2025-07-24,1.0,2,Biryani,food
1,1.0,2025-07-24,1.0,2,Kabab,food
2,1.0,2025-07-24,1.0,2,Spagetti,food
3,1.0,2025-07-24,1.0,2,pizza,food
4,1.0,2025-07-24,1.0,2,Afghanipulao,food
5,1.0,2025-07-24,1.0,2,Haleem,food
6,1.0,2025-07-24,1.0,2,Karahi,food
7,1.0,2025-07-24,1.0,2,Kofty,food


In [16]:
#row wise selection of data using strings method
df1[0:4]

Unnamed: 0,A,B,C,D,E,F
0,1.0,2025-07-24,1.0,2,Biryani,food
1,1.0,2025-07-24,1.0,2,Kabab,food
2,1.0,2025-07-24,1.0,2,Spagetti,food
3,1.0,2025-07-24,1.0,2,pizza,food


In [17]:
#could get any col like this(filtering)
df1["E"]

0         Biryani
1           Kabab
2        Spagetti
3           pizza
4    Afghanipulao
5          Haleem
6          Karahi
7           Kofty
Name: E, dtype: category
Categories (8, object): ['Afghanipulao', 'Biryani', 'Haleem', 'Kabab', 'Karahi', 'Kofty', 'Spagetti', 'pizza']

In [18]:
#dates row wise data if 1 then 2 , 0 then 1 so on
df.loc[dates[0]]

A    0.698788
B   -1.445356
C    0.582254
D    1.025090
Name: 2023-01-01 00:00:00, dtype: float64

In [19]:
#col wise data (rows ratio (if all just:) then cols name you want (A,B,C) [if:then 1 to 4]if, then only 1&4(below cell eg)
df1.loc[1:4,["A","B","E"]]

Unnamed: 0,A,B,E
1,1.0,2025-07-24,Kabab
2,1.0,2025-07-24,Spagetti
3,1.0,2025-07-24,pizza
4,1.0,2025-07-24,Afghanipulao


In [20]:
df1.loc[[1,4],["A","B","E"]]

Unnamed: 0,A,B,E
1,1.0,2025-07-24,Kabab
4,1.0,2025-07-24,Afghanipulao


In [21]:
#AT function first value of A
df.at[dates[0],"A"]

0.6987879521164705

In [22]:
df1.at[df1[df1["E"] == "Biryani"].index[0], "B"]

Timestamp('2025-07-24 00:00:00')

In [23]:
#whatever is on location 3
df.iloc[3]
df1.iloc[3]
#or series of rows if particular col then after,(0:5)if all rows just:
df1.iloc[0:2,:]
#sari rows sirf 5 col
df1.iloc[:,0:5]

Unnamed: 0,A,B,C,D,E
0,1.0,2025-07-24,1.0,2,Biryani
1,1.0,2025-07-24,1.0,2,Kabab
2,1.0,2025-07-24,1.0,2,Spagetti
3,1.0,2025-07-24,1.0,2,pizza
4,1.0,2025-07-24,1.0,2,Afghanipulao
5,1.0,2025-07-24,1.0,2,Haleem
6,1.0,2025-07-24,1.0,2,Karahi
7,1.0,2025-07-24,1.0,2,Kofty


In [24]:
#kind of selection that if we want how many of us have height greater than 5.0 so we can use this approach
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2023-01-01,0.698788,-1.445356,0.582254,1.02509
2023-01-03,1.198011,1.190848,-0.683475,-0.622418
2023-01-04,1.200031,0.421776,1.354653,0.026962
2023-01-05,0.594303,-0.99812,-0.492832,-1.679045
2023-01-06,0.960836,-0.714966,-1.069652,-0.588049


In [25]:
#if from all data frame all columns
df[df>0]

Unnamed: 0,A,B,C,D
2023-01-01,0.698788,,0.582254,1.02509
2023-01-02,,,,0.090766
2023-01-03,1.198011,1.190848,,
2023-01-04,1.200031,0.421776,1.354653,0.026962
2023-01-05,0.594303,,,
2023-01-06,0.960836,,,


In [26]:
#adding new col in our dataset
df1["G=Rating"]=[10,6,8,9,7,6,8,9]
df1

Unnamed: 0,A,B,C,D,E,F,G=Rating
0,1.0,2025-07-24,1.0,2,Biryani,food,10
1,1.0,2025-07-24,1.0,2,Kabab,food,6
2,1.0,2025-07-24,1.0,2,Spagetti,food,8
3,1.0,2025-07-24,1.0,2,pizza,food,9
4,1.0,2025-07-24,1.0,2,Afghanipulao,food,7
5,1.0,2025-07-24,1.0,2,Haleem,food,6
6,1.0,2025-07-24,1.0,2,Karahi,food,8
7,1.0,2025-07-24,1.0,2,Kofty,food,9


In [27]:
#creating new col of avg values of other col
df["avg"] = df[["A", "B", "C","D"]].mean(axis=1)
df

Unnamed: 0,A,B,C,D,avg
2023-01-01,0.698788,-1.445356,0.582254,1.02509,0.215194
2023-01-02,-0.792309,-0.774628,-0.264254,0.090766,-0.435106
2023-01-03,1.198011,1.190848,-0.683475,-0.622418,0.270741
2023-01-04,1.200031,0.421776,1.354653,0.026962,0.750855
2023-01-05,0.594303,-0.99812,-0.492832,-1.679045,-0.643924
2023-01-06,0.960836,-0.714966,-1.069652,-0.588049,-0.352958


In [28]:
df1["avg"] = df1[["A", "D"]].sum(axis=1)
df1

Unnamed: 0,A,B,C,D,E,F,G=Rating,avg
0,1.0,2025-07-24,1.0,2,Biryani,food,10,3.0
1,1.0,2025-07-24,1.0,2,Kabab,food,6,3.0
2,1.0,2025-07-24,1.0,2,Spagetti,food,8,3.0
3,1.0,2025-07-24,1.0,2,pizza,food,9,3.0
4,1.0,2025-07-24,1.0,2,Afghanipulao,food,7,3.0
5,1.0,2025-07-24,1.0,2,Haleem,food,6,3.0
6,1.0,2025-07-24,1.0,2,Karahi,food,8,3.0
7,1.0,2025-07-24,1.0,2,Kofty,food,9,3.0
