### Pandas
pandas is a Python library that is extensively used for data analytics, ML, and other data fields  
It lets you organise data into a structure called a DataFrame, which acts as a two-dimensional (2D) table of data  
A DataFrame is organised into rows and columns, both of which are labelled (an index for the rows, names for the columns)

In [1]:
import pandas as pd

In [2]:
# Build a DataFrame from a dict: each key becomes a column name and
# each list supplies that column's values, one entry per row.
student_columns = {
    'Name': ['Vishnu', 'Ram', 'Bharat'],
    'Id': [234, 201, 78],
    'Course': ['ML', 'GenAI', 'Python'],
    'Batch': ['C1', 'C2', 'C1'],
}
DF = pd.DataFrame(student_columns)
DF

Unnamed: 0,Name,Id,Course,Batch
0,Vishnu,234,ML,C1
1,Ram,201,GenAI,C2
2,Bharat,78,Python,C1


In [3]:
# Inspect how the frame is organised: per-column dtypes, the row index,
# and the number of rows (records) and columns (fields).
n_records, n_fields = DF.shape  # shape is (rows, columns)

print("Data type : \n", DF.dtypes)

print("Index : \n", DF.index)

print("Number of Records : \n", n_records)

print("Number of Fields : \n", n_fields)

Data type : 
 Name      object
Id         int64
Course    object
Batch     object
dtype: object
Index : 
 RangeIndex(start=0, stop=3, step=1)
Number of Records : 
 3
Number of Fields : 
 4


In [4]:
# Select one column by its label using single brackets.
Names = DF['Name']
names_kind = type(Names)  # shown below so the container type is visible

print("Type : \n", names_kind)
print("Content : \n", Names)

Type : 
 <class 'pandas.core.series.Series'>
Content : 
 0    Vishnu
1       Ram
2    Bharat
Name: Name, dtype: object


In [5]:
# Select one row by integer position: iloc[1] is the second row (positions start at 0).
row_position = 1
Rec_1 = DF.iloc[row_position]
rec_kind = type(Rec_1)  # shown below so the container type is visible

print("Type : \n", rec_kind)
print("Content : \n", Rec_1)

Type : 
 <class 'pandas.core.series.Series'>
Content : 
 Name        Ram
Id          201
Course    GenAI
Batch        C2
Name: 1, dtype: object


In [6]:
# Select a column with a *list* of labels (double brackets): the result is
# a one-column DataFrame rather than a Series.
# Note: this rebinds `Names`, which an earlier cell bound to a Series.
wanted_columns = ['Name']
Names = DF[wanted_columns]
names_kind = type(Names)

print("Type : \n", names_kind)
print("Content : \n", Names)

Type : 
 <class 'pandas.core.frame.DataFrame'>
Content : 
      Name
0  Vishnu
1     Ram
2  Bharat


In [7]:
# Keep only the rows that satisfy a condition.
# The comparison yields a boolean mask (one True/False per row) that selects the rows.
in_batch_c1 = DF['Batch'] == 'C1'
Filtered = DF.loc[in_batch_c1]
Filtered

Unnamed: 0,Name,Id,Course,Batch
0,Vishnu,234,ML,C1
2,Bharat,78,Python,C1


In [8]:
# Update an existing column's values: assigning to the column label replaces it.
# The list must supply one value per row. This modifies DF itself.
replacement_ids = [145, 156, 88]
DF['Id'] = replacement_ids
DF

Unnamed: 0,Name,Id,Course,Batch
0,Vishnu,145,ML,C1
1,Ram,156,GenAI,C2
2,Bharat,88,Python,C1


In [9]:
# Add a new column: assigning to a label that does not exist yet creates it,
# in the same way as a variable assignment. This modifies DF itself.
scores = [78, 85, 80]
DF['Score'] = scores
DF

Unnamed: 0,Name,Id,Course,Batch,Score
0,Vishnu,145,ML,C1,78
1,Ram,156,GenAI,C2,85
2,Bharat,88,Python,C1,80


In [10]:
# Rows are added by concatenation, so first build a frame holding the new
# records. Each dict is one row; its keys become the column names.
new_rows = [
    {'Name': 'Dileep', 'Id': 101, 'Course': 'GenAI', 'Batch': 'C1', 'Score': 80},
    {'Name': 'Phani', 'Id': 102, 'Course': 'ML', 'Batch': 'C2', 'Score': 75},
]
new_rec = pd.DataFrame(new_rows)
new_rec

Unnamed: 0,Name,Id,Course,Batch,Score
0,Dileep,101,GenAI,C1,80
1,Phani,102,ML,C2,75


In [11]:
# Append the new records below the existing rows.
# ignore_index=True renumbers the combined rows 0..n-1 instead of keeping
# each frame's original index labels.
# NOTE: DF is reassigned from itself, so re-running this cell appends new_rec again.
frames_to_stack = [DF, new_rec]
DF = pd.concat(frames_to_stack, ignore_index=True)
DF

Unnamed: 0,Name,Id,Course,Batch,Score
0,Vishnu,145,ML,C1,78
1,Ram,156,GenAI,C2,85
2,Bharat,88,Python,C1,80
3,Dileep,101,GenAI,C1,80
4,Phani,102,ML,C2,75


In [12]:
# Aggregating functions on a single column.
# The column is reached with attribute access (DF.Score) rather than DF['Score'].
score_column = DF.Score

print("Min Score : ", score_column.min())
print("Max Score : ", score_column.max())
print("Mean Score : ", score_column.mean())
print("Median Score : ", score_column.median())

Min Score :  75
Max Score :  85
Mean Score :  79.6
Median Score :  80.0


In [13]:
# Maximum score per course: group the rows by 'Course', take the 'Score'
# column of each group, and reduce each group with max().
scores_by_course = DF.groupby('Course')['Score']
# The resulting Series is labelled "Max " (the label includes a trailing space).
grp = scores_by_course.max().rename("Max ")
grp

Course
GenAI     85
ML        78
Python    80
Name: Max , dtype: int64

In [14]:
# Load a CSV file into a DataFrame and show the dtype pandas chose for each column.
# The path is relative, so it is resolved against the current working directory.
csv_path = 'Index.csv'
Index_Data = pd.read_csv(csv_path)
print(Index_Data.dtypes)

Index Name     object
Date           object
Open          float64
High          float64
Low           float64
Close         float64
dtype: object


In [None]:
# Convert the 'Date' column to a datetime type so that it can be
# sorted and compared as dates.
parsed_dates = pd.to_datetime(Index_Data['Date'])
Index_Data['Date'] = parsed_dates
print(Index_Data.dtypes)
Index_Data

Index Name            object
Date          datetime64[ns]
Open                 float64
High                 float64
Low                  float64
Close                float64
dtype: object


Unnamed: 0,Index Name,Date,Open,High,Low,Close
0,NIFTY 50,2025-06-13,24473.00,24754.35,24473.00,24718.60
1,NIFTY 50,2025-06-12,25164.45,25196.20,24825.90,24888.20
2,NIFTY 50,2025-06-11,25134.15,25222.40,25081.30,25141.40
3,NIFTY 50,2025-06-10,25196.05,25199.30,25055.45,25104.25
4,NIFTY 50,2025-06-09,25160.10,25160.10,25077.15,25103.20
...,...,...,...,...,...,...
252,NIFTY 50,2024-06-07,22821.85,23320.20,22789.05,23290.15
253,NIFTY 50,2024-06-06,22798.60,22910.15,22642.60,22821.40
254,NIFTY 50,2024-06-05,22128.35,22670.40,21791.95,22620.35
255,NIFTY 50,2024-06-04,23179.50,23179.50,21281.45,21884.50


In [None]:
# Find the days on which the Close price is higher than the Open price.
# Done by filtering: build a boolean mask, then keep the rows where it is True.
closed_above_open = Index_Data['Close'] > Index_Data['Open']

Green_Days = Index_Data[closed_above_open]
print(len(Green_Days))
Green_Days

In [None]:
# Find the days on which Close exceeds Open by more than 0.5%
# (Close > Open * 1.005), then order the matches by Date.
min_close = Index_Data['Open'] * 1.005
Proper_Green_Days = (
    Index_Data
    .loc[Index_Data['Close'] > min_close]
    .sort_values(by='Date')
)

print(len(Proper_Green_Days))
Proper_Green_Days

In [None]:
# Add the previous day's close (PDC) to every row.
# The rows must be in ascending Date order first, so that shift(1) moves each
# Close down to the next row. The first row has no previous row, so its PDC is NaN.
# sort_values/assign return a new frame that is bound back to Index_Data, instead
# of sorting with inplace=True: the frame displayed by earlier cells is not
# mutated, and re-running this cell gives the same result.
Index_Data = (
    Index_Data
    .sort_values(by='Date')
    .assign(PDC=lambda frame: frame['Close'].shift(1))
)
Index_Data


In [None]:
# Find the gap-up days: the Open is more than 0.5% above the previous
# day's close (Open > PDC * 1.005).
# Rows whose PDC is missing (NaN) compare as False and are therefore excluded.
gap_up_threshold = Index_Data['PDC'] * 1.005
opened_above_threshold = Index_Data['Open'] > gap_up_threshold

Gap_Up_Days = Index_Data.loc[opened_above_threshold]
print(len(Gap_Up_Days))
Gap_Up_Days