# PANDAS Library

In [1]:
import pandas as pd
import numpy as np

print("=== 1. Creating DataFrames and Series ===")
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
        'Age': [24, 27, 22, 32, 29],
        'Score': [85.5, 90.0, np.nan, 88.5, 95.0]}

df = pd.DataFrame(data)
print("1. DataFrame:\n", df)

s = pd.Series([10, 20, 30])
print("\n2. Series:\n", s)

=== 1. Creating DataFrames and Series ===
1. DataFrame:
       Name  Age  Score
0    Alice   24   85.5
1      Bob   27   90.0
2  Charlie   22    NaN
3    David   32   88.5
4      Eva   29   95.0

2. Series:
 0    10
1    20
2    30
dtype: int64


In [2]:
type(df)

pandas.core.frame.DataFrame

In [3]:
type(s)

pandas.core.series.Series

In [4]:
df.to_excel('my data.xlsx',index=False)    #to save a dataframe in excel format

In [5]:
new_df=pd.read_excel('my data.xlsx')

In [6]:
new_df

Unnamed: 0,Name,Age,Score
0,Alice,24,85.5
1,Bob,27,90.0
2,Charlie,22,
3,David,32,88.5
4,Eva,29,95.0


In [7]:
print("\n=== 2. Basic Information ===")
print("3. df.head():\n", df.head()) #top 5 rows 
print("4. df.tail():\n", df.tail()) #bottom 5 rows
print("5. df.shape:", df.shape) #return (rows,columns) within dataframe
print("6. df.columns:", df.columns) #column names
print("7. df.index:", df.index) #total rows
print("8. df.dtypes:\n", df.dtypes) #column data types
print("9. df.info():"); df.info() #overall info


=== 2. Basic Information ===
3. df.head():
       Name  Age  Score
0    Alice   24   85.5
1      Bob   27   90.0
2  Charlie   22    NaN
3    David   32   88.5
4      Eva   29   95.0
4. df.tail():
       Name  Age  Score
0    Alice   24   85.5
1      Bob   27   90.0
2  Charlie   22    NaN
3    David   32   88.5
4      Eva   29   95.0
5. df.shape: (5, 3)
6. df.columns: Index(['Name', 'Age', 'Score'], dtype='object')
7. df.index: RangeIndex(start=0, stop=5, step=1)
8. df.dtypes:
 Name      object
Age        int64
Score    float64
dtype: object
9. df.info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    5 non-null      object 
 1   Age     5 non-null      int64  
 2   Score   4 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 252.0+ bytes


In [8]:
print("\n=== 3. Descriptive Statistics ===")
print("10. df.describe():\n", df.describe())                               #return overall basic calculations
print("11. df['Age'].mean():", df['Age'].mean())
print("12. df['Age'].median():", df['Age'].median())
print("13. df['Age'].mode():", df['Age'].mode().tolist())                  #return mode values into list
print("14. df['Score'].isnull():\n", df['Score'].isnull())                 #to check is null value present or not
print("14. df['Score'].isnull():\n", df['Score'].isnull().sum())           #return total no. of null values.


=== 3. Descriptive Statistics ===
10. df.describe():
              Age      Score
count   5.000000   4.000000
mean   26.800000  89.750000
std     3.962323   3.968627
min    22.000000  85.500000
25%    24.000000  87.750000
50%    27.000000  89.250000
75%    29.000000  91.250000
max    32.000000  95.000000
11. df['Age'].mean(): 26.8
12. df['Age'].median(): 27.0
13. df['Age'].mode(): [22, 24, 27, 29, 32]
14. df['Score'].isnull():
 0    False
1    False
2     True
3    False
4    False
Name: Score, dtype: bool
14. df['Score'].isnull():
 1


In [9]:
df.head(2)

Unnamed: 0,Name,Age,Score
0,Alice,24,85.5
1,Bob,27,90.0


In [10]:
print("\n=== 4. Handling Missing Values ===")
print("15. Fill NaN:\n", df['Score'].fillna(0))                            #to fill empty values with the set value e.g. 0
print("16. Drop rows with NaN:\n", df.dropna())                            #to drop an empty rows/columns 

print("\n=== 5. Data Selection & Filtering ===")
print("17. df['Name']:\n", df['Name'])
print("18. df[['Name', 'Age']]:\n", df[['Name', 'Age']])
print("19. df.loc[0]:\n", df.loc[0])                                      #to locate the series which has non-integer index e.g. A,B,C,...
print("20. df.iloc[1]:\n", df.iloc[1])                                    #to locate the series which has interger as index, e.g. 0,1,2,3,...
print("21. df[df['Age'] > 25]:\n", df[df['Age'] > 25])

print("\n=== 6. Modifying Data ===")
df['Passed'] = df['Score'] > 85
print("22. Add 'Passed' column:\n", df)


=== 4. Handling Missing Values ===
15. Fill NaN:
 0    85.5
1    90.0
2     0.0
3    88.5
4    95.0
Name: Score, dtype: float64
16. Drop rows with NaN:
     Name  Age  Score
0  Alice   24   85.5
1    Bob   27   90.0
3  David   32   88.5
4    Eva   29   95.0

=== 5. Data Selection & Filtering ===
17. df['Name']:
 0      Alice
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object
18. df[['Name', 'Age']]:
       Name  Age
0    Alice   24
1      Bob   27
2  Charlie   22
3    David   32
4      Eva   29
19. df.loc[0]:
 Name     Alice
Age         24
Score     85.5
Name: 0, dtype: object
20. df.iloc[1]:
 Name      Bob
Age        27
Score    90.0
Name: 1, dtype: object
21. df[df['Age'] > 25]:
     Name  Age  Score
1    Bob   27   90.0
3  David   32   88.5
4    Eva   29   95.0

=== 6. Modifying Data ===
22. Add 'Passed' column:
       Name  Age  Score  Passed
0    Alice   24   85.5    True
1      Bob   27   90.0    True
2  Charlie   22    NaN   False
3    David   32   88

In [11]:
df['Passed']=['Yes','No','No','No','No']                                    #to  add column in dataframe

In [12]:
fruits=pd.DataFrame({'kg':[3,4,5],'units':[20,40,50]},index=['apple','mango','banana'])

In [13]:
df.drop(2,axis=0,inplace=True)                                              #to delete a row placed at index 2

In [14]:
df.reset_index(drop=True) 

Unnamed: 0,Name,Age,Score,Passed
0,Alice,24,85.5,Yes
1,Bob,27,90.0,No
2,David,32,88.5,No
3,Eva,29,95.0,No


In [15]:
df.drop('Passed',axis=1)                                                    #to delete a column having name passed

Unnamed: 0,Name,Age,Score
0,Alice,24,85.5
1,Bob,27,90.0
3,David,32,88.5
4,Eva,29,95.0


In [16]:
# In pandas by default axis=0 means dealing with rows
# In pandas Returns a new DataFrame unless you use inplace=True., it will not replace original table unless you do inplace =True

df.drop([0,1])                                                              # it will drop a rows with index 0 & 1


Unnamed: 0,Name,Age,Score,Passed
3,David,32,88.5,No
4,Eva,29,95.0,No


In [17]:
# In pandas, drop() does not require double brackets for multiple columns.
df.drop(['Score','Age'],axis=1)

Unnamed: 0,Name,Passed
0,Alice,Yes
1,Bob,No
3,David,No
4,Eva,No


# Advanced

In [21]:
df.at[2, 'Score'] = 78                                             #to modify a single particular cell 
print("23. Modify single value:\n", df)  

print("\n=== 7. Sorting & Grouping ===")
print("24. Sorted by Age:\n", df.sort_values(by='Age'))
print("25. Group by Passed:\n", df.groupby("Passed")["Score"].mean())
print("\n=== 8. File I/O (Commented out) ===")
# df.to_csv('output.csv', index=False)  # 26. Save to CSV
# df2 = pd.read_csv('output.csv')      # 27. Read from CSV

print("\n=== 9. Advanced Indexing & Operations ===")
print("26. Unique Ages:", df['Age'].unique())
print("27. Value counts (Age):\n", df['Age'].value_counts())
print("28. Apply function (double Age):\n", df['Age'].apply(lambda x: x * 2))
print("29. Rename columns:\n", df.rename(columns={'Score': 'Marks'}))
print("30. Drop column 'Passed':\n", df.drop(columns=['Passed']))  

23. Modify single value:
     Name   Age  Score Passed
0  Alice  24.0   85.5    Yes
1    Bob  27.0   90.0     No
3  David  32.0   88.5     No
4    Eva  29.0   95.0     No
2    NaN   NaN   78.0    NaN

=== 7. Sorting & Grouping ===
24. Sorted by Age:
     Name   Age  Score Passed
0  Alice  24.0   85.5    Yes
1    Bob  27.0   90.0     No
4    Eva  29.0   95.0     No
3  David  32.0   88.5     No
2    NaN   NaN   78.0    NaN
25. Group by Passed:
 Passed
No     91.166667
Yes    85.500000
Name: Score, dtype: float64

=== 8. File I/O (Commented out) ===

=== 9. Advanced Indexing & Operations ===
26. Unique Ages: [24. 27. 32. 29. nan]
27. Value counts (Age):
 Age
24.0    1
27.0    1
32.0    1
29.0    1
Name: count, dtype: int64
28. Apply function (double Age):
 0    48.0
1    54.0
3    64.0
4    58.0
2     NaN
Name: Age, dtype: float64
29. Rename columns:
     Name   Age  Marks Passed
0  Alice  24.0   85.5    Yes
1    Bob  27.0   90.0     No
3  David  32.0   88.5     No
4    Eva  29.0   95.0  

# To read Json using pandas

In [29]:
import pandas as pd

df=pd.read_json("sample_students.json")     
df

# print(df.to_string())  ->we can also used this method to display / store a data within Dataframe in string format. 

Unnamed: 0,name,age,gender,marks_12th,qualification_exam,desired_course
Physics,Akshay Kumar,18,Male,85.0,,Computer Science Engineering
Chemistry,Akshay Kumar,18,Male,78.0,,Computer Science Engineering
Maths,Akshay Kumar,18,Male,90.0,,Computer Science Engineering
English,Akshay Kumar,18,Male,80.0,,Computer Science Engineering
Computer Science,Akshay Kumar,18,Male,88.0,,Computer Science Engineering
exam_name,Akshay Kumar,18,Male,,JEE,Computer Science Engineering
score,Akshay Kumar,18,Male,,92,Computer Science Engineering


# To read SQLITE3- Database File

In [36]:
# using pandas to see the results more precisely
import sqlite3
import pandas as pd

with sqlite3.connect("akshay.db") as connection:
    df=pd.read_sql("select * from data",connection)   #here data is a table within database named akshay
df


Unnamed: 0,id,name
0,2,Akshay
