In [1]:
import pandas as pd

# Build a Series from a plain Python list. No index is passed, so pandas
# supplies the default integer labels (0..3), as the printed output shows.
data = [10, 20, 30, 40]
s = pd.Series(data=data)

print(s)


0    10
1    20
2    30
3    40
dtype: int64


In [2]:
import pandas as pd

# Values and their string labels are kept in two parallel lists.
data = [100, 200, 300, 400]
index_labels = list('abcd')

# A Series with a custom label index and a name attached to it.
s = pd.Series(data=data, index=index_labels, name="Scores")

# Show the Series itself, then its two pieces of metadata.
print(s)
print("Name of Series:", s.name)
print("Index Labels:", s.index)


a    100
b    200
c    300
d    400
Name: Scores, dtype: int64
Name of Series: Scores
Index Labels: Index(['a', 'b', 'c', 'd'], dtype='object')


In [3]:
# `s` is the label-indexed "Scores" Series created in the previous cell.
print(s['a'])      # Access using label
# Positional access goes through .iloc: a bare s[0] on a Series with string
# labels relies on a deprecated integer-as-position fallback and emits a
# FutureWarning.
print(s.iloc[0])   # Access using position
print(s[['b', 'd']])  # Multiple elements


100
100
b    200
d    400
Name: Scores, dtype: int64


  print(s[0])        # Access using position


In [4]:
# Explicit indexers on `s`: .iloc addresses by integer position,
# .loc addresses by index label. Both reach the same first element here.
first_by_position = s.iloc[0]
print(first_by_position)
first_by_label = s.loc['a']
print(first_by_label)


100
100


In [5]:
# Label-based slice: both endpoints belong to the result, so 'c' is included.
label_window = s.loc['a':'c']
print(label_window)

# Position-based slice: half-open like a list slice, so position 3 is excluded.
position_window = s.iloc[1:3]
print(position_window)

# Overwrite a single element, addressed by its label.
s.loc['b'] = 250
print(s)

# Overwrite a single element, addressed by its integer position.
s.iloc[2] = 350
print(s)


a    100
b    200
c    300
Name: Scores, dtype: int64
b    200
c    300
Name: Scores, dtype: int64
a    100
b    250
c    300
d    400
Name: Scores, dtype: int64
a    100
b    250
c    350
d    400
Name: Scores, dtype: int64


In [6]:
import pandas as pd

# Two Series that share the same labels, so arithmetic pairs them up one-to-one.
s1 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
s2 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])

# Element-wise arithmetic, spelled with the named methods behind + - * /.
print(s1.add(s2))      # Addition
print(s1.sub(s2))      # Subtraction
print(s1.mul(s2))      # Multiplication
print(s1.div(s2))      # Division

# A Series whose labels only partly overlap with s1 ('d' in place of 'c').
s3 = pd.Series([100, 200, 300], index=['a', 'b', 'd'])
print(s1.add(s3))  # NaN wherever a label is present in only one operand

# Comparison yields a boolean Series, which can then be used as a filter.
above_15 = s1.gt(15)
print(above_15)        # Boolean Series
print(s1[above_15])    # Only the values > 15

# Scalar reductions over the whole Series.
print(s1.sum())        # Sum of elements
print(s1.mean())       # Mean
print(s1.max())        # Maximum
print(s1.min())        # Minimum

# Sorted copies.
print(s1.sort_values())  # Ordered by value
print(s1.sort_index())   # Ordered by index label


a    11
b    22
c    33
dtype: int64
a     9
b    18
c    27
dtype: int64
a    10
b    40
c    90
dtype: int64
a    10.0
b    10.0
c    10.0
dtype: float64
a    110.0
b    220.0
c      NaN
d      NaN
dtype: float64
a    False
b     True
c     True
dtype: bool
b    20
c    30
dtype: int64
60
20.0
30
10
a    10
b    20
c    30
dtype: int64
a    10
b    20
c    30
dtype: int64


In [7]:
import pandas as pd

# Column-oriented input: every key becomes a column, every list its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 22],
    City=['Delhi', 'Mumbai', 'Bangalore'],
)

df = pd.DataFrame(data)
print(df)


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   22  Bangalore


In [8]:
# Peek at the two ends of `df` (the DataFrame built in the previous cell).
print(df.head(5))     # First 5 rows (5 is also head()'s default)
print(df.tail(2))     # Last 2 rows

# Column selection: one label gives a Series, a list of labels a DataFrame.
name_column = df['Name']
print(name_column)
name_and_age = df[['Name', 'Age']]
print(name_and_age)

# Row selection.
print(df.loc[0])     # Row whose index label is 0
print(df.iloc[1])    # Row at integer position 1


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   22  Bangalore
      Name  Age       City
1      Bob   30     Mumbai
2  Charlie   22  Bangalore
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   22
Name    Alice
Age        25
City    Delhi
Name: 0, dtype: object
Name       Bob
Age         30
City    Mumbai
Name: 1, dtype: object


In [9]:
# Structural overview of `df` (the DataFrame built in an earlier cell).
print(df.shape)       # (rows, columns)
print(df.columns)     # Column names
print(df.index)       # Row indices
print(df.dtypes)      # Data type of each column
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()             # Full info
print(df.describe())  # Statistical summary


(3, 3)
Index(['Name', 'Age', 'City'], dtype='object')
RangeIndex(start=0, stop=3, step=1)
Name    object
Age      int64
City    object
dtype: object
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes
None
             Age
count   3.000000
mean   25.666667
std     4.041452
min    22.000000
25%    23.500000
50%    25.000000
75%    27.500000
max    30.000000


In [10]:
# Add a column by assigning a list holding one value per row.
salaries = [50000, 60000, 45000]
df['Salary'] = salaries
print(df)

# Single-cell updates: .at addresses by labels, .iat by integer positions.
df.at[0, 'Age'] = 26           # row label 0, column 'Age'
df.iat[1, 2] = 'Pune'          # row position 1, column position 2 (City)
print(df)

# Remove the Salary column again, in place.
del df['Salary']              # OR: df.drop('Salary', axis=1, inplace=True)
print(df)


      Name  Age       City  Salary
0    Alice   25      Delhi   50000
1      Bob   30     Mumbai   60000
2  Charlie   22  Bangalore   45000
      Name  Age       City  Salary
0    Alice   26      Delhi   50000
1      Bob   30       Pune   60000
2  Charlie   22  Bangalore   45000
      Name  Age       City
0    Alice   26      Delhi
1      Bob   30       Pune
2  Charlie   22  Bangalore


In [11]:
# One boolean mask per condition, built from `df`'s columns.
older_than_25 = df['Age'] > 25
lives_in_delhi = df['City'] == 'Delhi'
lives_in_pune = df['City'] == 'Pune'

print(df[older_than_25])                  # Age greater than 25
print(df[lives_in_delhi])                 # People from Delhi
print(df[older_than_25 & lives_in_pune])  # Both conditions combined with &

# The sorted results are only printed; nothing is assigned back to df.
print(df.sort_values(by='Age'))                    # Ascending
print(df.sort_values(by='Name', ascending=False))  # Descending


    Name  Age   City
0  Alice   26  Delhi
1    Bob   30   Pune
    Name  Age   City
0  Alice   26  Delhi
  Name  Age  City
1  Bob   30  Pune
      Name  Age       City
2  Charlie   22  Bangalore
0    Alice   26      Delhi
1      Bob   30       Pune
      Name  Age       City
2  Charlie   22  Bangalore
1      Bob   30       Pune
0    Alice   26      Delhi


In [12]:
import pandas as pd

# Sample records to round-trip through a CSV file.
data = dict(
    Name=['Naga', 'Rahul', 'Priya'],
    Age=[21, 22, 23],
    City=['Hyderabad', 'Chennai', 'Bangalore'],
)
df = pd.DataFrame(data)

# Write the frame out; index=False keeps the row index out of the file.
csv_path = 'data.csv'
df.to_csv(csv_path, index=False)

# Load the file back and print it to confirm the round trip.
df_loaded = pd.read_csv(csv_path)
print(df_loaded)


    Name  Age       City
0   Naga   21  Hyderabad
1  Rahul   22    Chennai
2  Priya   23  Bangalore


In [13]:
import pandas as pd

# Rebuild the small sample frame used by this cell and the next one.
data = dict(
    Name=['Naga', 'Rahul', 'Priya'],
    Age=[21, 22, 23],
    City=['Hyderabad', 'Chennai', 'Bangalore'],
)
df = pd.DataFrame(data)

# One column label -> a Series.
name_column = df['Name']
print(name_column)

# A list of column labels -> a DataFrame.
name_and_age = df[['Name', 'Age']]
print(name_and_age)

# A single row, fetched two ways.
print(df.loc[0])   # by index label
print(df.iloc[1])  # by integer position


0     Naga
1    Rahul
2    Priya
Name: Name, dtype: object
    Name  Age
0   Naga   21
1  Rahul   22
2  Priya   23
Name         Naga
Age            21
City    Hyderabad
Name: 0, dtype: object
Name      Rahul
Age          22
City    Chennai
Name: 1, dtype: object


In [14]:
# Rows of `df` where Age exceeds 21.
older_than_21 = df['Age'] > 21
print(df[older_than_21])

# Rows meeting both conditions: Age > 21 and City equal to 'Chennai'.
in_chennai = df['City'] == 'Chennai'
print(df[older_than_21 & in_chennai])

# Append a Score column, one value per row.
df['Score'] = [88, 92, 79]

# Raise every Age by one.
df['Age'] = df['Age'].add(1)

print(df)

# Drop the Score column, then the row labelled 0; each drop returns a new
# frame that is bound back to df.
df = df.drop(columns='Score')
df = df.drop(index=0)

print(df)


    Name  Age       City
1  Rahul   22    Chennai
2  Priya   23  Bangalore
    Name  Age     City
1  Rahul   22  Chennai
    Name  Age       City  Score
0   Naga   22  Hyderabad     88
1  Rahul   23    Chennai     92
2  Priya   24  Bangalore     79
    Name  Age       City
1  Rahul   23    Chennai
2  Priya   24  Bangalore


In [15]:
import pandas as pd

# One row per student, tagged with the department they belong to.
data = dict(
    Department=['CSE', 'CSE', 'ECE', 'ECE', 'MECH'],
    Student=['A', 'B', 'C', 'D', 'E'],
    Marks=[85, 90, 78, 92, 88],
)

df = pd.DataFrame(data)
print(df)

# Marks grouped per department; the same grouping feeds both summaries below.
marks_by_department = df.groupby('Department')['Marks']
print(marks_by_department.mean())                       # Average marks
print(marks_by_department.agg(['mean', 'max', 'min']))  # Several aggregates at once

# Grouping on two keys gives a result indexed by (Dept, Year).
df2 = pd.DataFrame(dict(
    Dept=['CSE', 'CSE', 'ECE', 'ECE'],
    Year=[1, 2, 1, 2],
    Marks=[80, 90, 70, 85],
))

print(df2.groupby(['Dept', 'Year'])['Marks'].mean())


  Department Student  Marks
0        CSE       A     85
1        CSE       B     90
2        ECE       C     78
3        ECE       D     92
4       MECH       E     88
Department
CSE     87.5
ECE     85.0
MECH    88.0
Name: Marks, dtype: float64
            mean  max  min
Department                
CSE         87.5   90   85
ECE         85.0   92   78
MECH        88.0   88   88
Dept  Year
CSE   1       80.0
      2       90.0
ECE   1       70.0
      2       85.0
Name: Marks, dtype: float64


In [16]:
import pandas as pd
import numpy as np

# Sample frame with gaps: two missing ages and one missing city.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, np.nan, 30, np.nan, 22],
    'City': ['Delhi', 'Mumbai', np.nan, 'Chennai', 'Kolkata']
}

df = pd.DataFrame(data)
print(df)

# Detecting missing values.
print(df.isnull())        # Returns True for NaNs
print(df.isnull().sum())  # Count of NaNs per column

# Dropping rows.
df_dropped = df.dropna()  # Drops rows with any NaN
print(df_dropped)

# Drop only if *all* values in a row are NaN (computed here but not printed)
df_dropped_all = df.dropna(how='all')

# Filling with a constant.
df_filled = df.fillna(0)  # Replaces NaN with 0
print(df_filled)

# Fill with specific values per column
df_filled = df.fillna({'Age': df['Age'].mean(), 'City': 'Unknown'})
print(df_filled)

# Directional fills use the dedicated ffill()/bfill() methods; the older
# fillna(method=...) spelling is deprecated and emits a FutureWarning.
print(df.ffill())  # Fill with forward value
print(df.bfill())  # Fill with backward value

print(df.isnull().values.any())  # Returns True if any NaN present


      Name   Age     City
0    Alice  25.0    Delhi
1      Bob   NaN   Mumbai
2  Charlie  30.0      NaN
3    David   NaN  Chennai
4      Eve  22.0  Kolkata
    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False   True
3  False   True  False
4  False  False  False
Name    0
Age     2
City    1
dtype: int64
    Name   Age     City
0  Alice  25.0    Delhi
4    Eve  22.0  Kolkata
      Name   Age     City
0    Alice  25.0    Delhi
1      Bob   0.0   Mumbai
2  Charlie  30.0        0
3    David   0.0  Chennai
4      Eve  22.0  Kolkata
      Name        Age     City
0    Alice  25.000000    Delhi
1      Bob  25.666667   Mumbai
2  Charlie  30.000000  Unknown
3    David  25.666667  Chennai
4      Eve  22.000000  Kolkata
      Name   Age     City
0    Alice  25.0    Delhi
1      Bob  25.0   Mumbai
2  Charlie  30.0   Mumbai
3    David  30.0  Chennai
4      Eve  22.0  Kolkata
      Name   Age     City
0    Alice  25.0    Delhi
1      Bob  30.0   Mumbai
2  Charlie  30.

  print(df.fillna(method='ffill'))  # Fill with forward value
  print(df.fillna(method='bfill'))  # Fill with backward value
