# Task 1: Pandas Basics

## Create a Series from scratch

In [2]:
import pandas as pd
marks_series = pd.Series([85, 90, 78, 92, 88], name="Marks")
print("Series:")
print(marks_series)
data = {
    "Name": ["Shaik", "Zoya", "Ravi", "Anjali", "Kiran"],
    "Department": ["ECE", "CSE", "IT", "EEE", "ME"],
    "Marks": [85, 90, 78, 92, 88]
}

df = pd.DataFrame(data)
print("\nDataFrame:")
print(df)
print("\nFirst 3 rows:")
print(df.head(3))

print("\nLast 2 rows:")
print(df.tail(2))

print("\nShape of DataFrame:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nIndex:", df.index.tolist())

print("\nData Types:")
print(df.dtypes)

Series:
0    85
1    90
2    78
3    92
4    88
Name: Marks, dtype: int64

DataFrame:
     Name Department  Marks
0   Shaik        ECE     85
1    Zoya        CSE     90
2    Ravi         IT     78
3  Anjali        EEE     92
4   Kiran         ME     88

First 3 rows:
    Name Department  Marks
0  Shaik        ECE     85
1   Zoya        CSE     90
2   Ravi         IT     78

Last 2 rows:
     Name Department  Marks
3  Anjali        EEE     92
4   Kiran         ME     88

Shape of DataFrame: (5, 3)

Columns: ['Name', 'Department', 'Marks']

Index: [0, 1, 2, 3, 4]

Data Types:
Name          object
Department    object
Marks          int64
dtype: object


# Task 2: Load and View Data

In [8]:
# import pandas as pd
df = pd.read_csv("CARS_1.csv")
df.head()
df.info()

# Summary statistics
df.describe()
null_counts = df.isnull().sum()
print("Null values in each column:")
print(null_counts)
print(df.dtypes)
print("Data types:")

# Get number of rows and columns
print("Row and column count:", df.shape)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 443 entries, 0 to 442
Data columns (total 16 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name_of_car  428 non-null    object 
 1   Model        429 non-null    object 
 2   Type         409 non-null    object 
 3   Origin       428 non-null    object 
 4   DriveTrain   428 non-null    object 
 5   MSRP         428 non-null    object 
 6   Invoice      406 non-null    object 
 7   EngineSize   428 non-null    float64
 8   Cylinders    426 non-null    float64
 9   Horsepower   343 non-null    float64
 10  MPG_City     428 non-null    float64
 11  MPG_Highway  428 non-null    float64
 12  Weight       428 non-null    float64
 13  Wheelbase    428 non-null    float64
 14  Length       428 non-null    float64
 15  Unnamed: 15  2 non-null      float64
dtypes: float64(9), object(7)
memory usage: 55.5+ KB
Null values in each column:
name_of_car     15
Model           14
Type            34
O

# Task 3: Access & Filter Data 

In [9]:
import pandas as pd
data = {
    "Name": ["Shaik", "Zoya", "Ravi", "Anjali"],
    "Math": [85, 90, 78, 92],
    "Science": [88, 84, 75, 91]
}
df = pd.DataFrame(data)
print("Math column:")
print(df['Math'])
print("\nMath and Science columns:")
print(df[['Math', 'Science']])
print("\nRow with index 1 (loc):")
print(df.loc[1])
print("\nRow at position 2 (iloc):")
print(df.iloc[2])
df['Average'] = (df['Math'] + df['Science']) / 2
print("\nDataFrame with 'Average' column:")
print(df)
df_dropped_col = df.drop(columns=['Science'])
print("\nAfter dropping 'Science' column:")
print(df_dropped_col)
df_dropped_row = df.drop(index=2)
print("\nAfter dropping row at index 2:")
print(df_dropped_row)


Math column:
0    85
1    90
2    78
3    92
Name: Math, dtype: int64

Math and Science columns:
   Math  Science
0    85       88
1    90       84
2    78       75
3    92       91

Row with index 1 (loc):
Name       Zoya
Math         90
Science      84
Name: 1, dtype: object

Row at position 2 (iloc):
Name       Ravi
Math         78
Science      75
Name: 2, dtype: object

DataFrame with 'Average' column:
     Name  Math  Science  Average
0   Shaik    85       88     86.5
1    Zoya    90       84     87.0
2    Ravi    78       75     76.5
3  Anjali    92       91     91.5

After dropping 'Science' column:
     Name  Math  Average
0   Shaik    85     86.5
1    Zoya    90     87.0
2    Ravi    78     76.5
3  Anjali    92     91.5

After dropping row at index 2:
     Name  Math  Science  Average
0   Shaik    85       88     86.5
1    Zoya    90       84     87.0
3  Anjali    92       91     91.5


#  Task 4: Built-in Methods 

In [10]:
import pandas as pd
data = {
    "Name": ["Shaik", "Zoya", "Ravi", "Anjali", "Zoya"],
    "Marks": [85, 90, 78, 92, 90],
    "Subject": ["Math", "Science", "Math", "Science", "Math"]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
df_sorted = df.sort_values(by='Marks', ascending=False)
print("\nSorted by Marks (descending):")
print(df_sorted)
name_counts = df['Name'].value_counts()
print("\nValue counts for 'Name':")
print(name_counts)
unique_subjects = df['Subject'].unique()
print("\nUnique subjects:")
print(unique_subjects)
print("\nMean of Marks:", df['Marks'].mean())
print("Sum of Marks:", df['Marks'].sum())
print("Minimum Marks:", df['Marks'].min())
print("Maximum Marks:", df['Marks'].max())


Original DataFrame:
     Name  Marks  Subject
0   Shaik     85     Math
1    Zoya     90  Science
2    Ravi     78     Math
3  Anjali     92  Science
4    Zoya     90     Math

Sorted by Marks (descending):
     Name  Marks  Subject
3  Anjali     92  Science
1    Zoya     90  Science
4    Zoya     90     Math
0   Shaik     85     Math
2    Ravi     78     Math

Value counts for 'Name':
Name
Zoya      2
Shaik     1
Ravi      1
Anjali    1
Name: count, dtype: int64

Unique subjects:
['Math' 'Science']

Mean of Marks: 87.0
Sum of Marks: 435
Minimum Marks: 78
Maximum Marks: 92
