# Creating and Loading DataFrames
Create sample data dictionary and convert it to a Pandas DataFrame using pd.DataFrame()

In [2]:
import pandas as pd
# Creating a sample car dataset
data = {
    'Company': ['Ford', 'BMW', 'Ford', 'Toyota', 'BMW', 'Toyota', 'Ford', 'BMW', 'Ford', 'Toyota'],
    'Model': ['Mustang', 'X5', 'Fiesta', 'Corolla', 'M3', 'Camry', 'Focus', 'X3', 'Explorer', 'RAV4'],
    'Price': [40000, 60000, 20000, 25000, 70000, 30000, 22000, 45000, 35000, 27000],
    'Mileage': [15, 12, 18, 20, 10, 21, 19, 13, 16, 22]
}

# Creating the dataframe
car_df = pd.DataFrame(data)

# Basic DataFrame Information
Explore DataFrame structure using info(), describe(), and shape attributes

In [3]:
# Basic DataFrame Information

# Display the structure of the DataFrame
car_df.info()

# Display summary statistics of the DataFrame
car_df.describe()

# Display the shape of the DataFrame
car_df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Company  10 non-null     object
 1   Model    10 non-null     object
 2   Price    10 non-null     int64 
 3   Mileage  10 non-null     int64 
dtypes: int64(2), object(2)
memory usage: 452.0+ bytes


(10, 4)

# Value Counts and Grouping
Use value_counts() for frequency analysis and groupby() for aggregate operations

In [4]:
# Value Counts and Grouping

# 1. Counting total cars per company
cars_per_company = car_df['Company'].value_counts()
cars_per_company

# 2. Finding the average mileage of each car-making company
average_mileage = car_df.groupby('Company')['Mileage'].mean()
average_mileage

Company
BMW       11.666667
Ford      17.000000
Toyota    21.000000
Name: Mileage, dtype: float64

# Indexing and Selection
Demonstrate loc[], iloc[], and boolean indexing for data selection

In [5]:
# Indexing and Selection

# Using loc[] for label-based indexing
# Select all rows for the 'Company' and 'Model' columns
company_model = car_df.loc[:, ['Company', 'Model']]
company_model

# Select rows where the 'Company' is 'BMW'
bmw_cars = car_df.loc[car_df['Company'] == 'BMW']
bmw_cars

# Using iloc[] for position-based indexing
# Select the first 3 rows and first 2 columns
first_three_rows = car_df.iloc[:3, :2]
first_three_rows

# Select the last 2 rows
last_two_rows = car_df.iloc[-2:]
last_two_rows

# Using boolean indexing
# Select cars with a price greater than 30000
expensive_cars = car_df[car_df['Price'] > 30000]
expensive_cars

# Select cars with mileage less than 15
low_mileage_cars = car_df[car_df['Mileage'] < 15]
low_mileage_cars

Unnamed: 0,Company,Model,Price,Mileage
1,BMW,X5,60000,12
4,BMW,M3,70000,10
7,BMW,X3,45000,13


# Sorting and Filtering
Use sort_values() for sorting and boolean conditions for filtering data

In [6]:
# Sorting and Filtering

# Sorting the DataFrame by the 'Price' column in descending order
sorted_by_price = car_df.sort_values(by='Price', ascending=False)
sorted_by_price

# Sorting the DataFrame by the 'Mileage' column in ascending order
sorted_by_mileage = car_df.sort_values(by='Mileage', ascending=True)
sorted_by_mileage

# Filtering the DataFrame to include only cars with a price greater than 30000
filtered_expensive_cars = car_df[car_df['Price'] > 30000]
filtered_expensive_cars

# Filtering the DataFrame to include only cars with mileage less than 15
filtered_low_mileage_cars = car_df[car_df['Mileage'] < 15]
filtered_low_mileage_cars

Unnamed: 0,Company,Model,Price,Mileage
1,BMW,X5,60000,12
4,BMW,M3,70000,10
7,BMW,X3,45000,13


# Aggregation Operations
Perform mean(), sum(), and other aggregation operations on grouped data

In [7]:
# Aggregation Operations

# Grouping the data by 'Company' and calculating the mean of each group
mean_values = car_df.groupby('Company').mean()
mean_values

# Grouping the data by 'Company' and calculating the sum of each group
sum_values = car_df.groupby('Company').sum()
sum_values

# Grouping the data by 'Company' and calculating the maximum value of each group
max_values = car_df.groupby('Company').max()
max_values

# Grouping the data by 'Company' and calculating the minimum value of each group
min_values = car_df.groupby('Company').min()
min_values

TypeError: agg function failed [how->mean,dtype->object]