# Pandas: Data Frames

In [1]:
# pip install pandas

import pandas as pd


In [2]:
# Column-oriented sample data: each key becomes a column name and each list
# supplies that column's values (all lists have the same length, one per car).
data = {
    'Car': ['BMW 7 Series', 'Mercedes S-Class', 'Audi A8', 'Lexus LS', 'Porsche Panamera'],
    'MPG_City': [18, 19, 19, 18, 18],
    'MPG_Highway': [26, 28, 27, 29, 27],
    'Year': [2022, 2022, 2022, 2022, 2022]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
print("DataFrame Info:")
df.info()

print("\nDataFrame Shape:", df.shape)
print("\nDataFrame Columns:", df.columns.tolist())
# head() returns the first five rows by default, matching the label below.
print("\nFirst 5 Rows:\n", df.head())
# describe() summarises the numeric columns only (count, mean, std, min, ...).
print("\nSummary Statistics:\n", df.describe())


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Car          5 non-null      object
 1   MPG_City     5 non-null      int64 
 2   MPG_Highway  5 non-null      int64 
 3   Year         5 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 292.0+ bytes
None

DataFrame Shape: (5, 4)

DataFrame Columns: ['Car', 'MPG_City', 'MPG_Highway', 'Year']

First 5 Rows:
                 Car  MPG_City  MPG_Highway  Year
0      BMW 7 Series        18           26  2022
1  Mercedes S-Class        19           28  2022
2           Audi A8        19           27  2022
3          Lexus LS        18           29  2022
4  Porsche Panamera        18           27  2022

Summary Statistics:
         MPG_City  MPG_Highway    Year
count   5.000000     5.000000     5.0
mean   18.400000    27.400000  2022.0
std     0.547723     1.140175     0.0
min   

## Read CSV

In [3]:
# Load the CSV file into a DataFrame.
# NOTE: this rebinds `df`, replacing the hand-built sample frame created earlier.
df = pd.read_csv('automobile_data.csv')

# Display basic information about the DataFrame.
# DataFrame.info() prints its own report and returns None, so it is called
# directly; print(df.info()) would append a stray "None" line to the output.
print("DataFrame Info:")
df.info()

print("\nDataFrame Shape:", df.shape)
print("\nDataFrame Columns:", df.columns.tolist())
# head(3) / tail(3) show the first and last three rows of the loaded file.
print("\nFirst 3 Rows:\n", df.head(3))
print("\nLast 3 Rows:\n", df.tail(3))
# describe() summarises the numeric columns only.
print("\nSummary Statistics:\n", df.describe())


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Make              91 non-null     object 
 1   Model             91 non-null     object 
 2   Year              91 non-null     float64
 3   Mileage           91 non-null     float64
 4   Miles Per Gallon  91 non-null     float64
 5   engine size       91 non-null     object 
 6   Transmission      91 non-null     object 
 7   FuelType          91 non-null     object 
 8   Price             81 non-null     float64
dtypes: float64(4), object(5)
memory usage: 6.7+ KB
None

DataFrame Shape: (94, 9)

DataFrame Columns: ['Make', 'Model', 'Year', 'Mileage', 'Miles Per Gallon', 'engine size', 'Transmission', 'FuelType', 'Price']

First 3 Rows:
           Make      Model    Year  Mileage  Miles Per Gallon engine size  \
0      Ferrari    488 GTB  2021.0  12000.0              18.0 

## Filtering DataFrames

In [4]:
# Select only the 'Make' column; indexing a DataFrame with a single column
# label yields a Series.
auto_make = df['Make']

# Preview the first five values (head()'s default), followed by one blank
# separator line.
print(auto_make.head(), end="\n\n")

# The same column again, this time showing the first ten values.
print(auto_make.head(10))


0        Ferrari
1        Ferrari
2    Lamborghini
3    Lamborghini
4        Porsche
Name: Make, dtype: object

0         Ferrari
1         Ferrari
2     Lamborghini
3     Lamborghini
4         Porsche
5         Porsche
6         Bentley
7    Aston Martin
8     Rolls-Royce
9        Maserati
Name: Make, dtype: object


In [5]:
# Keep only the cars whose 'Miles Per Gallon' is greater than 20.
# Step 1: build a boolean Series (True where the condition holds).
mpg_filter = df['Miles Per Gallon'].gt(20)

# Step 2: use the boolean Series to pick the matching rows.
filtered_df = df.loc[mpg_filter]

# Show at most the first ten matching rows.
print(filtered_df.head(10))


            Make                Model    Year  Mileage  Miles Per Gallon  \
12          Audi                  RS7  2022.0   9000.0              21.0   
14         Tesla        Model S Plaid  2022.0   4000.0             102.0   
16       Ferrari            Portofino  2022.0   5000.0              22.0   
20  Aston Martin              Vantage  2022.0   6000.0              21.0   
25          Audi                   S8  2021.0   9000.0              22.0   
27         Tesla        Model X Plaid  2021.0   3000.0              96.0   
28       McLaren                   GT  2022.0   6000.0              22.0   
35      Maserati                 MC20  2021.0   8000.0              24.0   
40         Tesla  Model Y Performance  2022.0   4000.0              91.0   
41       McLaren               Artura  2021.0  10000.0              50.0   

       engine size Transmission  FuelType     Price  
12         4.0L V8    Automatic    Petrol  135000.0  
14        Electric    Automatic  Electric  135000.0  
1

In [6]:
# Filter cars with Petrol FuelType, 'Miles Per Gallon' greater than 20.
# Each comparison yields a boolean Series and & combines them element-wise;
# the parentheses are required because & binds tighter than == and >.
# The mask has a descriptive name instead of `filter` so that Python's
# built-in filter() is not shadowed for the rest of the notebook.
petrol_mpg_mask = (df['FuelType'] == 'Petrol') & (df['Miles Per Gallon'] > 20)
filtered_df = df[petrol_mpg_mask]
print(filtered_df.head(10))


            Make        Model    Year  Mileage  Miles Per Gallon engine size  \
12          Audi          RS7  2022.0   9000.0              21.0     4.0L V8   
16       Ferrari    Portofino  2022.0   5000.0              22.0     3.9L V8   
20  Aston Martin      Vantage  2022.0   6000.0              21.0     4.0L V8   
25          Audi           S8  2021.0   9000.0              22.0     4.0L V8   
28       McLaren           GT  2022.0   6000.0              22.0     4.0L V8   
35      Maserati         MC20  2021.0   8000.0              24.0     3.0L V6   
59  Aston Martin       Cygnet  2021.0   7000.0              56.0     1.3L I4   
65        Jaguar  E-Pace P300  2021.0   6000.0              23.0     2.0L I4   
77          Audi    RS6 Avant  2021.0   9000.0              21.0     4.0L V8   

   Transmission FuelType     Price  
12    Automatic   Petrol  135000.0  
16    Automatic   Petrol  230000.0  
20    Automatic   Petrol  150000.0  
25    Automatic   Petrol  130000.0  
28    Automati

In [7]:
# Filter cars with Petrol FuelType, 'Miles Per Gallon' greater than 20 that are also from the year 2020 or later.
# Each comparison yields a boolean Series and & combines them element-wise;
# the parentheses are required because & binds tighter than ==, > and >=.
# The mask has a descriptive name instead of `filter` so that Python's
# built-in filter() is not shadowed for the rest of the notebook.
recent_petrol_mpg_mask = (
    (df['FuelType'] == 'Petrol')
    & (df['Miles Per Gallon'] > 20)
    & (df['Year'] >= 2020)
)
filtered_df = df[recent_petrol_mpg_mask]
print(filtered_df.head())


            Make      Model    Year  Mileage  Miles Per Gallon engine size  \
12          Audi        RS7  2022.0   9000.0              21.0     4.0L V8   
16       Ferrari  Portofino  2022.0   5000.0              22.0     3.9L V8   
20  Aston Martin    Vantage  2022.0   6000.0              21.0     4.0L V8   
25          Audi         S8  2021.0   9000.0              22.0     4.0L V8   
28       McLaren         GT  2022.0   6000.0              22.0     4.0L V8   

   Transmission FuelType     Price  
12    Automatic   Petrol  135000.0  
16    Automatic   Petrol  230000.0  
20    Automatic   Petrol  150000.0  
25    Automatic   Petrol  130000.0  
28    Automatic   Petrol  210000.0  


In [8]:
# Filter the filtered_df above for cars that have a price greater than 150,000.
# The mask is built from filtered_df (not df) so its index lines up with the
# frame it is applied to. Rows with a missing Price compare False and drop out.
# The mask is named price_mask instead of `filter` so that Python's built-in
# filter() is not shadowed.
price_mask = filtered_df['Price'] > 150000
filtered_df = filtered_df[price_mask]
print(filtered_df.head())


        Make      Model    Year  Mileage  Miles Per Gallon engine size  \
16   Ferrari  Portofino  2022.0   5000.0              22.0     3.9L V8   
28   McLaren         GT  2022.0   6000.0              22.0     4.0L V8   
35  Maserati       MC20  2021.0   8000.0              24.0     3.0L V6   

   Transmission FuelType     Price  
16    Automatic   Petrol  230000.0  
28    Automatic   Petrol  210000.0  
35    Automatic   Petrol  210000.0  
