In [None]:
import pandas as pd
import seaborn as sns


# The raw file has no header row, so the column names are supplied by hand.
headers = [
    "symboling",
    "normalized_losses",
    "make",
    "fuel_type",
    "aspiration",
    "num_doors",
    "body_style",
    "drive_wheels",
    "engine_location",
    "wheel_base",
    "length",
    "width",
    "height",
    "curb_weight",
    "engine_type",
    "num_cylinders",
    "engine_size",
    "fuel_system",
    "bore",
    "stroke",
    "compression_ratio",
    "horsepower",
    "peak_rpm",
    "city_mpg",
    "highway_mpg",
    "price",
]

# Load the data set: no header row in the file, our own column names,
# and "?" entries are read as NaN.
df_raw = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
    header=None,
    names=headers,
    na_values="?",
)

# Values of the "make" column that the rest of the notebook looks at.
models = [
    "toyota",
    "nissan",
    "mazda",
    "honda",
    "mitsubishi",
    "subaru",
    "volkswagen",
    "volvo",
]

# Keep only the rows whose make is in the list above; .copy() makes df
# a separate frame instead of a slice of df_raw.
df = df_raw.loc[df_raw["make"].isin(models)].copy()

In [16]:
# Count of rows for every make (rows) / fuel_type (columns) combination.
pd.crosstab(index=df["make"], columns=df["fuel_type"])

fuel_type,diesel,gas
make,Unnamed: 1_level_1,Unnamed: 2_level_1
honda,0,13
mazda,2,15
mitsubishi,0,13
nissan,1,17
subaru,0,12
toyota,3,29
volkswagen,4,8
volvo,1,10


### Including Totals

In [18]:
# Same count table, with an extra row and column of totals labelled "Total".
pd.crosstab(
    index=df["make"],
    columns=df["fuel_type"],
    margins=True,
    margins_name="Total",
)

fuel_type,diesel,gas,Total
make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
honda,0,13,13
mazda,2,15,17
mitsubishi,0,13,13
nissan,1,17,18
subaru,0,12,12
toyota,3,29,32
volkswagen,4,8,12
volvo,1,10,11
Total,11,117,128


### Custom aggregate function

In [19]:
# Aggregate curb_weight with 'mean' for each make / body_style cell instead of
# counting rows, then round the result to 0 decimal places.
pd.crosstab(
    index=df["make"],
    columns=df["body_style"],
    values=df["curb_weight"],
    aggfunc='mean',
).round(0)

body_style,convertible,hardtop,hatchback,sedan,wagon
make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
honda,,,1970.0,2289.0,2024.0
mazda,,,2254.0,2361.0,
mitsubishi,,,2377.0,2394.0,
nissan,,2008.0,2740.0,2238.0,2452.0
subaru,,,2137.0,2314.0,2454.0
toyota,2975.0,2585.0,2370.0,2338.0,2708.0
volkswagen,2254.0,,2221.0,2342.0,2563.0
volvo,,,,3023.0,3078.0


### Grouping by multiple columns

In [21]:
# Passing a list of Series as the columns argument produces two-level column
# headers: body_style on top, drive_wheels underneath.
pd.crosstab(
    index=df["make"],
    columns=[df["body_style"], df["drive_wheels"]],
)


body_style,convertible,convertible,hardtop,hardtop,hatchback,hatchback,hatchback,sedan,sedan,sedan,wagon,wagon,wagon
drive_wheels,fwd,rwd,fwd,rwd,4wd,fwd,rwd,4wd,fwd,rwd,4wd,fwd,rwd
make,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
honda,0,0,0,0,0,7,0,0,5,0,0,1,0
mazda,0,0,0,0,0,6,4,0,5,2,0,0,0
mitsubishi,0,0,0,0,0,9,0,0,4,0,0,0,0
nissan,0,0,1,0,0,2,3,0,9,0,0,3,0
subaru,0,0,0,0,1,2,0,2,3,0,2,2,0
toyota,0,1,0,3,0,8,6,0,7,3,2,1,1
volkswagen,1,0,0,0,0,1,0,0,9,0,0,1,0
volvo,0,0,0,0,0,0,0,0,0,8,0,0,3
