# Exercise 03: Selects and Aggregations

In [None]:
import pandas as pd

In [None]:
# 1. Load the JSON file
# The exercise asks for the JSON file created in the previous exercise; it is
# read from ../ex02/auto.json (relative to this notebook) as a list of records.
# The read and the re-indexing by car number are done in one chained expression.
df = (
    pd.read_json('../ex02/auto.json', orient='records')
    .set_index('CarNumber')
)
# Last expression of the cell: preview the first rows of the indexed frame.
df.head()

In [None]:
# 2. Selections
# Each selection below builds a boolean mask over `df` and prints the rows
# for which the mask is True.

# Rows whose fine is strictly greater than 2100.
print("Fines > 2100:")
print(df[df['Fines'] > 2100])

# Rows whose fine is strictly greater than 2100 AND whose refund equals 2.
# Each comparison is parenthesised because `&` binds tighter than `>` / `==`.
print("\nFines > 2100 and Refund == 2:")
print(df[(df['Fines'] > 2100) & (df['Refund'] == 2)])

# Rows whose model is exactly one of 'Focus' or 'Corolla'.
# The filter list must match the printed label: no other model is included,
# so an empty result here means the data has no such models rather than
# being silently padded with a different one.
print("\nModels in Focus, Corolla:")
print(df[df['Model'].isin(['Focus', 'Corolla'])])

# Rows whose car number (the frame's index) is one of the listed plates.
car_list = ['Y7689C197RUS', '92928M178RUS', '7788KT197RUS', 'H115YO163RUS', 'X758HY197RUS']
print("\nCar number in list:")
print(df[df.index.isin(car_list)])

In [None]:
# 3. Aggregations with make and model
# Fines grouped by make only: report the median per make.
fines_by_make = df.groupby('Make')['Fines']
print("\nMedian fines by Make:")
print(fines_by_make.median())

# Fines grouped by (make, model): the same grouping is reused for every
# statistic, so build it once and walk a table of (heading, statistic) pairs.
# Order of the table is the order in which the results are printed.
fines_by_make_model = df.groupby(['Make', 'Model'])['Fines']
make_model_reports = (
    ("\nMedian fines by Make, Model:", 'median'),
    ("\nCount fines by Make, Model:", 'count'),
    ("\nMin fines by Make, Model:", 'min'),
    ("\nMax fines by Make, Model:", 'max'),
    ("\nStd fines by Make, Model:", 'std'),
)
for heading, statistic in make_model_reports:
    print(heading)
    # Passing the statistic name to agg() dispatches to the group method
    # of the same name (median(), count(), min(), max(), std()).
    print(fines_by_make_model.agg(statistic))

In [None]:
# 4. Aggregations with car numbers
# `CarNumber` is the index of `df` (set when the data was loaded), so
# groupby('CarNumber') groups on that index level.

# Car numbers ranked by how many (non-null) fines they have, most first.
fines_count = df.groupby('CarNumber')['Fines'].count().sort_values(ascending=False)
print("\nTop violators (count):")
print(fines_count.head())

# Select rows for the top-1 car number.
# A list-of-one-label is passed to .loc so the result is always a DataFrame
# of rows; a bare scalar label would collapse to a Series when the car number
# happens to match exactly one row.
top_1_violator = fines_count.index[0]
print(f"\nRows for top-1 violator ({top_1_violator}):")
print(df.loc[[top_1_violator]])

# Car numbers ranked by the total amount of their fines, largest first.
fines_sum = df.groupby('CarNumber')['Fines'].sum().sort_values(ascending=False)
print("\nTop payers (sum):")
print(fines_sum.head())

# Select rows for the top-1 payer (same list-label form as above, so a
# single large fine is still shown as a one-row DataFrame).
top_1_payer = fines_sum.index[0]
print(f"\nRows for top-1 payer ({top_1_payer}):")
print(df.loc[[top_1_payer]])

# Are different models connected to the same car number?
# Count distinct models per car number and keep only those with more than one.
models_per_car = df.groupby('CarNumber')['Model'].nunique()
print("\nCar numbers with > 1 model:")
print(models_per_car[models_per_car > 1])