# Machine Learning Zoomcamp 2025: Introduction to Machine Learning

## Import libraries


In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Q1. Pandas version


In [13]:
# Q1: report the pandas release that this notebook is running against.
print(f"Pandas version: {pd.__version__}")

Pandas version: 2.2.2


## Q2. Load dataset

In [14]:
# Q2: load the car fuel-efficiency CSV straight from its GitHub URL.
url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"
df = pd.read_csv(url)

# Count records
print("Number of records:", len(df))

# Show first few rows. This has to be the final expression of the cell:
# a bare expression placed before other statements is evaluated but its
# value is never rendered in the cell output.
df.head()


Number of records: 9704


## Q3. Fuel types


In [15]:
# Q3: list the distinct values of the fuel_type column and count them.
fuel_type_col = df["fuel_type"]
print("Unique fuel types:", fuel_type_col.unique())
print("Number of fuel types:", fuel_type_col.nunique())

Unique fuel types: ['Gasoline' 'Diesel']
Number of fuel types: 2


## Q4. Missing values

In [16]:
# Q4: per-column count of missing cells.
missing = df.isnull().sum()

# Print the header on its own line. Passing "...\n" and the Series to a
# single print() call puts the separator space right after the newline,
# which shifts the first row of the table by one column.
print("Missing values per column:")
print(missing)

# How many columns contain at least one missing value.
print("Columns with missing values:", (missing > 0).sum())

Missing values per column:
 engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64
Columns with missing values: 4


## Q5. Max fuel efficiency for Asia

In [18]:
# Q5: restrict the frame to rows whose origin is 'Asia', then report the
# largest fuel_efficiency_mpg among them.
is_asia = df["origin"] == "Asia"
asia = df[is_asia]
max_mpg_asia = asia["fuel_efficiency_mpg"].max()
print("Max fuel efficiency (Asia):", max_mpg_asia)

Max fuel efficiency (Asia): 23.759122836520497


## Q6. Horsepower median before and after fillna

In [19]:
# Q6: does the median of horsepower change once its missing values are
# replaced with the column's most frequent value?
#
# NOTE: this cell writes the filled column back into df['horsepower'].
# Running it again without reloading df therefore computes the "before"
# median on data that has already been filled.
horsepower = df['horsepower']

# Median of the column as it currently stands.
median_before = horsepower.median()
print("Median horsepower before fillna:", median_before)

# Most frequent value (first entry of the mode result).
mode_value = horsepower.mode()[0]
print("Most frequent horsepower:", mode_value)

# Replace the missing entries with the mode and store the result in df.
df['horsepower'] = horsepower.fillna(mode_value)

# Median of the filled column.
median_after = df['horsepower'].median()
print("Median horsepower after fillna:", median_after)

# Direction of the change; "No" when neither comparison holds.
verdict = (
    "Yes, it increased" if median_after > median_before
    else "Yes, it decreased" if median_after < median_before
    else "No"
)
print(verdict)

Median horsepower before fillna: 149.0
Most frequent horsepower: 152.0
Median horsepower after fillna: 152.0
Yes, it increased


## Q7. Sum of weights (Linear regression calculation)


In [20]:
# Q7: evaluate the normal equation w = (X^T X)^-1 X^T y on a 7-row sample
# and report the sum of the resulting weights.

# Rows whose origin is 'Asia', restricted to the two feature columns.
asia = df.loc[df['origin'] == 'Asia', ['vehicle_weight', 'model_year']]

# First 7 rows as a NumPy array.
X = asia.head(7).to_numpy()

# Header and matrix are printed separately: print("X:\n", X) would put the
# separator space after the newline and push the first row one column to
# the right of the others.
print("X:")
print(X)

# XTX = X^T * X
XTX = X.T @ X

# Inverse
XTX_inv = np.linalg.inv(XTX)

# y values, one per row of X
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# w = (XTX^-1 * X^T) * y  -- '@' associates left to right, matching the
# original chained .dot() calls.
w = XTX_inv @ X.T @ y

print("w:", w)
print("Sum of elements in w:", w.sum())

X:
 [[2714.21930965 2016.        ]
 [2783.86897424 2010.        ]
 [3582.68736772 2007.        ]
 [2231.8081416  2011.        ]
 [2659.43145076 2016.        ]
 [2844.22753389 2014.        ]
 [3761.99403819 2019.        ]]
w: [0.01386421 0.5049067 ]
Sum of elements in w: 0.5187709081074016
