# Import Libraries

In [30]:
import pandas as pd
import numpy as np

# Load data set

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv')

In [7]:
df

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.870990,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369
...,...,...,...,...,...,...,...,...,...,...,...
9699,140,5.0,164.0,2981.107371,17.3,2013,Europe,Diesel,Front-wheel drive,,15.101802
9700,180,,154.0,2439.525729,15.0,2004,USA,Gasoline,All-wheel drive,0.0,17.962326
9701,220,2.0,138.0,2583.471318,15.1,2008,USA,Diesel,All-wheel drive,-1.0,17.186587
9702,230,4.0,177.0,2905.527390,19.4,2011,USA,Diesel,Front-wheel drive,1.0,15.331551


# Q1. Pandas version

In [8]:
pd.__version__

'2.3.1'

# Q2. Records count

In [10]:
len(df)

9704

In [9]:
df.count()

engine_displacement    9704
num_cylinders          9222
horsepower             8996
vehicle_weight         9704
acceleration           8774
model_year             9704
origin                 9704
fuel_type              9704
drivetrain             9704
num_doors              9202
fuel_efficiency_mpg    9704
dtype: int64

# Q3. Fuel types

In [11]:
df['fuel_type'].nunique()

2

In [26]:
df['fuel_type'].unique()


array(['Gasoline', 'Diesel'], dtype=object)

# Q4. Missing values

In [27]:
df.isnull().sum()

engine_displacement      0
num_cylinders          482
horsepower               0
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

In [15]:
(df.isnull().sum() > 0).sum()

np.int64(4)

# Q5. Max fuel efficiency

In [29]:
asia_cars = df[df['origin'] == 'Asia']
asia_cars.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
8,250,1.0,174.0,2714.21931,10.3,2016,Asia,Diesel,Front-wheel drive,-1.0,16.823554
12,320,5.0,145.0,2783.868974,15.1,2010,Asia,Diesel,All-wheel drive,1.0,16.17582
14,200,6.0,160.0,3582.687368,14.9,2007,Asia,Diesel,All-wheel drive,0.0,11.871091
20,150,3.0,197.0,2231.808142,18.7,2011,Asia,Gasoline,Front-wheel drive,1.0,18.889083
21,160,4.0,133.0,2659.431451,,2016,Asia,Gasoline,Front-wheel drive,-1.0,16.07773


In [20]:
max_efficiency = asia_cars['fuel_efficiency_mpg'].max()
max_efficiency

np.float64(23.759122836520497)

# Q6. Median value of horsepower

In [24]:
median_before = df['horsepower'].median()
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].mode()[0])
median_after = df['horsepower'].median()

print(f"Median before: {median_before}, Median after: {median_after}")


Median before: 149.0, Median after: 152.0


# Q7. Sum of weights

In [31]:
# Step 1: Filter Asian cars
asia_cars = df[df['origin'] == 'Asia']

# Step 2: Select columns and first 7 rows
X = asia_cars[['vehicle_weight', 'model_year']].iloc[:7].to_numpy()

# Step 3: Compute XTX
XTX = X.T @ X

# Step 4: Invert XTX
XTX_inv = np.linalg.inv(XTX)

# Step 5: Create y
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# Step 6: Compute w
w = XTX_inv @ X.T @ y

# Step 7: Sum of elements
result_sum = w.sum()
print("Sum of all elements in w:", result_sum)


Sum of all elements in w: 0.5187709081074016
