In [18]:
import pandas as pd
import numpy as np

# Q1. Pandas version

In [27]:
pd.__version__


'2.3.3'

In [3]:
# Download and read the car fuel efficiency dataset
url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"
df = pd.read_csv(url)

# Display basic info about the dataset
print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
df.head()

Dataset shape: (9704, 11)
Columns: ['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight', 'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain', 'num_doors', 'fuel_efficiency_mpg']


Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


# Q2. Records count

In [26]:
df.shape[0]

9704

# Q3. Fuel types

In [25]:
df['fuel_type'].value_counts()   

fuel_type
Gasoline    4898
Diesel      4806
Name: count, dtype: int64

# Q4. Missing values

In [4]:
missing_values = df.isnull().sum()
print("Missing values in each column:")
print(missing_values)
print(f"\n Columns with missing values: {missing_values[missing_values > 0].index.tolist()} ")

Missing values in each column:
engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

 Columns with missing values: ['num_cylinders', 'horsepower', 'acceleration', 'num_doors'] 


# Question 5. Max Fuel Efficiency from Asian

In [None]:

max_mpg = df[df['origin'] == 'Asia']['fuel_efficiency_mpg'].max()
print(f"Maximum fuel efficiency (mpg) from Asian cars: {max_mpg}")

Maximum fuel efficiency (mpg) from Asian cars: 23.759122836520497


# Q6: Median value of horsepower
-  Find the median value of the horsepower column in the dataset.
-  Next, calculate the most frequent value of the same horsepower column.
-  Use the fillna method to fill the missing values in the horsepower column with the most frequent value from the previous step.
-  Now, calculate the median value of horsepower once again.

In [12]:
hp_original_mean = df['horsepower'].mean()  
print(f"Original mean horsepower: {hp_original_mean}")

hp_mode = df['horsepower'].mode()
print(f"Most frequent horsepower value: {hp_mode[0]}")

df['hp_frequency_filled'] = df['horsepower'].fillna(hp_mode[0])
print(f"Mean horsepower after filling missing values with mode: {df['hp_frequency_filled'].mean()}")

Original mean horsepower: 149.65729212983547
Most frequent horsepower value: 152.0
Mean horsepower after filling missing values with mode: 149.82821516900248


# Q7: Sum of weights

In [23]:
# Select all cars from asia and only columns vehicle_weight and model_year

df_asia = df[df['origin'] == 'Asia'][['vehicle_weight', 'model_year']]

# get the first 7 rows
asia_rows = df_asia.head(7)

# get the underlying numpy array
asia_array = asia_rows.to_numpy()

# Compute the result of the matrix multiplication between the transposed matrix and the original matrix
XTX = asia_array.T.dot(asia_array)
print("Result of the matrix multiplication:")
print(XTX)

y = [1100, 1300, 800, 900, 1000, 1100, 1200]

# multiply the inverse of XTX with the transposed of asia_array and them multiply by y
w = np.linalg.inv(XTX).dot(asia_array.T) * y
print("Result of the linear regression:")
print(w)

# sum of the elements of w
print("Sum of the elements of w:")
print(w.sum())

Result of the matrix multiplication:
[[62248334.33150762 41431216.5073268 ]
 [41431216.5073268  28373339.        ]]
Result of the linear regression:
[[-0.14432288 -0.11230828  0.29810794 -0.36245398 -0.16251372 -0.06075771
   0.55811286]
 [ 0.28890053  0.2560879  -0.37871378  0.59305003  0.30835783  0.16679975
  -0.72957556]]
Sum of the elements of w:
0.5187709081074016
