### Import

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('car_fuel_efficiency.csv')
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


### Q1. Pandas Version

In [3]:
print("Pandas version:", pd.__version__)

Pandas version: 2.3.2


### Q2. Records Count

In [4]:
print("Number of records:", df.shape[0])

Number of records: 9704


### Q3. Fuel types

In [5]:
print("Number of unique fuel types:", df['fuel_type'].nunique())

Number of unique fuel types: 2


### Q4. Missing Values

In [6]:
No_of_cols_with_missing_values = df.isnull().sum()

print("Number of columns with missing values:", No_of_cols_with_missing_values[No_of_cols_with_missing_values > 0].count())

Number of columns with missing values: 4


### Q5. Max fuel efficency

In [7]:
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


In [8]:
max_fuel_effi_by_origin = df.groupby("origin")["fuel_efficiency_mpg"].max().round(2)
max_fuel_effi_by_origin

origin
Asia      23.76
Europe    25.97
USA       24.97
Name: fuel_efficiency_mpg, dtype: float64

In [9]:
print("Maximum fuel efficiency for cars from Asia:", max_fuel_effi_by_origin['Asia'])

Maximum fuel efficiency for cars from Asia: 23.76


### Median value of horsepower

In [10]:
median_init = df["horsepower"].median()
median_init

149.0

In [11]:
df["horsepower"].mode().values[0]

np.float64(152.0)

In [12]:
df["horsepower"].fillna(df["horsepower"].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["horsepower"].fillna(df["horsepower"].mode()[0], inplace=True)


In [13]:
median_after = df["horsepower"].median()
median_after

152.0

In [14]:
if median_init == median_after:
    print("No change")
elif median_init > median_after:
    print("Yes, it decreased")
else:
    print("Yes, it increased")

Yes, it increased


### Q7. Sum of weights

In [15]:
# Select all the cars from Asia
df = df[df["origin"] == "Asia"]

In [16]:
# Select only columns vehicle_weight and model_year
df = df[["vehicle_weight", "model_year"]]
df

Unnamed: 0,vehicle_weight,model_year
8,2714.219310,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
...,...,...
9688,3948.404625,2018
9692,3680.341381,2016
9693,2545.070139,2012
9698,3107.427820,2005


In [17]:
# Select the first 7 values
df[:7]

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
34,2844.227534,2014
38,3761.994038,2019


In [18]:
# Get the underlying NumPy array. Call it X
X = df[:7].values
X

array([[2714.21930965, 2016.        ],
       [2783.86897424, 2010.        ],
       [3582.68736772, 2007.        ],
       [2231.8081416 , 2011.        ],
       [2659.43145076, 2016.        ],
       [2844.22753389, 2014.        ],
       [3761.99403819, 2019.        ]])

In [19]:
# Compute XTX
XTX = X.T.dot(X)
XTX

array([[62248334.33150762, 41431216.5073268 ],
       [41431216.5073268 , 28373339.        ]])

In [20]:
# Invert XTX
XTX_inv = np.linalg.inv(XTX)
XTX_inv

array([[ 5.71497081e-07, -8.34509443e-07],
       [-8.34509443e-07,  1.25380877e-06]])

In [21]:
# Create an array y
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

In [22]:
# Compute w
XT_XTX_inv = XTX_inv.dot(X.T)
w = XT_XTX_inv.dot(y)

In [23]:
#  Sum of all elements of  w
w.sum()

np.float64(0.5187709081074007)