In [1]:
import numpy as np
import pandas as pd

### Q1. Pandas version

What's the version of Pandas that you installed?

In [2]:
# Q1: display the version string of the installed pandas package.
pd.__version__

'2.3.2'

In [3]:
# Display the NumPy version as well, so the environment used for the outputs is recorded.
np.__version__

'2.0.1'

In [4]:
# Location of the car fuel-efficiency CSV (raw file served from GitHub).
DATA_URL = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv"

# Download the CSV and parse it into the working DataFrame used by every later cell.
df = pd.read_csv(DATA_URL)

In [5]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


### Q2. Records count

In [6]:
# Q2: number of records = length of the row index.
len(df.index)

9704

In [7]:
# Cross-check of the record count: unpack (rows, columns) and show the row part.
row_count, _ = df.shape
row_count

9704

### Q3. Fuel types

In [8]:
# Q3: count the distinct values in the fuel_type column.
df['fuel_type'].nunique()

2

In [9]:
# List the distinct fuel types themselves to confirm the count above.
df['fuel_type'].unique()

array(['Gasoline', 'Diesel'], dtype=object)

### Q4. Missing values

In [10]:
# Q4: number of missing values in each column.
df.isna().sum()

engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

In [11]:
# Q4: how many columns contain at least one missing value.
df.isna().any().sum()

np.int64(4)

### Q5. Max fuel efficiency

In [12]:
# Q5: highest fuel efficiency among cars whose origin is Asia
# (row mask and column picked in a single .loc lookup).
df.loc[df['origin'] == 'Asia', 'fuel_efficiency_mpg'].max()

23.759122836520497

### Q6. Median value of horsepower

In [13]:
# Q6, part 1: median horsepower before any missing values are filled.
df['horsepower'].median()

149.0

In [14]:
# Q6, part 2: most frequent horsepower value(s); mode() returns a Series.
df['horsepower'].mode()

0    152.0
Name: horsepower, dtype: float64

In [15]:
# Q6, part 3: replace missing horsepower values with the most frequent value.
# mode() returns a Series, so take its first entry as the fill value.
# NOTE: this overwrites the column in df; later cells rely on the filled version.
most_frequent_hp = df['horsepower'].mode().iloc[0]
df['horsepower'] = df['horsepower'].fillna(most_frequent_hp)

In [16]:
# Re-check the per-column missing-value counts after filling horsepower.
df.isna().sum()

engine_displacement      0
num_cylinders          482
horsepower               0
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

In [17]:
# Q6, part 4: median horsepower after the missing values were filled with the mode.
df['horsepower'].median()

152.0

### Q7. Sum of weights

In [18]:
# Step 1: keep only the rows whose origin is Asia.
is_asia = df['origin'] == 'Asia'
asia = df.loc[is_asia]

In [19]:
# Peek at the first two Asia rows.
asia.iloc[:2]

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
8,250,1.0,174.0,2714.21931,10.3,2016,Asia,Diesel,Front-wheel drive,-1.0,16.823554
12,320,5.0,145.0,2783.868974,15.1,2010,Asia,Diesel,All-wheel drive,1.0,16.17582


In [20]:
# Step 2: restrict the Asia subset to the two feature columns.
feature_columns = ['vehicle_weight', 'model_year']
asia_sub = asia.loc[:, feature_columns]

In [21]:
# Step 3: take the first 7 rows of the two-column subset.
asia_7 = asia_sub.iloc[:7]
asia_7

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
34,2844.227534,2014
38,3761.994038,2019


In [22]:
# Step 4: convert the 7-row frame (vehicle_weight, model_year) to a NumPy array.
X = asia_7.to_numpy()
X

array([[2714.21930965, 2016.        ],
       [2783.86897424, 2010.        ],
       [3582.68736772, 2007.        ],
       [2231.8081416 , 2011.        ],
       [2659.43145076, 2016.        ],
       [2844.22753389, 2014.        ],
       [3761.99403819, 2019.        ]])

In [23]:
# Step 5: matrix product of X transposed with X.
XTX = np.matmul(X.T, X)
XTX

array([[62248334.33150762, 41431216.50732678],
       [41431216.50732678, 28373339.        ]])

In [24]:
# Step 6: compute the matrix inverse of XTX.
XTX_inv = np.linalg.inv(XTX)
XTX_inv

array([[ 5.71497081e-07, -8.34509443e-07],
       [-8.34509443e-07,  1.25380877e-06]])

In [25]:
# Step 7: build the target vector, one value per row of X.
target_values = [1100, 1300, 800, 900, 1000, 1100, 1200]
y = np.array(target_values)
y

array([1100, 1300,  800,  900, 1000, 1100, 1200])

In [26]:
# Step 8: w = XTX_inv . X^T . y, evaluated left to right
# (first XTX_inv times X^T, then that product times y).
projection = np.matmul(XTX_inv, X.T)
w = np.matmul(projection, y)
w

array([0.01386421, 0.5049067 ])

In [27]:
# Step 9 (Q7 answer): sum of all elements of w.
np.sum(w)

np.float64(0.5187709081074007)