In [32]:
import pandas as pd
import numpy as np

In [33]:
df = pd.read_csv('https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv')

### Q2 RECORDS

In [34]:
# RECORDS COUNT
len(df)


9704

### Q1 PANDAS VERSION

In [35]:
# PANDAS VERSION
pd.__version__

'2.3.1'

In [36]:
df.columns

Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',
       'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',
       'num_doors', 'fuel_efficiency_mpg'],
      dtype='object')

### Q3 FUEL TYPES

In [37]:
# FUEL TYPE
len(df['fuel_type'].unique())

2

### Q4 MISSING VALUES

In [38]:
# NBR COLUMNS WITH MISSING VALUES
missing_columns = df.isnull().any(axis=0).sum()
missing_columns

np.int64(4)

In [39]:
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


### Q5 MAX FUEL EFFICIENCY

In [40]:
# MAX FUEL EFFICIENCY
df_asia = df[df['origin']=='Asia']
max_value = df_asia['fuel_efficiency_mpg'].max()
max_value

np.float64(23.759122836520497)

### Q6 MEDIAN VALUE OF HORSEPOWER

In [41]:
# MEDIAN VALUE HORSEPOWER
df['horsepower'].median()

np.float64(149.0)

In [42]:
# MOST FREQUENT
most_frequent = df['horsepower'].mode()
most_frequent

0    152.0
Name: horsepower, dtype: float64

In [43]:
# FILL MISSING VALUES
df2= df.fillna(value={'horsepower': most_frequent})

In [44]:
# RECALCULATE THE MEDIAN VALUE
df2['horsepower'] .median()

np.float64(149.0)

### Q7 SUM OF WEIGHTS

In [45]:
df_asia_weight_year = df_asia[['vehicle_weight','model_year']]
df_7 = df_asia_weight_year.head(7)
df_7

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
34,2844.227534,2014
38,3761.994038,2019


In [46]:
x = df_7.to_numpy()
x

array([[2714.21930965, 2016.        ],
       [2783.86897424, 2010.        ],
       [3582.68736772, 2007.        ],
       [2231.8081416 , 2011.        ],
       [2659.43145076, 2016.        ],
       [2844.22753389, 2014.        ],
       [3761.99403819, 2019.        ]])

In [47]:
x[:,0]

array([2714.21930965, 2783.86897424, 3582.68736772, 2231.8081416 ,
       2659.43145076, 2844.22753389, 3761.99403819])

In [48]:
x.shape[1]

2

In [49]:
num_rows = x.shape[0]
num_cols = x.shape[1]
result = np.zeros((num_rows,num_cols))
result

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

* MATRIX MATIRX MULTIPLICATION

In [51]:
# we start by creating a vector-vector function

def vector_vector_multip(u,v):
    assert u.shape[0] == v.shape[0]

    n = u.shape[0]

    result = 0.0

    for i in range(n):
        result = result + u[i] * v[i]

    return result

In [53]:
# Then we define a matrix vector multiplication

def matrix_vector_multip(U, v):
    assert U.shape[1] == v.shape[0]

    num_rows = U.shape[0]

    result = np.zeros(num_rows)

    for i in range(num_rows):
        result[i] = vector_vector_multip(U[i], v)
    
    return result

In [55]:
def matrix_matrix_multip(U ,V):
    assert U.shape[1] == V.shape[0]

    num_rows = U.shape[0]
    num_cols = V.shape[1]

    result = np.zeros((num_rows,num_cols))

    for i in range(num_cols):
        vi = V[:,i]
        Uvi = matrix_vector_multip(U, vi)
        result[:,i] = Uvi

    return result


In [59]:
XT = x.T

In [60]:
XTX = matrix_matrix_multip(XT,x)
XTX

array([[62248334.33150762, 41431216.50732678],
       [41431216.50732678, 28373339.        ]])

In [63]:
XTX_inv = np.linalg.inv(XTX)
XTX_inv


array([[ 5.71497081e-07, -8.34509443e-07],
       [-8.34509443e-07,  1.25380877e-06]])

In [81]:
XTX_inv_XT = XTX_inv.dot(XT)
XTX_inv_XT.shape

(2, 7)

In [84]:
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])
y.shape

(7,)

In [85]:
XTX_inv_XT.shape[1] == y.shape[0]

True

In [86]:
w = matrix_vector_multip(XTX_inv_XT,y)
w

array([0.01386421, 0.5049067 ])

In [88]:
final = np.sum(w)
final

np.float64(0.5187709081074002)