In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Q1. Pandas version

In [2]:
# Q1: show the installed pandas version string as the cell's displayed value.
pd.__version__

'2.2.2'

#### Getting the data

In [3]:
# Location of the laptops CSV; it is read directly over HTTP.
LAPTOPS_CSV_URL = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv'

# Load the whole file into a DataFrame and preview the first rows.
data = pd.read_csv(LAPTOPS_CSV_URL)
data.head()

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [4]:
# Inspect the dtype pandas inferred for each column of the loaded frame.
data.dtypes

Laptop           object
Status           object
Brand            object
Model            object
CPU              object
RAM               int64
Storage           int64
Storage type     object
GPU              object
Screen          float64
Touch            object
Final Price     float64
dtype: object

In [5]:
# List the column labels of the loaded frame.
data.columns

Index(['Laptop', 'Status', 'Brand', 'Model', 'CPU', 'RAM', 'Storage',
       'Storage type', 'GPU', 'Screen', 'Touch', 'Final Price'],
      dtype='object')

### Q2. Records count
How many records are in the dataset?

In [6]:
# Q2: number of records (rows) in the dataset.
# The label previously read "Lenght"; the spelling is corrected here.
print(f'Length of data: {len(data)}')

Lenght of data: 2160


### Q3. Laptop brands
How many laptop brands are represented in the dataset?

In [7]:
# Distinct brand names, compared case-insensitively by lower-casing first.
data['Brand'].str.lower().unique()

array(['asus', 'alurin', 'msi', 'hp', 'lenovo', 'medion', 'acer', 'apple',
       'razer', 'gigabyte', 'dell', 'lg', 'samsung', 'pccom', 'microsoft',
       'primux', 'prixton', 'dynabook toshiba', 'thomson', 'denver',
       'deep gaming', 'vant', 'innjoo', 'jetwing', 'millenium', 'realme',
       'toshiba'], dtype=object)

In [8]:
# Q3: how many distinct brands there are, ignoring letter case.
data['Brand'].str.lower().nunique()

27

### Q4. Missing values
How many columns in the dataset have missing values?

In [9]:
# Per-column count of missing entries (isna is the same check as isnull).
data.isna().sum()

Laptop             0
Status             0
Brand              0
Model              0
CPU                0
RAM                0
Storage            0
Storage type      42
GPU             1371
Screen             4
Touch              0
Final Price        0
dtype: int64

In [10]:
# Q4: count the columns that contain at least one missing value.
# `missing_values` is a one-column frame indexed by column name.
missing_values = data.isnull().sum().to_frame(name='missing_values')
has_missing = missing_values['missing_values'] > 0
columns_with_missing = missing_values.loc[has_missing]
print(f"Number of columns in the dataset with missing values: {len(columns_with_missing)}")

Number of columns in the dataset with missing values: 3


### Q5. Maximum final price
What's the maximum final price of Dell notebooks in the dataset?

In [11]:
# Lower-case the brand names in place so the brand filters in later cells
# ('dell', 'innjoo') match regardless of the original capitalisation.
# Note: this overwrites the Brand column of `data`.
data['Brand'] = data['Brand'].str.lower()
data.head()

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,msi,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,hp,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [12]:
# Q5: highest 'Final Price' among rows whose (lower-cased) brand is 'dell'.
dell_prices = data.loc[data['Brand'] == 'dell', 'Final Price']
print(f"maximum final price of Dell notebooks: {float(dell_prices.max())}")

maximum final price of Dell notebooks: 3936.0


### Q6. Median value of Screen
- Find the median value of Screen column in the dataset.
- Next, calculate the most frequent value of the same Screen column.
- Use fillna method to fill the missing values in Screen column with the most frequent value from the previous step.
- Now, calculate the median value of Screen once again.

In [13]:
# Q6, step 1: median of the Screen column before any filling.
screen_median = float(data['Screen'].median())
print(f"median value of `Screen` column: {screen_median}")

median value of `Screen` column: 15.6


In [14]:
# Q6, step 2: most frequent Screen value (first entry of the mode Series).
most_frequent_screen = data['Screen'].mode().iloc[0]
print(f"most frequent value in `Screen` column: {float(most_frequent_screen)}")

most frequent value in `Screen` column: 15.6


In [15]:
# Q6, step 3: fill the gaps in `Screen` with the most frequent value.
# fillna(mode) is what the task asks for. ffill() would instead copy whatever
# value the preceding row holds (and leave a leading NaN unfilled), which only
# matches the mode by coincidence.
screen_mode = data['Screen'].mode().iloc[0]
data['Screen'] = data['Screen'].fillna(screen_mode)

# Confirm that Screen no longer reports missing values.
data.isnull().sum()

Laptop             0
Status             0
Brand              0
Model              0
CPU                0
RAM                0
Storage            0
Storage type      42
GPU             1371
Screen             0
Touch              0
Final Price        0
dtype: int64

In [16]:
# Q6, step 4: median of Screen again, now that the gaps have been filled.
screen_median_filled = float(data['Screen'].median())
print(f"After fillna, median value of `Screen` column: {screen_median_filled}")

After fillna, median value of `Screen` column: 15.6


### Q7. Sum of weights
- Select all the "Innjoo" laptops from the dataset.
- Select only columns RAM, Storage, Screen.
- Get the underlying NumPy array. Let's call it X.
- Compute matrix-matrix multiplication between the transpose of X and X. To get the transpose, use X.T. Let's call the result XTX.
- Compute the inverse of XTX.
- Create an array y with values [1100, 1300, 800, 900, 1000, 1100].
- Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w.
- What's the sum of all the elements of the result?

In [17]:
# Build the feature matrix X: the Innjoo rows, restricted to RAM, Storage and
# Screen, as a plain NumPy array.
choice_columns = ['RAM', 'Storage', 'Screen']
is_innjoo = data['Brand'] == 'innjoo'
X = data.loc[is_innjoo, choice_columns].to_numpy()
print(f'Shape of X: {X.shape}')
X

Shape of X: (6, 3)


array([[  8. , 256. ,  15.6],
       [  8. , 512. ,  15.6],
       [  4. ,  64. ,  14.1],
       [  6. ,  64. ,  14.1],
       [  6. , 128. ,  14.1],
       [  6. , 128. ,  14.1]])

In [18]:
# Multiply the transpose of X by X (the task's "XTX").
# Note: despite its name, `XtimesXT` holds X.T . X, not X . X.T; the name is
# kept because later cells refer to it.
X_transposed = X.T
XtimesXT = X_transposed.dot(X)
print(f'Shape of X: {X.shape}')
print(f"Shape of X.T: {X.T.shape}")
print(f"Shape of XTX: {XtimesXT.shape}")

Shape of X: (6, 3)
Shape of X.T: (3, 6)
Shape of XTX: (3, 3)


In [19]:
# Target vector given by the task, one value per selected laptop row.
target_values = [1100, 1300, 800, 900, 1000, 1100]
y = np.array(target_values)
print(f"Shape of y: {y.shape}")

Shape of y: (6,)


In [20]:
# Display the product computed earlier as np.dot(X.T, X); despite the
# variable's name this is X.T . X, not X . X.T.
XtimesXT

array([[2.52000e+02, 8.32000e+03, 5.59800e+02],
       [8.32000e+03, 3.68640e+05, 1.73952e+04],
       [5.59800e+02, 1.73952e+04, 1.28196e+03]])

In [21]:
# Display the inverse of the X.T . X matrix. The result is only shown here,
# not stored; the following cell computes it again and keeps it as XTX_inv.
np.linalg.inv(XtimesXT)

array([[ 2.78025381e-01, -1.51791334e-03, -1.00809855e-01],
       [-1.51791334e-03,  1.58286725e-05,  4.48052175e-04],
       [-1.00809855e-01,  4.48052175e-04,  3.87214888e-02]])

In [22]:
# Weights w = inv(X.T . X) . X.T . y, evaluated left to right:
# first inv(X.T . X) . X.T, then that result times y.
XTX_inv = np.linalg.inv(XtimesXT)
inverse_times_XT = XTX_inv.dot(X.T)
w = inverse_times_XT.dot(y)
w

array([45.58076606,  0.42783519, 45.29127938])

In [23]:
# Q7 answer: total of the elements of w, converted to a plain Python float
# for display. Built-in sum() is kept as-is so the floating-point summation
# order (and therefore the printed digits) stays exactly the same.
float(sum(w))

91.29988062995753