In [31]:
import pandas as pd
import requests
import numpy as np
pd.__version__

'2.2.1'

In [5]:
# script to dowbload file from repo using url
url = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv'
response = requests.get(url)

with open('laptops.csv', 'wb') as file:
    file.write(response.content)

print("Download complete!")



Download complete!


In [7]:
# read file and check dataframe 
df = pd.read_csv('laptops.csv')
df.head()

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [9]:
# How many records are in the dataset?
len(df)

2160

In [10]:
# laptop brands 
df['Brand'].nunique()

27

In [16]:
# missing values i.e. total number of columns with missing values 
df.isna().sum()

Laptop             0
Status             0
Brand              0
Model              0
CPU                0
RAM                0
Storage            0
Storage type      42
GPU             1371
Screen             4
Touch              0
Final Price        0
dtype: int64

In [19]:
# get list of brands in dataframe 
df['Brand'].unique()

array(['Asus', 'Alurin', 'MSI', 'HP', 'Lenovo', 'Medion', 'Acer', 'Apple',
       'Razer', 'Gigabyte', 'Dell', 'LG', 'Samsung', 'PcCom', 'Microsoft',
       'Primux', 'Prixton', 'Dynabook Toshiba', 'Thomson', 'Denver',
       'Deep Gaming', 'Vant', 'Innjoo', 'Jetwing', 'Millenium', 'Realme',
       'Toshiba'], dtype=object)

In [21]:
# get the unique models 
df[df['Brand']=='Dell']['Model'].unique()

array(['Vostro', 'Latitude', 'XPS', 'Precision'], dtype=object)

In [22]:
# maximum price among dell notebooks
df[(df['Brand']=='Dell')]['Final Price'].max()

3936.0

In [23]:
# Calculate the median value of the Screen column
df['Screen'].median()

15.6

In [24]:
# Calculate the most frequent value of the Screen column
df['Screen'].mode()[0]

15.6

In [25]:
# Use fillna method to fill the missing values in Screen column with the most frequent value
df['Screen'].fillna(df['Screen'].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Screen'].fillna(df['Screen'].mode()[0], inplace=True)


In [27]:
# Calculate the median value of Screen once again
df['Screen'].median()


15.6

In [39]:
# Select all the "Innjoo" laptops and only columns RAM, Storage, Screen
innjoo_laptops = df[(df['Brand'] == 'Innjoo')][['RAM', 'Storage', 'Screen']]

In [40]:
# Get the underlying NumPy array
X = innjoo_laptops.to_numpy()

In [41]:
X

array([[  8. , 256. ,  15.6],
       [  8. , 512. ,  15.6],
       [  4. ,  64. ,  14.1],
       [  6. ,  64. ,  14.1],
       [  6. , 128. ,  14.1],
       [  6. , 128. ,  14.1]])

In [42]:
# Compute matrix-matrix multiplication between the transpose of X and X
XTX = np.dot(X.T, X)

In [43]:
# Compute the pseudo-inverse of XTX
XTX_inv = np.linalg.pinv(XTX)

In [44]:
# Create an array y with values [1100, 1300, 800, 900, 1000, 1100]
y = np.array([1100, 1300, 800, 900, 1000, 1100])

In [45]:
# Multiply the pseudo-inverse of XTX with the transpose of X, and then multiply the result by y
w = np.dot(np.dot(XTX_inv, X.T), y[:X.shape[0]])


In [46]:
# Calculate the sum of all the elements of the result
sum_w = np.sum(w)

print(f"The sum of all the elements of the result is {sum_w}.")

The sum of all the elements of the result is 91.29988062994354.
