# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# Read Data

In [2]:
data = pd.read_csv("data/Process_dataset.csv")

In [3]:
data.head(5)

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight,Price
0,Apple,Ultrabook,13.3,1,Retina Display,0,2560,1600,Intel,Core i5,2.3GHz,8GB,128.0,SSD,Intel,Iris Plus Graphics,640,macOS,1.37kg,71378.6832
1,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,1.8GHz,8GB,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,47895.5232
2,HP,Notebook,15.6,0,Full HD,0,1920,1080,Intel,Core i5 7200U,2.5GHz,8GB,256.0,SSD,Intel,Hd Graphics,620,No OS,1.86kg,30636.0
3,Apple,Ultrabook,15.4,1,Retina Display,0,2880,1800,Intel,Core i7,2.7GHz,16GB,512.0,SSD,Amd,Radeon Pro,455,macOS,1.83kg,135195.336
4,Apple,Ultrabook,13.3,1,Retina Display,0,2560,1600,Intel,Core i5,3.1GHz,8GB,256.0,SSD,Intel,Iris Plus Graphics,650,macOS,1.37kg,96095.808


# Check Null Values

In [4]:
data.isnull().sum()

Company                        0
TypeName                       0
Inches                         0
panel_type_IPS                 0
DisplayType                  361
IsTouchscreen                  0
Resolution_Width               0
Resolution_Height              0
CPU_brand                      0
CPU_series_model_gen_type      0
CPU_speed                      0
Ram                            0
Total_Memory_Capacity_GB       1
Type                           1
GPU_Brand                      0
GPU_series                     1
GPU_model_no                  70
OpSys                          0
Weight                         0
Price                          0
dtype: int64

In [5]:
null_rows = data[data.isnull().any(axis=1)]
null_rows

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight,Price
1,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,1.8GHz,8GB,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,47895.5232
5,Acer,Notebook,15.6,0,,0,1366,768,Amd,A9-Series 9420,3GHz,4GB,500.0,HDD,Amd,Radeon,R5,Windows 10,2.1kg,21312.0000
6,Apple,Ultrabook,15.4,1,Retina Display,0,2880,1800,Intel,Core i7,2.2GHz,16GB,256.0,Flash Storage,Intel,Iris Pro Graphics,,Mac OS X,2.04kg,114017.6016
7,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,1.8GHz,8GB,256.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,61735.5360
10,HP,Notebook,15.6,0,,0,1366,768,Intel,Core i5 7200U,2.5GHz,4GB,500.0,HDD,Intel,Hd Graphics,620,No OS,1.86kg,20986.9920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,HP,Netbook,11.6,0,,0,1366,768,Intel,Celeron Dual Core N3060,1.6GHz,2GB,32.0,Flash Storage,Intel,Hd Graphics,400,Windows 10,1.17kg,11135.5200
1267,Asus,Notebook,15.6,0,,0,1366,768,Intel,Core i7 6500U,2.5GHz,4GB,500.0,HDD,Nvidia,Geforce,920M,Windows 10,2.2kg,38378.6496
1270,Lenovo,Notebook,14.0,0,,0,1366,768,Intel,Celeron Dual Core N3050,1.6GHz,2GB,64.0,Flash Storage,Intel,Hd Graphics,,Windows 10,1.5kg,12201.1200
1271,HP,Notebook,15.6,0,,0,1366,768,Intel,Core i7 6500U,2.5GHz,6GB,1024.0,HDD,Amd,Radeon R5,M330,Windows 10,2.19kg,40705.9200


In [6]:
null_display_type = data[data["DisplayType"].isnull()]
null_display_type

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,CPU_speed,Ram,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight,Price
1,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,1.8GHz,8GB,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,47895.5232
5,Acer,Notebook,15.6,0,,0,1366,768,Amd,A9-Series 9420,3GHz,4GB,500.0,HDD,Amd,Radeon,R5,Windows 10,2.1kg,21312.0000
7,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,1.8GHz,8GB,256.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,61735.5360
10,HP,Notebook,15.6,0,,0,1366,768,Intel,Core i5 7200U,2.5GHz,4GB,500.0,HDD,Intel,Hd Graphics,620,No OS,1.86kg,20986.9920
21,HP,Notebook,15.6,0,,0,1366,768,Amd,E-Series E2-9000e,1.5GHz,4GB,500.0,HDD,Amd,Radeon,R2,No OS,1.86kg,13746.2400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,HP,Netbook,11.6,0,,0,1366,768,Intel,Celeron Dual Core N3060,1.6GHz,2GB,32.0,Flash Storage,Intel,Hd Graphics,400,Windows 10,1.17kg,11135.5200
1267,Asus,Notebook,15.6,0,,0,1366,768,Intel,Core i7 6500U,2.5GHz,4GB,500.0,HDD,Nvidia,Geforce,920M,Windows 10,2.2kg,38378.6496
1270,Lenovo,Notebook,14.0,0,,0,1366,768,Intel,Celeron Dual Core N3050,1.6GHz,2GB,64.0,Flash Storage,Intel,Hd Graphics,,Windows 10,1.5kg,12201.1200
1271,HP,Notebook,15.6,0,,0,1366,768,Intel,Core i7 6500U,2.5GHz,6GB,1024.0,HDD,Amd,Radeon R5,M330,Windows 10,2.19kg,40705.9200


# Check Unique Values in Column

In [7]:
data.nunique()

Company                       19
TypeName                       6
Inches                        24
panel_type_IPS                 2
DisplayType                    7
IsTouchscreen                  2
Resolution_Width              13
Resolution_Height             10
CPU_brand                      3
CPU_series_model_gen_type     93
CPU_speed                     29
Ram                           10
Total_Memory_Capacity_GB      22
Type                           7
GPU_Brand                      4
GPU_series                    25
GPU_model_no                  82
OpSys                          9
Weight                       189
Price                        777
dtype: int64

In [8]:
numerical_cols = data.select_dtypes(include=['float', 'float64']).columns.tolist()
categorical_cols = data.select_dtypes(include=['object', 'category']).columns.tolist()

print (f"{numerical_cols} \n {categorical_cols}")

['Inches', 'Total_Memory_Capacity_GB', 'Price'] 
 ['Company', 'TypeName', 'DisplayType', 'CPU_brand', 'CPU_series_model_gen_type', 'CPU_speed', 'Ram', 'Type', 'GPU_Brand', 'GPU_series', 'GPU_model_no', 'OpSys', 'Weight']


### Unique Values in Categorcal columns and Converting if needed 

#### CPU_speed


In [9]:
data["CPU_speed"].unique()

array(['2.3GHz', '1.8GHz', '2.5GHz', '2.7GHz', '3.1GHz', '3GHz', '2.2GHz',
       '1.6GHz', '2GHz', '2.8GHz', '1.2GHz', '2.9GHz', '2.4GHz', '1.5GHz',
       '1.9GHz', '1.44GHz', '1.1GHz', '2.0GHz', '1.3GHz', '2.6GHz',
       '3.6GHz', '1.60GHz', '3.2GHz', '1.0GHz', '2.1GHz', '0.9GHz',
       '1.92GHz', '2.50GHz', '2.70GHz'], dtype=object)

In [None]:
# Extracting Unit from CPU_Speed 

data[['CPU_Speed_Value', 'CPU_Unit']] = (
    data['CPU_speed']
.str.extract(r'(\d+\.?\d*)\s*([GM]Hz)', expand=True)
)

# Converting into Float
data ["CPU_Speed_Value_GHz"] = data['CPU_Speed_Value'].astype(float)



In [17]:
# Converting MHz -> GHz
data.loc[data["CPU_Unit"]=='MHz', 'CPU_Speed_Value'] /=1000

### How This work?

```
data.loc[data['CPU_Unit'] == 'MHz', 'CPU_Speed_Value'] /= 1000
```

1. **`data['CPU_Unit'] == 'MHz'`**  
   - Creates a **boolean mask** (True/False series) where:  
     - `True` → Rows with `CPU_Unit = 'MHz'`  
     - `False` → Rows with other units (e.g., `GHz`) or missing values.  

   **Example Mask**:  
   | CPU_Unit | Mask (`== 'MHz'`) |
   |----------|-------------------|
   | GHz      | `False`           |
   | MHz      | `True`            |
   | GHz      | `False`           |
   | MHz      | `True`            |

2. **`df.loc[mask, 'CPU_Speed_Value']`**  
   - Selects **only the rows where the mask is True** and targets the `CPU_Speed_Value` column.  
   - **Example Selection**:  
     | CPU_Speed_Value |  
     |-----------------|  
     | 3500.0          | *(from MHz rows)*  
     | 2800.0          | *(from MHz rows)*  

3. **`/= 1000`**  
   - Divides the selected values by 1000 to convert MHz → GHz.  
   - **Example Conversion**:  
     - `3500.0 MHz` → `3.5 GHz`  
     - `2800.0 MHz` → `2.8 GHz`  

---


In [23]:
data

Unnamed: 0,Company,TypeName,Inches,panel_type_IPS,DisplayType,IsTouchscreen,Resolution_Width,Resolution_Height,CPU_brand,CPU_series_model_gen_type,...,Total_Memory_Capacity_GB,Type,GPU_Brand,GPU_series,GPU_model_no,OpSys,Weight,Price,CPU_Unit,CPU_Speed_Value_GHz
0,Apple,Ultrabook,13.3,1,Retina Display,0,2560,1600,Intel,Core i5,...,128.0,SSD,Intel,Iris Plus Graphics,640,macOS,1.37kg,71378.6832,GHz,2.3
1,Apple,Ultrabook,13.3,0,,0,1440,900,Intel,Core i5,...,128.0,Flash Storage,Intel,Hd Graphics,6000,macOS,1.34kg,47895.5232,GHz,1.8
2,HP,Notebook,15.6,0,Full HD,0,1920,1080,Intel,Core i5 7200U,...,256.0,SSD,Intel,Hd Graphics,620,No OS,1.86kg,30636.0000,GHz,2.5
3,Apple,Ultrabook,15.4,1,Retina Display,0,2880,1800,Intel,Core i7,...,512.0,SSD,Amd,Radeon Pro,455,macOS,1.83kg,135195.3360,GHz,2.7
4,Apple,Ultrabook,13.3,1,Retina Display,0,2560,1600,Intel,Core i5,...,256.0,SSD,Intel,Iris Plus Graphics,650,macOS,1.37kg,96095.8080,GHz,3.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1268,Lenovo,2 in 1 Convertible,14.0,1,Full HD,1,1920,1080,Intel,Core i7 6500U,...,128.0,SSD,Intel,Hd Graphics,520,Windows 10,1.8kg,33992.6400,GHz,2.5
1269,Lenovo,2 in 1 Convertible,13.3,1,Quad HD+,1,3200,1800,Intel,Core i7 6500U,...,512.0,SSD,Intel,Hd Graphics,520,Windows 10,1.3kg,79866.7200,GHz,2.5
1270,Lenovo,Notebook,14.0,0,,0,1366,768,Intel,Celeron Dual Core N3050,...,64.0,Flash Storage,Intel,Hd Graphics,,Windows 10,1.5kg,12201.1200,GHz,1.6
1271,HP,Notebook,15.6,0,,0,1366,768,Intel,Core i7 6500U,...,1024.0,HDD,Amd,Radeon R5,M330,Windows 10,2.19kg,40705.9200,GHz,2.5


In [12]:
def Unique_Values(Column_names):
    for column in Column_names:
        print ()

In [25]:
data["Ram"].unique()

array(['8GB', '16GB', '4GB', '2GB', '12GB', '64GB', '6GB', '32GB', '24GB',
       '1GB'], dtype=object)

# Exploratory Data Analysis (EDA)

In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1273 entries, 0 to 1272
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Company                    1273 non-null   object 
 1   TypeName                   1273 non-null   object 
 2   Inches                     1273 non-null   float64
 3   panel_type_IPS             1273 non-null   int64  
 4   DisplayType                912 non-null    object 
 5   IsTouchscreen              1273 non-null   int64  
 6   Resolution_Width           1273 non-null   int64  
 7   Resolution_Height          1273 non-null   int64  
 8   CPU_brand                  1273 non-null   object 
 9   CPU_series_model_gen_type  1273 non-null   object 
 10  CPU_speed                  1273 non-null   object 
 11  Ram                        1273 non-null   object 
 12  Total_Memory_Capacity_GB   1272 non-null   float64
 13  Type                       1272 non-null   objec

In [14]:
data.describe()

Unnamed: 0,Inches,panel_type_IPS,IsTouchscreen,Resolution_Width,Resolution_Height,Total_Memory_Capacity_GB,Price,CPU_Speed_Value
count,1273.0,1273.0,1273.0,1273.0,1273.0,1272.0,1273.0,1273.0
mean,15.129929,0.282011,0.145326,1894.219953,1070.538885,621.525157,59955.814073,2.298916
std,1.953925,0.450156,0.352568,494.195581,284.400487,481.133282,37332.251005,0.507118
min,10.1,0.0,0.0,1366.0,768.0,8.0,9270.72,0.9
25%,14.0,0.0,0.0,1600.0,900.0,256.0,31914.72,2.0
50%,15.6,0.0,0.0,1920.0,1080.0,500.0,52161.12,2.5
75%,15.6,1.0,0.0,1920.0,1080.0,1024.0,79333.3872,2.7
max,35.6,1.0,1.0,3840.0,2160.0,2560.0,324954.72,3.6
